## Student and Project Class Creation with Tests

### Creating Base Class: Students

In [2]:
class Student:
    # Initializing attributes for the object Student.
    def __init__(self, student_id, major, university):
        self.student_id = student_id
        self.major = major
        self.university = university



###  Creating Inheritance Class: Project

In [4]:
class Project(Student):
    # creating the object student and all of its attributes. Some will
    # require default values to survive tests.
   # def __init__(
    #    self, student_id, major, university, project_id, 
     #   data_points = [], __analysis_results = {}, __active = True
    #):
    def __init__(
        self, project_id, 
        data_points = [], __analysis_results = {}, __active = True
    ):
        self.student_id = student_id
        self.major = major
        self.university = university
        self.project_id = project_id
        self.data_points = data_points
        self.__analysis_results = __analysis_results
        self.__active = __active

        # Checking if any of the data points passed into the object 
        # contain anything other than floats or integers.
        for value in self.data_points:
             if not isinstance(value, (int, float)):
                print("Non-numerical value found:", value)
                raise ValueError("Non-numerical value found.")

    # Adding a new value to data points but first checking if the value
    # being passed in is an integer or float. If not returning an error
    def add_data(self, new_data):
        if isinstance(new_data, (int, float)):
            self.data_points.append(new_data)
            
        else:
            print("Input must be a number.")
            raise ValueError("Input must be a number")

    # If perform_analysis (further down) is called and runs correctly
    # this function will return the results as they can't (shouldn't) be 
    # be called directly considering they are private.
    def get_results(self):
        return self.__analysis_results

    # returns the private attribute of __active.
    def is_active(self):
        return self.__active

    # Lets the active status be changed but first checks if the status
    # being passed in is a bool and returning an error if its not
    def set_active(self, status):
        if isinstance(status, (bool)):
            self.__active = status

        else:
            print("Input must be either True or False.")
            raise ValueError

    # performing analysis on the data set to find the mean, median, and 
    # sample variance of the dataset. If the data set is empty, then 
    # we would end up with a divide by 0 situation 
    def perform_analysis(self):
        try:
            n = len(self.data_points)
            if n == 0:
                print("No data points found. Nothing to analyze.")
                return

            # Calculating mean
            total = 0.0
            
            for x in self.data_points:
                total += x
            mean = total / n

            # Calculating median
            ordered = sorted(self.data_points)
            if n % 2 == 1:
                median = ordered[n // 2]
                
            else:
                l_mid = ordered[n // 2-1]
                u_mid = ordered[n // 2]
                median = (l_mid + u_mid) / 2

            # Calculating sample variance
            sum_square_diff = 0.0
            for i in self.data_points:
                sum_square_diff += (i - mean) ** 2

            if n > 1:
                variance = sum_square_diff / (n - 1)

            # If n is less than 1 then it is 0 and the set is empty and
            # will be caught by the above exception. But if it is 1
            # then this would still cause a divide by zero exception as
            # we would have 0 divided by 0 for a sample variance.
            # However we can instead just consider any data set of 
            # of 0 to have a variance of 0 as obviously it doesn't
            # vary with itself and is just a quirk of the sample
            # variance equation hedging its bets with n-1. So we 
            # will set it equal to 0.0.
            else:
                variance = 0.0

            self.__analysis_results = {
                "mean": float(f"{mean:.6f}"),
                "median": float(f"{median:.6f}"),
                "variance": float(f"{variance:.6f}")
            }
        except:
            print("Error apparently")


#### Testing Proper Functionality 

In [5]:
# Test Student Class constructor with student_id 1001, major
# "Data Science, and university Harvard"

john = Student(1001, "Data Science", "Harvard")
print(f"Student id is : {john.student_id}\nMajor: {john.major}\nUniversity:{john.university}")



Student id is : 1001
Major: Data Science
University:Harvard


In [6]:
# Test Project Class constructor with student_id 1001, 
# major "Data Science, university Harvard, project_id 1, 
#data_points [65,75,95,99]"

my_project = Project(1001, "Data Science", "Harvard", 1, [65,75,95,99])
print(f"Student id is : {my_project.student_id}\nMajor: {my_project.major}\nUniversity:{my_project.university}\n\
project_id:{my_project.project_id}\ndata_points : {my_project.data_points}\nis_active:{my_project.is_active()}" )


TypeError: Project.__init__() takes from 2 to 5 positional arguments but 6 were given

In [5]:
#your test case here
# Testing if the proper error occurs when entering something with 
# add_data thats not a number. Will first test with a 
# a correct call and then one with an error. 
# The second should return raised value error and
# message

my_project1 = Project(1001, "Data Science", "Harvard", 1, [65,75,95,99])
my_project1.add_data(5)

print(f"data_points :{my_project1.data_points}" )

# Testing with a string to create an error
my_project2 = Project(1001, "Data Science", "Harvard", 1, [65,75,95,99])
my_project2.add_data("r")

print(f"data_points :{my_project2.data_points}" )

data_points :[65, 75, 95, 99, 5]
Input must be a number.


ValueError: Input must be a number

In [6]:
#your test case here
# Testing if it can find errors in the pre entered datapoints list.
# Should return a value error.
my_project3 = Project(1001, "Data Science", "Harvard", 1, [65,"75",95,99])
print(f"Student id is : {my_project.student_id}\nMajor: \
{my_project.major}\nUniversity:{my_project.university}\n\
project_id:{my_project.project_id}\ndata_points :\
{my_project.data_points}\nis_active:{my_project.is_active()}" )

Non-numerical value found: 75


ValueError: Non-numerical value found.

In [7]:
#your test case here
# Testing if the program detects the lack of a bool being entered for 
# status. Will test that it works properly first and then test the error
# Should also return an error eventually after returning the correct
# status
my_project4 = Project(1001, "Data Science", "Harvard", 1, [65,75,95,99])
my_project4.set_active(False)
print(f"is_active:{my_project4.is_active()}" )

my_project5 = Project(1001, "Data Science", "Harvard", 1, [65,75,95,99])
my_project5.set_active("Noodles")
print(f"is_active:{my_project5.is_active()}" )

is_active:False
Input must be either True or False.


ValueError: 

In [8]:
#your test case here
my_project6 = Project(1001, "Data Science", "Harvard", 1, [65,75,95,99])
my_project6.perform_analysis()
print(f"Student id is : {my_project6.student_id}\nMajor: \
{my_project6.major}\nUniversity:{my_project6.university}\n\
project_id:{my_project6.project_id}\ndata_points :\
{my_project6.data_points}\nis_active:{my_project6.is_active()}\
\nAnalysis of Project: {my_project6.get_results()}")


Student id is : 1001
Major: Data Science
University:Harvard
project_id:1
data_points :[65, 75, 95, 99]
is_active:True
Analysis of Project: {'mean': 83.5, 'median': 85.0, 'variance': 262.333333}


In [9]:
#your test case here
# Testing without a empty set to see if the zero error is caught

my_project7 = Project(1001, "Data Science", "Harvard", 1)
my_project7.perform_analysis()
print("Analysis of Project:", my_project7.get_results())

No data points found. Nothing to analyze.
Analysis of Project: {}


In [10]:
# Additional test for when there is only one data point, if that will 
# cause a divide by 0 sitution and how it will be handled.
my_project8 = Project(1001, "Data Science", "Harvard", 1, [5])
my_project8.perform_analysis()
print("Analysis of Project:", my_project8.get_results())

Analysis of Project: {'mean': 5.0, 'median': 5.0, 'variance': 0.0}


### Tests Class and Objects are created, and perform_analysis yields results as expected.

In [11]:
test1 = [(1000, 'Computer Science', 'HARVARD', 1, [65,75,95,99]),(1001, 'Computer Science', 'MIT', 2, [95,35,75,90,91]),\
         (1003,'Data Science', 'Cornell', 3, [75,85,95,99,33])]


for test in test1:
    project = Project(test[0],test[1],test[2],test[3],test[4])
    project.perform_analysis()
    project_info = f"Student id : {project.student_id}\nMajor: {project.major}\nUniversity:{project.university}\n\
project_id:{project.project_id}\ndata_points : {project.data_points}\nis_active:{project.is_active()}"
    print(project_info)
    print(f"analysis results :{project.get_results()}\n")


Student id : 1000
Major: Computer Science
University:HARVARD
project_id:1
data_points : [65, 75, 95, 99]
is_active:True
analysis results :{'mean': 83.5, 'median': 85.0, 'variance': 262.333333}

Student id : 1001
Major: Computer Science
University:MIT
project_id:2
data_points : [95, 35, 75, 90, 91]
is_active:True
analysis results :{'mean': 77.2, 'median': 90.0, 'variance': 614.2}

Student id : 1003
Major: Data Science
University:Cornell
project_id:3
data_points : [75, 85, 95, 99, 33]
is_active:True
analysis results :{'mean': 77.4, 'median': 85.0, 'variance': 702.8}



### Tests if data points can be added and data statistics are updated.

In [12]:
test = (1000, 'Computer Science', 'HARVARD', 1, [65,75,95,99])
project = Project(test[0],test[1],test[2],test[3],test[4])
project.perform_analysis()
print(f"analysis results before : {project.get_results()}")
project.add_data(22)
project.perform_analysis()
print(f"analysis results after: {project.get_results()}")


analysis results before : {'mean': 83.5, 'median': 85.0, 'variance': 262.333333}
analysis results after: {'mean': 71.2, 'median': 75.0, 'variance': 953.2}


### Tests if active can be set


In [13]:
test = (1000, 'Computer Science', 'HARVARD', 1, [65,75,95,99])
project = Project(test[0],test[1],test[2],test[3],test[4])
print(f"value of active at initialization: {project.is_active()}")
project.set_active(False)
print(f"value of active after calling set method: {project.is_active()}")

value of active at initialization: True
value of active after calling set method: False


### Tests if perform_analysis can handle exceptions for empty list 

In [14]:
test1 = (1000, 'Computer Science', 'HARVARD', 1, [])

print(f"Test #1 : Test perform analysis if the data points is an empty list.")
project = Project(test1[0],test1[1],test1[2],test1[3],test1[4])
project.perform_analysis()



    

Test #1 : Test perform analysis if the data points is an empty list.
No data points found. Nothing to analyze.


### Tests if constructor raises an error if non-numerical values are present in data_points

In [15]:
test2 = (1001, 'Computer Science', 'MIT', 2, [95,35,75,90,'a'])
#print(f"\nTest #3 : Test constructor if data points has non-numerical values.")
project = Project(test2[0],test2[1],test2[2],test2[3],test2[4])



Non-numerical value found: a


ValueError: Non-numerical value found.

### Tests if adding non-numerical values to data_points raises an error

In [16]:
test3 = (1000, 'Computer Science', 'HARVARD', 3, [100,100,100])
project = Project(test3[0],test3[1],test3[2],test3[3],test3[4])
project.add_data('xyz')


Input must be a number.


ValueError: Input must be a number

### Identification of Class Protocols in pandas
      

In [17]:
import pandas as pd
midterm_scores = [96,90,85]
df = pd.DataFrame(midterm_scores, columns=['midterm_scores'])

shape = df.shape
print(shape)
df.sort_values(by='midterm_scores', ascending=False)


(3, 1)


Unnamed: 0,midterm_scores
0,96
1,90
2,85


In [8]:
class Dog:
    total_dogs = 0

    def __init__(self, name):
        self.name = name

    def bark(self):
        print("Bark! My name is " + self.name)

my_dog = Dog("Rex")
my_dog.bark()

Bark! My name is Rex


1. According to the document pandas.DataFrame which we rename to pd to make it pd.DataFrame is the class name. The parameters being passed to it is the list of midterm scores along with the scores it contains as values, and columns is the other perameter with 'midterm_scores' string being the value that it brings with it.
2. The attribute being called on df is shape.
3. The method is sort_values and the parameters are by = 'midterm_scores' and ascending = False.


