# Solutions to Exercises

This notebook is based on Anna-Lena Lamprecht's CoTaPP repository (https://github.com/annalenalamprecht/CoTaPP). Some modifications were made.

## Unit 3.1: Object-Oriented Programming

### 1. Room Occupancy Revisited

In [None]:
class Room:
    """A room with a number, a guest capacity and a list of current guests."""

    # registry shared by the whole class: every created room adds itself here
    all_rooms = set()

    def __init__(self, number, max_guests):
        """Create an empty room and register it in ``Room.all_rooms``.

        number: the room number used to look the room up again.
        max_guests: how many guests may be checked in at the same time.
        """
        self.number = number
        self.max_guests = max_guests
        self.guests = []
        Room.all_rooms.add(self)

    @classmethod
    def printOccupancy(cls):
        """Print one line per registered room: number, capacity and guests."""
        for current in cls.all_rooms:
            line = f"{current.number} (max. {current.max_guests}):\t{current.guests}"
            print(line)

    @classmethod
    def getRoom(cls, number):
        """Return the registered room with the given number, or None."""
        matches = (candidate for candidate in cls.all_rooms
                   if candidate.number == number)
        return next(matches, None)

    def checkIn(self, guest):
        """Add a guest to the room, unless the room is at capacity."""
        if len(self.guests) >= self.max_guests:
            print("Room is already full.")
            return
        self.guests.append(guest)

    def checkOut(self, guest):
        """Remove a guest from the room, or report that they are not in it."""
        if guest not in self.guests:
            print(f"{guest} is not a guest in this room.")
            return
        self.guests.remove(guest)
            
    
################
# Main program #
################

# create some rooms (each one registers itself in Room.all_rooms)
Room(101, 4)
Room(102, 2)
Room(201, 3)
Room(202, 2)

# interactive menu: keeps asking until the user chooses option 4
while True:
    print("These are your options:")
    print("1 - View current room occupancy.")
    print("2 - Check guest in.")
    print("3 - Check guest out.")
    print("4 - Exit program.")
    choice = input("Please choose what you want to do: ")
    if choice == "1":
        Room.printOccupancy()
    elif choice in ("2", "3"):
        # check-in and check-out ask for the same two inputs
        guest = input("Enter name of guest: ")
        try:
            number = int(input("Enter room number: "))
        except ValueError:
            # a non-numeric room number must not crash the menu loop
            print("Not a valid room number.")
            continue
        room = Room.getRoom(number)
        if room is None:
            print("Not a valid room number.")
        elif choice == "2":
            room.checkIn(guest)
        else:
            room.checkOut(guest)
    elif choice == "4":
        print("Goodbye!")
        break
    else:
        print("Invalid input, try again.")

### 2. People at the University

In [None]:

# base class: a person known only by name
class Person:
    """A person identified by a name."""

    def __init__(self, name):
        """Remember the person's name."""
        self.name = name

    def printInfo(self):
        """Print a one-line introduction containing the name."""
        introduction = f"I am {self.name}."
        print(introduction)
        

# derived class: a person enrolled in a study program
class Student(Person):
    """A person who studies a program at a university."""

    def __init__(self, name, university, program):
        """Initialize the person part first, then the student attributes."""
        Person.__init__(self, name)
        self.university, self.program = university, program
        # no credit points are known until setCreditPoints is called
        self.creditpoints = None

    def printInfo(self):
        """Print the person's info, followed by university and program."""
        Person.printInfo(self)
        where = f"I am a student at {self.university}. "
        what = f"I study {self.program}."
        print(where + what)

    def setCreditPoints(self, points):
        """Store the number of credit points."""
        self.creditpoints = points

    def getCreditPoints(self):
        """Return the stored number of credit points (None if never set)."""
        return self.creditpoints
        
# subclasses for bachelor and master students
class BachelorStudent(Student):
    """A student who additionally records where they went to school."""

    def __init__(self, name, university, program, school):
        """Initialize the student part, then store the school."""
        Student.__init__(self, name, university, program)
        self.school = school

    def printInfo(self):
        """Print the student's info, followed by the school."""
        Student.printInfo(self)
        school_line = f"I went to school in {self.school}."
        print(school_line)
        
class MasterStudent(Student):
    """A student who additionally records their bachelor's degree."""

    def __init__(self, name, university, program, bdegree):
        """Initialize the student part, then store the bachelor's degree."""
        Student.__init__(self, name, university, program)
        self.bdegree = bdegree

    def printInfo(self):
        """Print the student's info, followed by the bachelor's degree."""
        Student.printInfo(self)
        degree_line = f"I have a Bachelor's degree in {self.bdegree}."
        print(degree_line)
        
# derived class Lecturer
class Lecturer(Person):
    """A person who lectures at a university department."""

    def __init__(self, name, university, department):
        """Initialize the person part, then store university and department."""
        Person.__init__(self, name)
        self.university, self.department = university, department

    def printInfo(self):
        """Print the person's info, followed by university and department."""
        Person.printInfo(self)
        affiliation = f"I am a lecturer at {self.university}, {self.department}."
        print(affiliation)
        
# derived class Teaching Assistant: both a student and a lecturer
class TeachingAssistant(Student, Lecturer):
    """A student who also teaches at a department."""

    def __init__(self, name, university, program, department):
        """Run both base initializers so student and lecturer attributes exist."""
        # each base initializer is called explicitly with its own arguments
        Student.__init__(self, name, university, program)
        Lecturer.__init__(self, name, university, department)

    def printInfo(self):
        """Print the lecturer info, followed by the study program."""
        Lecturer.printInfo(self)
        program_line = f"I am also a student of {self.program}."
        print(program_line)
        

## test program ##
# one object of each concrete kind
student1 = BachelorStudent("Alice", "UU", "Biology", "Amsterdam")
student2 = MasterStudent("Bob", "UU", "Chemistry", "Biophysics")
lecturer = Lecturer("Cindy","UU", "Information and Computing Sciences")
ta = TeachingAssistant("Dennis", "UU", "Computer Science", "Information and Computing Sciences")

# students: introduce, assign credit points, report them
for student, points in ((student1, 150), (student2, 45)):
    student.printInfo()
    student.setCreditPoints(points)
    print(f"{student.name} has {student.getCreditPoints()} points.")

# lecturer and teaching assistant: introduce only; the ta never gets points set
lecturer.printInfo()
ta.printInfo()
print(f"{ta.name} has {ta.getCreditPoints()} points.")

## Unit 3.2: CSV files, Pandas, tabular data

### 0. Challenge Solutions

In [None]:
def word_frequencies(file):
    """Count how often each whitespace-separated word occurs in a text file.

    Words are compared exactly as they appear in the file: counting is
    case-sensitive and punctuation attached to a word is part of the word.

    file: path of the text file to read.
    Returns a dict mapping each word to its number of occurrences, in order
    of first occurrence.
    """
    # local import keeps this cell runnable on its own
    from collections import Counter

    # the handle gets its own name so the path parameter is not shadowed
    with open(file, "r") as handle:
        content = handle.read()

    # convert back to a plain dict, which is what callers received before
    return dict(Counter(content.split()))
    
def print_selected_municipalities(path="data/dutch_municipalities.csv",
                                  min_income=40000):
    """Print municipality and province for all high-income municipalities.

    Reads a tab-separated file with the columns "municipality", "province"
    and "avg_household_income_2012". Rows with an empty income are skipped.

    path: location of the tab-separated input file.
    min_income: a row is printed when its income is strictly greater than this.
    """
    # local import: the cell does not import csv anywhere else
    import csv

    # newline="" is what the csv module expects for files passed to a reader
    with open(path, "r", newline="") as csvfile:
        csvreader = csv.DictReader(csvfile, delimiter='\t')
        for row in csvreader:
            income = row["avg_household_income_2012"]
            # the emptiness check comes first so int() never sees ""
            if income != "" and int(income) > min_income:
                print(f'{row["municipality"]}: {row["province"]}')

def safe_print_selected_municipalities(path="data/dutch_municipalities.csv",
                                       min_income=40000):
    """Print high-income municipalities and report rows without an income.

    Reads a tab-separated file with the columns "municipality", "province"
    and "avg_household_income_2012". A row whose income cannot be converted
    to an integer is reported with a "No INCOME" line instead of being skipped.

    path: location of the tab-separated input file.
    min_income: a row is printed when its income is strictly greater than this.
    """
    # local import: the cell does not import csv anywhere else
    import csv

    # newline="" is what the csv module expects for files passed to a reader
    with open(path, "r", newline="") as csvfile:
        csvreader = csv.DictReader(csvfile, delimiter='\t')
        for row in csvreader:
            # only the conversion is guarded, so unrelated errors stay visible
            try:
                income = int(row["avg_household_income_2012"])
            except (TypeError, ValueError):
                # ValueError: empty or non-numeric text;
                # TypeError: the row is too short and DictReader filled in None
                print(f'No INCOME for --> {row["municipality"]}: {row["province"]}')
                continue
            if income > min_income:
                print(f'{row["municipality"]}: {row["province"]}')

### 1. Interview Anonymization

In [None]:
interview_file = "data/interview-with-a-syrian-refugee.txt"
new_file = "data/interview-with-a-syrian-refugee-anonymized.txt"

try:
    # load the complete original interview
    with open(interview_file, "r") as source:
        text = source.read()

    # swap the real name for the replacement name, then store the result
    anonymized = text.replace("Samira","Amal")
    with open(new_file, "w") as target:
        target.write(anonymized)

except FileNotFoundError:
    # tell the user which input file was expected
    print(f"File {interview_file} not found.")

except Exception as err:
    # anything else: show the exception message itself
    print("Something went wrong...")
    print(err)

### 2. Longest Word

In [None]:
# function to find the longest word in a text
def find_longest_word(text):
    """Return the longest run of alphabetic characters in ``text``.

    Every non-letter character separates words. If several words share the
    maximum length, the first one in the text is returned; if the text
    contains no letters at all, the empty string is returned.
    """
    # turn every non-letter into a blank so that split() yields the words
    letters_only = "".join(
        symbol if symbol.isalpha() else " " for symbol in text
    )
    candidates = letters_only.split()

    # max() keeps the first of several equally long words
    return max(candidates, key=len, default="")


# main program
text_file = "data/interview-with-a-syrian-refugee.txt"

try:
    # read original interview text from file
    with open(text_file, "r") as file:
        text = file.read()

# if input file is not found, inform user accordingly
except FileNotFoundError:
    print(f"File {text_file} not found.")

# for any other error, display the exception message
except Exception as err:
    print("Something went wrong...")
    print(err)

# print result only when reading succeeded; otherwise `text` would be
# undefined (or left over from an earlier cell) and the output wrong
else:
    print(f"The longest word in the text is \"{find_longest_word(text)}\".")

### 3. Randomized Story-Telling

In [None]:
import pandas as pd
import sys
import random

# set path to input file
infile = "data/inputs.csv"

# load the sentence building blocks; stop if the file cannot be read
try:
    df_in = pd.read_csv(infile, sep=",")
except Exception as err:
    print("Something went wrong...")
    print(err)
    sys.exit()


# keep asking until the user enters something int() accepts
number = None
while number is None:
    try:
        number = int(input("How many sentences do you want to create? "))
    except ValueError:
        print("That was no valid number. Try again.")

# build one sentence per requested repetition
for _ in range(number):

    # draw a random row for each sentence element, in sentence order
    parts = []
    for column in ("who", "does what", "how", "where"):
        row_index = random.randint(0, df_in[column].size - 1)
        parts.append(df_in.loc[row_index, column])
    who, does_what, how, where = parts

    print(f"{who} {does_what} {how} {where}.")

### 4. Population and Universities per Province

In [None]:
import pandas as pd
import sys

# set paths to input and output file
infile = "data/dutch_municipalities.csv"
outfile = "data/dutch_provinces.csv"


try:
    # read input file as dataframe
    df_in = pd.read_csv(infile, sep="\t")

# for any error, display the exception message
except Exception as err:
    print("Something went wrong...")
    print(err)
    sys.exit()

# sum up population and universities per province in one step.
# groupby sorts the provinces alphabetically and replaces the former
# row-by-row DataFrame.append, which no longer exists in pandas 2.x
df_out = (
    df_in.groupby("province", as_index=False)[["population", "university"]]
    .sum()
    .rename(columns={"university": "universities"})
)

try:
    # save new dataframe as csv file
    df_out.to_csv(outfile, index=False)

# for any error, display the exception message
except Exception as err:
    print("Something went wrong...")
    print(err)
    sys.exit()
    

# Another possible, but longer solution is with the csv package and 
# dictionaries, as shown below (without try/except error handling).
#
# import csv
#
## create two empty dictionaries to collect the aggregated data
#universities_per_province = {}
#population_per_province = {}
#
## read in the data and iterate over all rows, adding up
## population and university numbers per province
#with open("dutch_municipalities.csv", "r") as csvfile:
#    csvreader = csv.DictReader(csvfile, delimiter='\t')
#    for row in csvreader:
#        if row["province"] not in universities_per_province:
#            universities_per_province[row["province"]] = int(row["university"])

## Unit 3.3: Join two dataframes, group by and correlations of variables

### 1. Analysis of the McDonald’s Menu

#### Question a

In [None]:
import pandas as pd

# load the menu data into a dataframe
menu = pd.read_csv("data/mcdonalds_menu.csv")

# number of distinct item names
item_count = len(menu["Item"].unique())
print("Total number of items:", item_count)

# number of items listed in each category
items_per_category = menu.groupby('Category')['Item'].count()
print(items_per_category)

The most represented category is "Coffee & Tea".

#### Question b

In [None]:
# analysis fat per category
fat_columns = [
    'Total Fat (% Daily Value)',
    'Trans Fat',
    'Saturated Fat (% Daily Value)',
    'Cholesterol (% Daily Value)',
]

# per category, take the maximum of each fat-related column; reset_index
# turns Category back into a normal column so the merge below can use it
grp_by_category = menu[['Category'] + fat_columns].groupby(['Category']).max()
grp_by_category = grp_by_category.reset_index()
grp_by_category.columns = ['Category', 'Max_Fat', 'Max_Trans_Fat', 'Max_Sat_Fat', 'Max_Cholestrol']
print(grp_by_category)

# attach the category maxima to every item ("Category" is the only shared column)
df = menu.merge(grp_by_category)

# items whose total fat equals the maximum of their category
mask = df['Total Fat (% Daily Value)'] == df['Max_Fat']
fatty_columns = ['Category', 'Item', 'Total Fat (% Daily Value)', 'Cholesterol (% Daily Value)']
fatty_menu = df.loc[mask, fatty_columns]
print(fatty_menu)

# items with the highest non-zero trans fat of their category
has_max_trans_fat = (df['Trans Fat'] == df['Max_Trans_Fat']) & (df['Trans Fat'] > 0)
trans_columns = ['Category', 'Item'] + fat_columns
trans_menu = df.loc[has_max_trans_fat, trans_columns]

# show them sorted by Trans Fat, highest first
print(trans_menu.sort_values(by='Trans Fat', ascending=False))

#### Question c

In [None]:
# anything healthy?
# one named condition per criterion
no_trans_fat = df['Trans Fat'] == 0
low_sugar = df['Sugars'] < 20
low_fat = df['Total Fat (% Daily Value)'] <= 20
no_cholesterol = df['Cholesterol (% Daily Value)'] == 0

# items meeting all criteria, highest calories first
healthy = df.loc[
    no_trans_fat & low_sugar & low_fat & no_cholesterol,
    ['Category', 'Item', 'Calories'],
].sort_values('Calories', ascending=False)

# leave out the two drink categories when printing
is_drink = healthy['Category'].isin(["Beverages", "Coffee & Tea"])
print(healthy[~is_drink])

#### Question d

In [None]:
# top 10 vitamin C
vitamin_c = 'Vitamin C (% Daily Value)'

# label matches this section's heading (questions are lettered, not numbered)
print("Question d:")

# one row per item (pivot_table averages duplicates), highest vitamin C first
vitamin_c_per_item = pd.pivot_table(menu, index=['Item'], values=[vitamin_c])
print(vitamin_c_per_item.sort_values([vitamin_c], ascending=False).head(10))

#### Question e

In [None]:
# nutrition feature comparison
nutrients = [
    'Calories',
    'Total Fat',
    'Saturated Fat',
    'Cholesterol',
    'Sodium',
    'Carbohydrates',
    'Sugars',
    'Protein',
]

# pairwise correlations between the selected nutrition columns
selection = menu[nutrients]
print(selection.corr())