# Solutions to Exercises

## Unit 4.1: Data visualization with matplotlib

### Question 0

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# load the menu data into a dataframe
menu = pd.read_csv("data/mcdonalds_menu.csv")

# histogram of the 'Calories' column: calorie value on the x-axis,
# number of menu rows falling into each bin on the y-axis
print("Question 0:")
print("Distribution of the calories:")
fig, ax = plt.subplots()
ax.hist(menu['Calories'])
ax.set_xlabel('Calories')
ax.set_ylabel('Items')
plt.show()

### Question 1

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# load the menu data and show its first five rows
menu = pd.read_csv("data/mcdonalds_menu.csv")
first_rows = menu.head(5)
print(first_rows)

# summary statistics of the dataframe
summary = menu.describe()
print(summary)

# count the distinct item names, then draw one bar per category
# showing how many items that category contains
print("Question 1:")
distinct_items = menu['Item'].unique()
print("Total number of items:", len(distinct_items))
items_per_category = menu.groupby('Category')['Item'].count()
items_per_category.plot.bar()
plt.show()

### Question 2

In [None]:
# fat analysis per category
print("Question 2:")
fat_column = 'Total Fat (% Daily Value)'
menu.boxplot(column=[fat_column], by=['Category'], rot=90)
plt.show()

# per-category maximum of each fat-related column; reset_index() turns
# 'Category' back into a regular column so it can be used as the merge key
fat_columns = [
    'Total Fat (% Daily Value)',
    'Trans Fat',
    'Saturated Fat (% Daily Value)',
    'Cholesterol (% Daily Value)',
]
grp_by_category = menu[['Category'] + fat_columns].groupby(['Category']).max().reset_index()
grp_by_category.columns = ['Category', 'Max_Fat', 'Max_Trans_Fat', 'Max_Sat_Fat', 'Max_Cholestrol']
print(grp_by_category)

# attach the per-category maxima to every menu row
# ('Category' is the only column the two dataframes share)
df = menu.merge(grp_by_category, on='Category')

# rows whose total fat equals the maximum of their category
mask = df['Total Fat (% Daily Value)'] == df['Max_Fat']
fatty_columns = ['Category', 'Item', 'Total Fat (% Daily Value)', 'Cholesterol (% Daily Value)']
fatty_menu = df.loc[mask, fatty_columns]
print(fatty_menu)

# rows holding the maximum trans fat of their category, skipping rows where
# that value is zero; printed in decreasing order of trans fat
is_top_trans = (df['Trans Fat'] == df['Max_Trans_Fat']) & (df['Trans Fat'] > 0)
trans_columns = [
    'Category',
    'Item',
    'Total Fat (% Daily Value)',
    'Trans Fat',
    'Saturated Fat (% Daily Value)',
    'Cholesterol (% Daily Value)',
]
trans_menu = df.loc[is_top_trans, trans_columns]
print(trans_menu.sort_values(by='Trans Fat', ascending=False))


### Question 3

In [None]:
# top 10 vitamin C: aggregate the vitamin C column per item name,
# sort in decreasing order and plot the first ten as bars
print("Question 3:")
vitamin_c = 'Vitamin C (% Daily Value)'
vitamin_c_by_item = pd.pivot_table(menu, index=['Item'], values=[vitamin_c])
top_10 = vitamin_c_by_item.sort_values([vitamin_c], ascending=False).head(10)
top_10.plot.bar()
plt.show()

### Question 4

In [None]:
# nutrition feature comparison: pairwise scatter plots of the selected
# columns, with a kernel density estimate on the diagonal
print("Question 4:")
nutrition_columns = [
    'Calories',
    'Total Fat',
    'Saturated Fat',
    'Cholesterol',
    'Sodium',
    'Carbohydrates',
    'Sugars',
    'Protein',
]
selection = menu[nutrition_columns]
pd.plotting.scatter_matrix(selection, diagonal='kde', figsize=(12, 12), grid=True)
plt.show()

## Unit 4.2: Working with date and time

### Question 1

In [None]:
from datetime import datetime

given_date = datetime(year=2020, month=2, day=25)
print("Given date is")
# weekday name, day of month, month name, four-digit year
formatted_date = given_date.strftime('%A %d %B %Y')
print(formatted_date)

### Question 2

In [None]:
import time

# time.time() gives seconds as a float; scale to milliseconds and round to a whole number
seconds_now = time.time()
milliseconds = int(round(seconds_now * 1000))
print(milliseconds)

### Question 3

In [None]:
from datetime import datetime

# 2020-02-25
date_1 = datetime(2020, 2, 25).date()
# 2020-09-17
date_2 = datetime(2020, 9, 17).date()

# Report which date is the later one. Equal dates get their own message:
# a plain if/else would announce "date_2 is greater" for identical dates.
if date_1 > date_2:
    print("date_1 is greater")
elif date_2 > date_1:
    print("date_2 is greater")
else:
    print("both dates are equal")

# abs() keeps the gap non-negative whichever date comes first
delta = abs(date_1 - date_2)
print("Difference is", delta.days, "days")

### Question 4

In [None]:
import pandas as pd
from datetime import date

df = pd.read_csv('data/grant_winners.csv')
# turn each 'Award date' entry into a datetime.date
# (date.fromisoformat expects ISO-formatted text such as 2023-01-31)
df['Award date'] = df['Award date'].map(date.fromisoformat)

# keep the awards dated from 1 Sep 2022 to 31 Aug 2024, both bounds included
start_date = date(2022, 9, 1)
end_date = date(2024, 8, 31)
in_period = df['Award date'].between(start_date, end_date)
selected_df = df[in_period]
print(selected_df['Title project'].tolist())

## Unit 4.3: Matrix computation

### Question 0

In [None]:
import numpy as np

# 1. Build the matrix `m` whose rows are the two vectors `v_1` and `v_2`.
v_1 = [0.5, 3, 2.5]
v_2 = [-1, 3.5, 2]
# stack the two vectors on top of each other, one per row
m = np.vstack((v_1, v_2))
print(m)

In [None]:
# 2. Average position of all flies in the room: the mean over the rows of `m`
#    (axis 0), which yields one value per coordinate.
print(np.mean(m, axis=0))

In [None]:
# 3. Distance between the two flies: the norm of the difference of their position vectors
a_1, a_2 = np.array(v_1), np.array(v_2)
a_diff = a_1 - a_2
distance = np.linalg.norm(a_diff)
print(distance)

### Question 1

In [None]:
import numpy as np
import pandas as pd

# load the menu data and show its first five rows
menu = pd.read_csv("data/mcdonalds_menu.csv")
print(menu.head(5))

# summary statistics of the dataframe
print(menu.describe())

# top 3 muscle food: score every row by protein divided by sugars.
# Rows whose 'Sugars' value is below 1 receive that sugar value itself as
# their score instead of the ratio.
# NOTE(review): a row with 0 sugars therefore scores 0 and ranks last —
# confirm this is the intended treatment of sugar-free items.
sugars = menu['Sugars']
protein_per_sugar = menu['Protein'] / sugars
menu['Protein/Sugar'] = np.where(sugars < 1, sugars, protein_per_sugar)
ranked = menu.sort_values('Protein/Sugar', ascending=False)
print(ranked.head(3))