In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import scipy.stats as stats

# Attendance Data

In [24]:
# Load the raw attendance sheet from attendance.csv and display it.
# The frame keeps the file's 'Unnamed: 0' column, which later cells use as the
# student identifier (it is renamed to 'Name' after melting).
attendance = pd.read_csv('attendance.csv')
attendance

Unnamed: 0.1,Unnamed: 0,2018-01-01,2018-01-02,2018-01-03,2018-01-04,2018-01-05,2018-01-06,2018-01-07,2018-01-08
0,Sally,P,T,T,H,P,A,T,T
1,Jane,A,P,T,T,T,T,A,T
2,Billy,A,T,A,A,H,T,P,T
3,John,P,T,H,P,P,T,P,P


In [17]:
# Check the frame's dimensions as (rows, columns).
attendance.shape

(4, 9)

In [25]:
# Numeric credit given to each attendance code. The letter meanings are not
# documented in this notebook (presumably P = present, T = tardy, H = half day,
# A = absent -- TODO confirm); the weights are the ones this analysis uses.
ATTENDANCE_SCORES = {'T': 0.9, 'P': 1.0, 'A': 0.0, 'H': 0.5}

# Convert only the per-day columns so the name column ('Unnamed: 0') can never
# be rewritten by accident, and cast explicitly to float: relying on `replace`
# to downcast object columns to numbers is deprecated in recent pandas.
# An unexpected code makes the cast fail loudly instead of staying in the
# frame as text. Re-running the cell is safe (already-numeric values are kept).
day_columns = attendance.columns.drop('Unnamed: 0')
attendance = attendance.copy()
attendance[day_columns] = (
    attendance[day_columns].replace(ATTENDANCE_SCORES).astype(float)
)

In [19]:
# Inspect column dtypes and non-null counts; after the code -> score
# replacement the day columns are expected to be numeric.
attendance.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4 entries, 0 to 3
Data columns (total 9 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   Unnamed: 0  4 non-null      object 
 1   2018-01-01  4 non-null      float64
 2   2018-01-02  4 non-null      float64
 3   2018-01-03  4 non-null      float64
 4   2018-01-04  4 non-null      float64
 5   2018-01-05  4 non-null      float64
 6   2018-01-06  4 non-null      float64
 7   2018-01-07  4 non-null      float64
 8   2018-01-08  4 non-null      float64
dtypes: float64(8), object(1)
memory usage: 416.0+ bytes


In [26]:
# Reshape from wide (one column per date) to long (one row per student per day).
attendance_melt = pd.melt(
    attendance,
    id_vars=['Unnamed: 0'],
    var_name='Days',
    value_name='Pass',
)
attendance_melt

Unnamed: 0.1,Unnamed: 0,Days,Pass
0,Sally,2018-01-01,1.0
1,Jane,2018-01-01,0.0
2,Billy,2018-01-01,0.0
3,John,2018-01-01,1.0
4,Sally,2018-01-02,0.9
5,Jane,2018-01-02,1.0
6,Billy,2018-01-02,0.9
7,John,2018-01-02,0.9
8,Sally,2018-01-03,0.9
9,Jane,2018-01-03,0.9


In [27]:
# Give the long-format columns descriptive names, then preview the result.
column_labels = {'Unnamed: 0': 'Name', 'Pass': 'Grade'}
attendance_melt = attendance_melt.rename(columns=column_labels)
attendance_melt.head()

Unnamed: 0,Name,Days,Grade
0,Sally,2018-01-01,1.0
1,Jane,2018-01-01,0.0
2,Billy,2018-01-01,0.0
3,John,2018-01-01,1.0
4,Sally,2018-01-02,0.9


In [29]:
# Average grade per student across all days, returned as a flat Name/Grade table.
pd.pivot_table(
    attendance_melt,
    index='Name',
    values='Grade',
    aggfunc='mean',
).reset_index()

Unnamed: 0,Name,Grade
0,Billy,0.525
1,Jane,0.6875
2,John,0.9125
3,Sally,0.7625


# Coffee Levels

In [30]:
# Load the coffee level readings from coffee_levels.csv and preview the first rows.
coffee = pd.read_csv('coffee_levels.csv')
coffee.head()

Unnamed: 0,hour,coffee_carafe,coffee_amount
0,8,x,0.816164
1,9,x,0.451018
2,10,x,0.843279
3,11,x,0.335533
4,12,x,0.898291


In [31]:
# Check the frame's dimensions as (rows, columns).
coffee.shape

(30, 3)

In [34]:
# One row per hour, one column per carafe. Each cell is the mean coffee_amount
# for that hour/carafe pair ('mean' is pivot_table's default, spelled out here).
hourly_by_carafe = coffee.pivot_table(
    index='hour',
    columns='coffee_carafe',
    values='coffee_amount',
    aggfunc='mean',
)
hourly_by_carafe.reset_index()

coffee_carafe,hour,x,y,z
0,8,0.816164,0.189297,0.999264
1,9,0.451018,0.521502,0.91599
2,10,0.843279,0.023163,0.144928
3,11,0.335533,0.235529,0.311495
4,12,0.898291,0.017009,0.771947
5,13,0.310711,0.997464,0.39852
6,14,0.507288,0.058361,0.864464
7,15,0.215043,0.144644,0.436364
8,16,0.183891,0.544676,0.280621
9,17,0.39156,0.594126,0.436677


What does `hour` represent? Consider converting it to something more understandable, and clarify what the values represent.

# Cake Recipe

In [39]:
# Load the cake recipe scores from cake_recipes.csv and preview the first rows.
# NOTE(review): this rebinds `coffee`, discarding the coffee-levels frame loaded
# earlier; the cake cells that follow keep using the name `coffee` for cake data.
# Renaming it (e.g. to `cakes`) would have to be done across all of those cells.
coffee = pd.read_csv('cake_recipes.csv')
coffee.head()

Unnamed: 0,recipe:position,225,250,275,300
0,a:bottom,61.738655,53.912627,74.41473,98.786784
1,a:top,51.709751,52.009735,68.576858,50.22847
2,b:bottom,57.09532,61.904369,61.19698,99.248541
3,b:top,82.455004,95.224151,98.594881,58.169349
4,c:bottom,96.470207,52.001358,92.893227,65.473084


In [36]:
# Preview splitting the combined 'recipe:position' label on ':' into two columns.
coffee['recipe:position'].str.split(':', expand = True)

Unnamed: 0,0,1
0,a,bottom
1,a,top
2,b,bottom
3,b,top
4,c,bottom
5,c,top
6,d,bottom
7,d,top


In [37]:
# Store the two halves of 'recipe:position' as separate 'recipe' and
# 'position' columns on the wide frame.
label_parts = coffee['recipe:position'].str.split(':', expand=True)
coffee[['recipe', 'position']] = label_parts

In [38]:
# Check that the new 'recipe' and 'position' columns were added.
coffee.head()

Unnamed: 0,recipe:position,225,250,275,300,recipe,position
0,a:bottom,61.738655,53.912627,74.41473,98.786784,a,bottom
1,a:top,51.709751,52.009735,68.576858,50.22847,a,top
2,b:bottom,57.09532,61.904369,61.19698,99.248541,b,bottom
3,b:top,82.455004,95.224151,98.594881,58.169349,b,top
4,c:bottom,96.470207,52.001358,92.893227,65.473084,c,bottom


In [40]:
# Reshape the cake scores from wide (one column per temperature label) to long
# (one row per 'recipe:position' label and temperature).
#
# Only the temperature columns are melted. An earlier cell adds 'recipe' and
# 'position' columns to `coffee`; without an explicit `value_vars`, a
# top-to-bottom run would melt those text columns as well, mixing their strings
# into 'Taste_score' and adding bogus 'recipe' / 'position' entries to 'Temp'.
label_columns = {'recipe:position', 'recipe', 'position'}
temperature_columns = [col for col in coffee.columns if col not in label_columns]
coffee_melt = coffee.melt(
    id_vars=['recipe:position'],
    value_vars=temperature_columns,
    var_name='Temp',
    value_name='Taste_score',
)
coffee_melt

Unnamed: 0,recipe:position,Temp,Taste_score
0,a:bottom,225,61.738655
1,a:top,225,51.709751
2,b:bottom,225,57.09532
3,b:top,225,82.455004
4,c:bottom,225,96.470207
5,c:top,225,71.306308
6,d:bottom,225,52.799753
7,d:top,225,96.873178
8,a:bottom,250,53.912627
9,a:top,250,52.009735


In [41]:
# Break the combined 'recipe:position' label into separate 'recipe' and
# 'position' columns on the long-format frame, then display it.
split_labels = coffee_melt['recipe:position'].str.split(':', expand=True)
coffee_melt[['recipe', 'position']] = split_labels
coffee_melt

Unnamed: 0,recipe:position,Temp,Taste_score,recipe,position
0,a:bottom,225,61.738655,a,bottom
1,a:top,225,51.709751,a,top
2,b:bottom,225,57.09532,b,bottom
3,b:top,225,82.455004,b,top
4,c:bottom,225,96.470207,c,bottom
5,c:top,225,71.306308,c,top
6,d:bottom,225,52.799753,d,bottom
7,d:top,225,96.873178,d,top
8,a:bottom,250,53.912627,a,bottom
9,a:top,250,52.009735,a,top


In [42]:
# The combined label is redundant once it has been split; remove it.
coffee_melt = coffee_melt.drop('recipe:position', axis='columns')
coffee_melt.head()

Unnamed: 0,Temp,Taste_score,recipe,position
0,225,61.738655,a,bottom
1,225,51.709751,a,top
2,225,57.09532,b,bottom
3,225,82.455004,b,top
4,225,96.470207,c,bottom


In [43]:
# Put the identifying columns first, followed by temperature and score.
column_order = ['recipe', 'position', 'Temp', 'Taste_score']
coffee_melt = coffee_melt.loc[:, column_order]
coffee_melt.head()

Unnamed: 0,recipe,position,Temp,Taste_score
0,a,bottom,225,61.738655
1,a,top,225,51.709751
2,b,bottom,225,57.09532
3,b,top,225,82.455004
4,c,bottom,225,96.470207


In [47]:
# Mean taste score for each recipe, across all positions and temperatures.
pd.pivot_table(
    coffee_melt,
    index='recipe',
    values='Taste_score',
    aggfunc='mean',
).reset_index()

Unnamed: 0,recipe,Taste_score
0,a,63.922201
1,b,76.736074
2,c,75.874748
3,d,62.864844


In [59]:
# Recipe with the highest mean taste score.
mean_score_by_recipe = coffee_melt.groupby('recipe')['Taste_score'].mean()
mean_score_by_recipe.idxmax()

'b'

In [48]:
# Mean taste score for each temperature, across all recipes and positions.
pd.pivot_table(
    coffee_melt,
    index='Temp',
    values='Taste_score',
    aggfunc='mean',
).reset_index()

Unnamed: 0,Temp,Taste_score
0,225,71.306022
1,250,66.577437
2,275,74.886754
3,300,66.627655


In [63]:
# Temperature with the highest mean taste score. The label is returned as a
# string because 'Temp' holds the original column headers.
mean_score_by_temp = coffee_melt.groupby('Temp')['Taste_score'].mean()
mean_score_by_temp.idxmax()

'275'

In [56]:
# Score grid: rows are (position, recipe) pairs, columns are temperatures.
# The (position, recipe) MultiIndex is kept on the rows; cells hold the max score.
pd.pivot_table(
    coffee_melt,
    index=['position', 'recipe'],
    columns='Temp',
    values='Taste_score',
    aggfunc='max',
)

Unnamed: 0_level_0,Temp,225,250,275,300
position,recipe,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
bottom,a,61.738655,53.912627,74.41473,98.786784
bottom,b,57.09532,61.904369,61.19698,99.248541
bottom,c,96.470207,52.001358,92.893227,65.473084
bottom,d,52.799753,58.670419,51.747686,56.18311
top,a,51.709751,52.009735,68.576858,50.22847
top,b,82.455004,95.224151,98.594881,58.169349
top,c,71.306308,82.795477,92.098049,53.960273
top,d,96.873178,76.101363,59.57162,50.971626


In [62]:
# (position, recipe, temperature) combination with the highest mean taste score.
combination_keys = ['position', 'recipe', 'Temp']
mean_score_by_combination = coffee_melt.groupby(combination_keys)['Taste_score'].mean()
mean_score_by_combination.idxmax()

('bottom', 'b', '300')