In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
from pydataset import data

## Read the data from the attendance table and calculate an attendance percentage for each student. One half day is worth 50% of a full day, and 10 tardies is equal to one absence.

In [212]:
attendance = pd.read_csv('untidy-data/attendance.csv')
attendance.head()

Unnamed: 0.1,Unnamed: 0,2018-01-01,2018-01-02,2018-01-03,2018-01-04,2018-01-05,2018-01-06,2018-01-07,2018-01-08
0,Sally,P,T,T,H,P,A,T,T
1,Jane,A,P,T,T,T,T,A,T
2,Billy,A,T,A,A,H,T,P,T
3,John,P,T,H,P,P,T,P,P


In [213]:
attendance.shape

(4, 9)

In [214]:
attendance = attendance.rename(columns={'Unnamed: 0':'name'})
attendance.head()


Unnamed: 0,name,2018-01-01,2018-01-02,2018-01-03,2018-01-04,2018-01-05,2018-01-06,2018-01-07,2018-01-08
0,Sally,P,T,T,H,P,A,T,T
1,Jane,A,P,T,T,T,T,A,T
2,Billy,A,T,A,A,H,T,P,T
3,John,P,T,H,P,P,T,P,P


In [215]:
attd_date = attendance.melt(id_vars ='name',
                            var_name='date',
                            value_name='grade')
attd_date.head()

Unnamed: 0,name,date,grade
0,Sally,2018-01-01,P
1,Jane,2018-01-01,A
2,Billy,2018-01-01,A
3,John,2018-01-01,P
4,Sally,2018-01-02,T


In [216]:
attd_date.shape

(32, 3)

In [218]:
attd_date['grade'].replace(['P','A','H','T'],[1,0,.5,.9], inplace= True)

attd_date.head()

    



Unnamed: 0,name,date,grade
0,Sally,2018-01-01,1.0
1,Jane,2018-01-01,0.0
2,Billy,2018-01-01,0.0
3,John,2018-01-01,1.0
4,Sally,2018-01-02,0.9


In [219]:
attd_date.groupby('name').mean()

Unnamed: 0_level_0,grade
name,Unnamed: 1_level_1
Billy,0.525
Jane,0.6875
John,0.9125
Sally,0.7625


# Coffee Levels

- ## Read the coffee_levels table.
- ## Transform the data so that each carafe is in it's own column.
- ## Is this the best shape for the data?

In [130]:
coffee = pd.read_csv('untidy-data/coffee_levels.csv')
coffee.head()

Unnamed: 0,hour,coffee_carafe,coffee_amount
0,8,x,0.816164
1,9,x,0.451018
2,10,x,0.843279
3,11,x,0.335533
4,12,x,0.898291


In [45]:
coffee.shape

(30, 3)

In [47]:
coffee.dtypes

coffee_carafe     object
hour              object
amount           float64
dtype: object

In [52]:
coffee.coffee_carafe.value_counts()

x    80
y    80
z    80
Name: coffee_carafe, dtype: int64

In [58]:
coffee_melt = coffee.pivot_table(index='coffee_carafe',
                                 columns='hour', 
                                 values='coffee_amount')
coffee_melt

hour,8,9,10,11,12,13,14,15,16,17
coffee_carafe,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
x,0.816164,0.451018,0.843279,0.335533,0.898291,0.310711,0.507288,0.215043,0.183891,0.39156
y,0.189297,0.521502,0.023163,0.235529,0.017009,0.997464,0.058361,0.144644,0.544676,0.594126
z,0.999264,0.91599,0.144928,0.311495,0.771947,0.39852,0.864464,0.436364,0.280621,0.436677


# Cake Recipes

- ### Read the cake_recipes table. This data set contains cake tastiness scores for combinations of different recipes, oven rack positions, and oven temperatures.
- ### Tidy the data as necessary.
- ### Which recipe, on average, is the best? recipe b
- ### Which oven temperature, on average, produces the best results? 275
- ### Which combination of recipe, rack position, and temperature gives the best result? recipe b, bottom rack, 300 degrees

In [118]:
cake= pd.read_csv('untidy-data/cake_recipes.csv')
cake.head()

Unnamed: 0,recipe:position,225,250,275,300
0,a:bottom,61.738655,53.912627,74.41473,98.786784
1,a:top,51.709751,52.009735,68.576858,50.22847
2,b:bottom,57.09532,61.904369,61.19698,99.248541
3,b:top,82.455004,95.224151,98.594881,58.169349
4,c:bottom,96.470207,52.001358,92.893227,65.473084


In [119]:
cake['recipe:position'].str.split(':', expand=True)

Unnamed: 0,0,1
0,a,bottom
1,a,top
2,b,bottom
3,b,top
4,c,bottom
5,c,top
6,d,bottom
7,d,top


In [120]:
cake[['recipe','position']]= cake['recipe:position'].str.split(':', expand=True)
cake.head()

Unnamed: 0,recipe:position,225,250,275,300,recipe,position
0,a:bottom,61.738655,53.912627,74.41473,98.786784,a,bottom
1,a:top,51.709751,52.009735,68.576858,50.22847,a,top
2,b:bottom,57.09532,61.904369,61.19698,99.248541,b,bottom
3,b:top,82.455004,95.224151,98.594881,58.169349,b,top
4,c:bottom,96.470207,52.001358,92.893227,65.473084,c,bottom


In [121]:
cake.select_dtypes(include='object').columns.to_list()

['recipe:position', 'recipe', 'position']

In [122]:
cake = cake.drop(columns='recipe:position')
cake.head()

Unnamed: 0,225,250,275,300,recipe,position
0,61.738655,53.912627,74.41473,98.786784,a,bottom
1,51.709751,52.009735,68.576858,50.22847,a,top
2,57.09532,61.904369,61.19698,99.248541,b,bottom
3,82.455004,95.224151,98.594881,58.169349,b,top
4,96.470207,52.001358,92.893227,65.473084,c,bottom


In [124]:
cake.recipe.value_counts()

a    2
b    2
c    2
d    2
Name: recipe, dtype: int64

In [125]:
cake_melt = cake.melt(id_vars=['recipe','position'],
                     var_name='temperature',
                     value_name='taste_score')

In [145]:
cake_melt.head()

Unnamed: 0,recipe,position,temperature,taste_score
0,a,bottom,225,61.738655
1,a,top,225,51.709751
2,b,bottom,225,57.09532
3,b,top,225,82.455004
4,c,bottom,225,96.470207


In [127]:
cake_melt.shape

(32, 4)

In [188]:
# Which recipe, on average, is the best?
cake_melt.groupby('recipe').mean().nlargest(1,'taste_score')

#from the table below recipe b has highest average score

Unnamed: 0_level_0,taste_score
recipe,Unnamed: 1_level_1
b,76.736074


In [187]:
# Which oven temperature, on average, produces the best results? 275
cake_melt.groupby('temperature').mean().nlargest(1,'taste_score')


Unnamed: 0_level_0,taste_score
temperature,Unnamed: 1_level_1
275,74.886754


<bound method GroupBy.max of <pandas.core.groupby.generic.SeriesGroupBy object at 0x7fce860814f0>>

In [189]:
#Which combination of recipe, rack position, and temperature gives the best result? recipe b, 
# bottom rack, 300 degrees
cake_melt.groupby(['recipe','position','temperature']).mean().nlargest(1,'taste_score')

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,taste_score
recipe,position,temperature,Unnamed: 3_level_1
b,bottom,300,99.248541
