In [None]:
import pandas as pd
import seaborn as sns

#### 1. Attendance Data

Load the `attendance.csv` file and calculate an attendance percentage for each student. One half day is worth 50% of a full day, and 10 tardies are equal to one absence.

You should end up with something like this:


name  
Billy    0.5250  
Jane     0.6875  
John     0.9125  
Sally    0.7625  
Name: grade, dtype: float64

In [None]:
# load the raw attendance sheet; the path is relative to the current working directory
df = pd.read_csv('untidy-data/attendance.csv')

In [None]:
# preview the first rows to see the raw (wide) layout before it is melted
df.head()

In [None]:
# reshape wide -> long: every non-id column header becomes a value in 'day',
# and the cell contents land in 'grade'
df_melt = pd.melt(
    df,
    id_vars='Unnamed: 0',
    var_name='day',
    value_name='grade',
)
df_melt

In [None]:
# give the id column a meaningful name; rename it by label instead of
# overwriting every header by position, so a change in column order can never
# silently mislabel the data ('day' and 'grade' were already named by melt)

df_melt = df_melt.rename(columns={'Unnamed: 0': 'student'})
df_melt.head()

In [None]:
# convert an attendance letter code into a numeric attendance credit

def letter_grade(a):
    """Return the numeric credit for a single attendance code.

    'P' counts as 1, 'H' as 0.5 (a half day is worth 50% of a full day) and
    'T' as 0.9 (ten tardies equal one absence). Any other value counts as 0.
    """
    credit_by_code = (('P', 1), ('H', 0.5), ('T', 0.9))
    # first matching code wins; anything unmatched falls through to 0
    return next((credit for code, credit in credit_by_code if a == code), 0)

In [None]:
# translate every attendance code into its numeric credit, stored in a new column

df_melt['num_grade'] = df_melt['grade'].map(letter_grade)

In [None]:
# check that the new 'num_grade' column sits next to the original letter codes
df_melt.head()

In [None]:
# average the per-day credit for each student to get the attendance percentage

df_melt.groupby('student')['num_grade'].mean()

In [None]:
# Another way: translate the codes with a lookup table.
# Only the 'grade' column is mapped, so values in 'student' or 'day' can never
# be rewritten by accident, and the result is a proper numeric column instead
# of relying on pandas downcasting an object column after a frame-wide replace.
# Note: any code missing from the table becomes NaN.

grade_values = {'P': 1, 'A': 0, 'H': 0.5, 'T': 0.9}
df1 = df_melt.assign(grade=df_melt['grade'].map(grade_values))
df1.head()

In [None]:
# group by student and take the mean of the numeric 'grade' column
df1.groupby('student')['grade'].mean()

#### 2. Coffee Levels

Read the coffee_levels.csv file.  
Transform the data so that each carafe is in its own column.  
Is this the best shape for the data?   

In [None]:
# load the coffee level readings; note this rebinds `df`, replacing the attendance frame
df = pd.read_csv('untidy-data/coffee_levels.csv')

In [None]:
# display the frame as loaded (long format: the cells below use the
# 'hour', 'coffee_carafe' and 'coffee_amount' columns)
df

In [None]:
# plot coffee amount by hour, one line per carafe (long format plots directly)
sns.lineplot(data=df, x='hour', y='coffee_amount', hue='coffee_carafe');

In [None]:
# Let's pivot the table: one row per hour, one column per carafe;
# reset_index() turns 'hour' back into an ordinary column
df_pivot = (
    df.pivot_table(values='coffee_amount', index=['hour'], columns='coffee_carafe')
    .reset_index()
)

df_pivot

In [None]:
# 'hour' is an ordinary column after reset_index(), so pass it explicitly as the
# x-axis; otherwise the lines are drawn against the row position (0, 1, 2, ...)
df_pivot.plot(x='hour', y=['x', 'y', 'z']);

#### 3. Cake Recipes  

Read the cake_recipes.csv data. This data set contains cake tastiness scores for combinations of different recipes, oven rack positions, and oven temperatures.  
Tidy the data as necessary.  
Which recipe, on average, is the best? recipe b  
Which oven temperature, on average, produces the best results? 275  
Which combination of recipe, rack position, and temperature gives the best result? recipe b, bottom rack, 300 degrees  

In [None]:
# load the cake tastiness scores; note this rebinds `df`, replacing the coffee frame
df = pd.read_csv('untidy-data/cake_recipes.csv')

In [None]:
# preview the raw layout: a combined 'recipe:position' column plus the score columns
df.head()

In [None]:
# preview only: split the 'recipe:position' column on ':' into separate columns
# (the result is displayed here, not stored)
df['recipe:position'].str.split(":", expand = True)

In [None]:
# split the combined key on ':' and store the two parts as new columns
recipe_position_parts = df['recipe:position'].str.split(':', expand=True)
df[['recipe', 'position']] = recipe_position_parts

In [None]:
# confirm the new 'recipe' and 'position' columns were added
df.head()

In [None]:
# drop the combined 'recipe:position' column now that 'recipe' and 'position'
# hold its parts; rebind instead of using inplace=True so the step is an
# explicit assignment rather than a hidden mutation of the existing frame
df = df.drop(columns='recipe:position')
df

In [None]:
# reshape wide -> long: each remaining column header becomes a 'temperature'
# value and its cell becomes the 'score'
df_melt = pd.melt(
    df,
    id_vars=['recipe', 'position'],
    var_name='temperature',
    value_name='score',
)
df_melt.head()

In [None]:
# Which recipe, on average, is the best? -> mean score per recipe
df_melt.groupby(['recipe'])['score'].mean()

In [None]:
# compute the per-recipe means once, then pull out the winner and its score
recipe_means = df_melt.groupby(['recipe'])['score'].mean()
best_recipe, best_score = recipe_means.idxmax(), recipe_means.max()
print(f' The best recipe is \'{best_recipe}\' with score of {round(best_score, 1)}')

In [None]:
# Which oven temperature, on average, produces the best results? -> mean score per temperature
df_melt.groupby('temperature')['score'].mean()

In [None]:
# compute the per-temperature means once, then pull out the winner and its score
temperature_means = df_melt.groupby(['temperature'])['score'].mean()
best_temp, best_score = temperature_means.idxmax(), temperature_means.max()
print(f' The best temperature is \'{best_temp}F\' with score of {round(best_score, 1)}')

In [None]:
# Which combination of recipe, rack position, and temperature gives the best result?
# -> mean score for every (temperature, recipe, position) group
df_melt.groupby(['temperature', 'recipe', 'position'])['score'].mean()

In [None]:
# compute the per-combination means once, then pull out the winner and its score
combination_means = df_melt.groupby(['temperature', 'recipe', 'position'])['score'].mean()
best_comb = combination_means.idxmax()  # index label ordered as (temperature, recipe, position)
best_score = combination_means.max()
print(f' The best combination is \'{best_comb}\' with score of {round(best_score, 1)}')