# Student Alcohol Consumption

### Introduction:

This time you will download a dataset from the UCI.

### Step 1. Import the necessary libraries

In [17]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
pd.set_option('display.max_rows',15)

### Step 2. Import the dataset from this [address](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/04_Apply/Students_Alcohol_Consumption/student-mat.csv).

### Step 3. Assign it to a variable called df.

In [5]:
df = pd.read_csv('student-mat.csv')
df.head()

Unnamed: 0,school,sex,age,address,famsize,Pstatus,Medu,Fedu,Mjob,Fjob,...,famrel,freetime,goout,Dalc,Walc,health,absences,G1,G2,G3
0,GP,F,18,U,GT3,A,4,4,at_home,teacher,...,4,3,4,1,1,3,6,5,6,6
1,GP,F,17,U,GT3,T,1,1,at_home,other,...,5,3,3,1,1,3,4,5,5,6
2,GP,F,15,U,LE3,T,1,1,at_home,other,...,4,3,2,2,3,3,10,7,8,10
3,GP,F,15,U,GT3,T,4,2,health,services,...,3,2,2,1,1,5,2,15,14,15
4,GP,F,16,U,GT3,T,3,3,other,other,...,4,3,2,1,2,5,4,6,10,10


### Step 4. For the purpose of this exercise slice the dataframe from 'school' until the 'guardian' column

In [6]:
df_sel = df.loc[:,'school':'guardian']

### Step 5. Create a lambda function that will capitalize strings.

In [68]:
# Solution 1
# cap_fun = lambda x: x[0].upper()+x[1:].lower()

# Solution 2
cap_fun = lambda x: x.capitalize()
df[['Mjob', 'Fjob']].applymap(cap_fun)

Unnamed: 0,Mjob,Fjob
0,At_home,Teacher
1,At_home,Other
2,At_home,Other
3,Health,Services
4,Other,Other
5,Services,Other
6,Other,Other
...,...,...
388,Teacher,Services
389,Other,Other


### Step 6. Capitalize both Mjob and Fjob

In [61]:
# Solution 1
df[['Mjob', 'Fjob']] = df[['Mjob', 'Fjob']].apply(cap_fun)
df[['Mjob', 'Fjob']]

AttributeError: ("'Series' object has no attribute 'upper'", 'occurred at index Mjob')

In [70]:
# Solution 2 
df[['Mjob', 'Fjob']] = df[['Mjob', 'Fjob']].applymap(str.capitalize)
df[['Mjob', 'Fjob']]

Unnamed: 0,Mjob,Fjob
0,At_home,Teacher
1,At_home,Other
2,At_home,Other
3,Health,Services
4,Other,Other
5,Services,Other
6,Other,Other
...,...,...
388,Teacher,Services
389,Other,Other


### Step 7. Print the last elements of the data set.

In [71]:
df.tail(3)

Unnamed: 0,school,sex,legal_drinker,age,address,famsize,Pstatus,Medu,Fedu,Mjob,...,famrel,freetime,goout,Dalc,Walc,health,absences,G1,G2,G3
392,MS,M,True,21,R,GT3,T,1,1,Other,...,5,5,3,3,3,3,3,10,8,7
393,MS,M,True,18,R,LE3,T,3,2,Services,...,4,4,1,3,4,5,0,11,12,10
394,MS,M,True,19,U,LE3,T,1,1,Other,...,3,2,3,3,3,5,5,8,9,9


### Step 8. Did you notice the original dataframe is still lowercase? Why is that? Fix it and capitalize Mjob and Fjob.

### Step 9. Create a function called majority that returns a boolean value to a new column called legal_drinker (Consider majority as older than 17 years old)

In [29]:
# Solution 1
def majority(age):
    return age>17

print(majority(25), majority(17))
df.loc[0:3, 'age'].apply(majority)

True False


0     True
1    False
2    False
3    False
Name: age, dtype: bool

In [30]:
# Solution 2
majority = lambda x: x>17

print(majority(25), majority(17))
df.loc[0:3, 'age'].apply(majority)

True False


0     True
1    False
2    False
3    False
Name: age, dtype: bool

In [44]:
df.insert(int((df.columns=='age').argmax())+1, 'legal_drinker', df['age'].apply(majority))
df

Unnamed: 0,school,sex,legal_drinker,age,address,famsize,Pstatus,Medu,Fedu,Mjob,...,famrel,freetime,goout,Dalc,Walc,health,absences,G1,G2,G3
0,GP,F,True,18,U,GT3,A,4,4,AT_HOME,...,4,3,4,1,1,3,6,5,6,6
1,GP,F,False,17,U,GT3,T,1,1,AT_HOME,...,5,3,3,1,1,3,4,5,5,6
2,GP,F,False,15,U,LE3,T,1,1,AT_HOME,...,4,3,2,2,3,3,10,7,8,10
3,GP,F,False,15,U,GT3,T,4,2,HEALTH,...,3,2,2,1,1,5,2,15,14,15
4,GP,F,False,16,U,GT3,T,3,3,OTHER,...,4,3,2,1,2,5,4,6,10,10
5,GP,M,False,16,U,LE3,T,4,3,SERVICES,...,5,4,2,1,2,5,10,15,15,15
6,GP,M,False,16,U,LE3,T,2,2,OTHER,...,4,4,4,1,1,3,0,12,12,11
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
388,MS,F,True,18,U,LE3,T,3,1,TEACHER,...,4,3,4,1,1,1,0,7,9,8
389,MS,F,True,18,U,GT3,T,1,1,OTHER,...,1,1,1,1,1,5,0,6,5,0


### Step 10. Multiply every number of the dataset by 10. 
##### I know this makes no sense, don't forget it is just an exercise

In [58]:
df_num = df.loc[:,[t in ['int64', 'float'] for t in df.dtypes]]
df_num.head(3)

Unnamed: 0,age,Medu,Fedu,traveltime,studytime,failures,famrel,freetime,goout,Dalc,Walc,health,absences,G1,G2,G3
0,18,4,4,2,2,0,4,3,4,1,1,3,6,5,6,6
1,17,1,1,1,2,0,5,3,3,1,1,3,4,5,5,6
2,15,1,1,1,2,3,4,3,2,2,3,3,10,7,8,10


In [73]:
df_num.applymap(lambda x: x*10).head(3)

Unnamed: 0,age,Medu,Fedu,traveltime,studytime,failures,famrel,freetime,goout,Dalc,Walc,health,absences,G1,G2,G3
0,180,40,40,20,20,0,40,30,40,10,10,30,60,50,60,60
1,170,10,10,10,20,0,50,30,30,10,10,30,40,50,50,60
2,150,10,10,10,20,30,40,30,20,20,30,30,100,70,80,100
