# Lab | Matplotlib & Seaborn

#### Import all the necessary libraries here:

In [None]:
# Libraries
import numpy as np
import pandas as pd

import warnings
# NOTE(review): 'ignore' silences every warning raised from here on,
# including deprecation notices from pandas / seaborn.
warnings.simplefilter('ignore')

import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

# None removes the cap on how many columns pandas prints for a DataFrame.
pd.set_option('display.max_columns', None)

## Challenge 
#### Import the `Fitbit2` dataset and store it in a variable called `fitbit`. You can find the dataset in Ironhack's database:
* db: `fitbit`
* table: `fitbit2`

In [None]:
# Load the Fitbit CSV and rewrite every header in lower case with
# underscores instead of spaces, then preview the first rows.
path = '../data/Fitbit2.csv'
fitbit = pd.read_csv(path)
fitbit.columns = fitbit.columns.map(lambda name: name.lower().replace(' ', '_'))
fitbit.head()

#### From the Fitbit data, we want to visually understand:

How the average number of steps changes by month. Use the appropriate visualization to show the median steps by month. Is the Fitbitter more active on weekends or on workdays?
**Hints**:

* Use the `Months_encoded` and `Work_or_Weekend` columns.
* Use the matplotlib.pyplot object-oriented API.
* Set your size figure to 12,4
* Explore plt.sca
* Explore plt.xticks
* Save your figures in a folder called `figures` in your repo. 

In [None]:
# AVERAGE STEPS BY MONTH_ENCODED
# Mean of `steps` for each distinct value of `months_encoded`.
fm = fitbit.groupby('months_encoded')['steps'].mean()

In [None]:
# Tick labels for the per-month bar chart, January through December.
month = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

In [None]:
# Bar chart of the per-month mean steps stored in `fm`.
ax = fm.plot.bar(
    figsize=(12, 4),
    grid=True,
    title='Average steps per month',
)
ax.set_xlabel('Months')
ax.set_ylabel('Steps (u.a)')

# One tick per bar, labelled with the month abbreviations and tilted
# so neighbouring labels do not overlap.
plt.xticks(range(12), labels=month, rotation=50)

plt.show();

In [None]:
# AVERAGE STEPS BY WORK_OR_WEEKEND
# Mean of `steps` for each distinct value of `work_or_weekend`.
fn = fitbit.groupby('work_or_weekend')['steps'].mean()

In [None]:
# Bar chart of the mean steps per work/weekend group stored in `fn`.
ax = fn.plot.bar(
    figsize=(12, 4),
    grid=True,
    title='Average steps',
)
ax.set_xlabel('work or weekend')
ax.set_ylabel('Steps (u.a)')

# NOTE(review): the two labels are applied by position to the bars, i.e. in
# the order of the group keys -- confirm that the first key really is "work".
plt.xticks(range(2), labels=['work', 'weekend'], rotation=50)

plt.show();

#### Write a loop to plot 3 scatter plots of the following features:

* Minutes Lightly Active vs Steps    
* Minutes Very Active vs Steps    
* Minutes Sedentary vs Steps  

In [None]:
# One row of three panels; each panel is a scatter of one activity
# measure (x) against the step count (y).
fig, ax = plt.subplots(1, 3)

activity_columns = ('minutes_lightly_active', 'minutes_very_active', 'minutes_sedentary')
for panel, column in zip(ax, activity_columns):
    fitbit.plot.scatter(x=column, y='steps', ax=panel)

## Challenge 

#### Import the `titanic` dataset and store it in a variable called `titanic`. You can find the dataset in Ironhack's database:
* db: `titanic`
* table: `titanic`

In [None]:
# Read the Titanic CSV from the local data folder into `titan`.
path2 = '../data/titanic.csv'
titan = pd.read_csv(filepath_or_buffer=path2)

#### Explore the titanic dataset using Pandas dtypes.

In [None]:
# Rewrite the headers in lower case with underscores instead of spaces,
# then print the per-column dtype / non-null summary.
titan = titan.rename(columns=lambda name: name.lower().replace(' ', '_'))
titan.info()

#### What are your numerical variables? What are your categorical variables?
**Hint**: Use Pandas select_dtypes.

In [None]:
# NUMERICAL VARIABLES
# `include` is passed by keyword on purpose: the second positional parameter
# of select_dtypes is `exclude`, so select_dtypes('int64', 'float64') keeps
# the integer columns and drops the float ones instead of selecting both.
titan.select_dtypes(include='number')

In [None]:
# CATEGORICAL VARIABLES
# Keep only the columns stored with the generic `object` dtype.
titan.select_dtypes(include=['object'])

#### Set the plot style to classic and the figure size to (12,6).
**Hint**: To set the style you can use matplotlib or seaborn functions. Do some research on the matter.

In [None]:
# Switch to the classic style sheet and make (12, 6) the default size of
# every figure created afterwards.
import matplotlib as mp
mp.style.use('classic')
# Set through rcParams (rather than plt.figure, which would only create a
# single empty figure), and after the style call because applying a style
# sheet can overwrite rcParams such as the figure size.
plt.rcParams['figure.figsize'] = (12, 6)

#### Use the right visualization to show the distribution of column `Age`.

In [None]:
# Histogram of `age` with bars one unit wide.
sns.histplot(data=titan, x='age', binwidth=1)

#### Use subplots and plot the distribution of the `Age`  with bins equal to 10, 20 and 50.

In [None]:
# Three side-by-side histograms of `age`, one per bin count.
fig, ax = plt.subplots(1, 3)

bin_counts = (10, 20, 50)
for panel, n_bins in zip(ax, bin_counts):
    sns.histplot(data=titan, x='age', bins=n_bins, ax=panel)

#### How does the bin size affect your plot?

In [None]:
# Answer (written in Spanish): the bin size determines the number of bars
# in the histogram.
"""
el tamaño de las bins determina el numero de barras del histograma
"""

#### Use seaborn to show the distribution of column `Age`.

In [None]:
# Seaborn histogram of the `age` column, one unit per bar; the trailing
# semicolon keeps the notebook from echoing the returned object.
sns.histplot(x=titan['age'], binwidth=1);

#### Use the right plot to visualize column `Gender`. There are 2 ways of doing it. Do it both ways.
**Hint**: Use matplotlib and seaborn.

In [None]:
# Number of rows per `gender` value. Despite its name, `ax` holds this
# Series of counts, not a matplotlib Axes.
ax = titan['gender'].value_counts()

In [None]:
# Method 1 - matplotlib
# `ax` is the Series of gender counts; draw it as a bar chart.
ax.plot.bar(figsize=(12, 4), grid=True, title='Gender')

# Tilt the category labels.
plt.xticks(rotation=50)

plt.show();

In [None]:
# Method 2 - seaborn
# countplot tallies the rows per `gender` value itself.
sns.countplot(data=titan, x='gender')

#### Use the right plot to visualize the column `Pclass`.

In [None]:
# Bar per `pclass` value showing how many rows fall in each.
sns.countplot(data=titan, x='pclass')

#### We would like to have in one single plot the summary statistics of the feature `Age`. What kind of plot would you use? Plot it. 

In [None]:
# Horizontal box plot of `age`; the trailing semicolon suppresses the
# notebook echo of the returned object.
sns.boxplot(data=titan, x='age');

In [None]:
"""
your comments here
"""

#### What does the last plot tell you about the feature `Age`?

In [None]:
# Answer (written in Spanish): it lets you read off Q1, Q2, Q3 and
# everything that follows from them.
"""
Te permite conocer los Q1, Q2, Q3 y todo lo que ello conlleva"""

#### Now in addition to the summary statistics, we want to have in the same plot the distribution of `Age`. What kind of plot would you use? Plot it. 

In [None]:
# Violin plot of `age`: the box-plot summary plus the density outline.
sns.violinplot(data=titan, x='age')

#### What additional information does the last plot provide about feature `Age`?

In [None]:
# Answer (written in Spanish): the density.
"""
la densidad
"""

#### We suspect that there is a linear relationship between `Fare` and `Age`. Use the right plot to show the relationship between these 2 features. There are 2 ways, please do it both ways.
**Hint**: Use matplotlib and seaborn.

In [None]:
# Method 1 - matplotlib
# The question is about the relationship between two columns, so draw one
# point per row with age on the x axis and fare on the y axis (a correlation
# matrix of the whole table does not show that relationship).
plt.scatter(titan.age, titan.fare)
plt.xlabel('age')
plt.ylabel('fare')
plt.title('Fare vs Age')

plt.show();

In [None]:
# Method 2 - seaborn
# This cell previously switched seaborn to the white theme; the call is kept
# so that later plots keep the same look.
sns.set(style='white')

# `corr` is not needed for this plot. It stays defined because a later cell
# reads it. Only numeric columns are used: Pearson correlation is undefined
# for text columns and recent pandas versions raise on them.
corr = titan.select_dtypes('number').corr(method='pearson')

# regplot draws the age/fare scatter and overlays a fitted regression line,
# which is what lets us judge the suspected linear relationship.
sns.regplot(data=titan, x='age', y='fare');

#### Plot the correlation matrix using seaborn.

In [None]:
# Pearson correlation between the numeric columns only (text columns have
# no defined correlation and recent pandas versions raise on them).
corr = titan.select_dtypes('number').corr(method='pearson')

# Boolean mask covering the upper triangle, diagonal included: those cells
# mirror the lower triangle, so they are hidden.
mascara = np.triu(np.ones_like(corr, dtype=bool))

# Two distinct hues so negative and positive correlations are told apart;
# the colour scale is pinned to [-1, 1] and centred on 0.
cmap = sns.diverging_palette(220, 10, as_cmap=True)

sns.heatmap(
    corr,
    mask=mascara,
    cmap=cmap,
    vmin=-1,
    vmax=1,
    center=0,
    square=True,
    linewidths=0.5,
    cbar_kws={'shrink': 0.5},
    annot=True,
);

#### What are the most correlated features?

In [None]:
# Answer (written in Spanish): presumably "pclass and fare" -- the spelling
# in the string below looks like a typo; confirm with the author.
"""
pcclass y fara
"""

#### Use the most appropriate plot to display the summary statistics of `Age` depending on `Pclass`.

In [None]:
# One box of `age` per `pclass` value.
sns.boxplot(x=titan['pclass'], y=titan['age'])

#### Use seaborn to plot the distribution of `Age` based on the `Gender`.
**Hint**: Use Facetgrid.

In [None]:
# One violin of `age` per `gender` value; the trailing semicolon suppresses
# the notebook echo of the returned object.
sns.violinplot(x=titan['gender'], y=titan['age']);