# seaborn (lmplot -> regplot + FacetGrid) 

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
%config InlineBackend.figure_format='retina'

In [None]:
# Report the library versions in use so the notebook output is reproducible.
print(
    f"pandas  version = {pd.__version__}",
    f"seaborn version = {sns.__version__}",
    sep="\n",
)

## read miles per gallon dataset (mpg.csv)

In [None]:
# Download the mpg (miles per gallon) CSV and load it into a DataFrame.
url = 'https://github.com/prasertcbs/tutorial/raw/master/mpg.csv'
df = pd.read_csv(filepath_or_buffer=url)

# Preview the first rows (last expression, so the notebook renders it).
df.head()

In [None]:
# Recode terse codes into readable labels stored as categoricals.
# `gear` comes from the first character of `trans`; `drv` is relabelled in place.
# Values missing from a mapping become NaN, so run this once per fresh load.
df['gear'] = (
    df['trans']
    .str[0]
    .map({'a': 'auto', 'm': 'manual'})
    .astype('category')
)
df['drv'] = (
    df['drv']
    .map({'f': 'front', 'r': 'rear', '4': '4-wheel'})
    .astype('category')
)
df.head()

In [None]:
# Scatter plot of city mileage against engine displacement with a fitted
# regression line. `height` (inches per facet) replaces the old `size`
# argument, which seaborn renamed in v0.9.
sns.lmplot(x='displ', y='cty', data=df, height=4.5)

In [None]:
# Points only: fit_reg=False switches off the regression fit.
sns.lmplot(data=df, x='displ', y='cty', fit_reg=False)

In [None]:
# Scatter only, with translucent markers so overlapping points stay visible.
# `height` replaces the `size` argument, which seaborn renamed in v0.9.
sns.lmplot(x='displ', y='cty', data=df,
           height=4,
           fit_reg=False,
           scatter_kws={'alpha': .3})

In [None]:
# Regression line only: no scatter points and no confidence band.
# - `height` replaces the `size` argument, which seaborn renamed in v0.9.
# - `ci=None` is the documented way to skip the confidence interval;
#   `ci=False` is not None, so seaborn would still bootstrap a 0%-wide band.
sns.lmplot(x='displ', y='cty', data=df,
           height=4,
           fit_reg=True,
           ci=None,
           scatter=False)

### add hue

In [None]:
# Colour points and fit a separate regression line per gear type.
sns.lmplot(data=df, x='displ', y='cty', hue='gear')

In [None]:
# Colour points and fit a separate regression line per drive type.
sns.lmplot(data=df, x='displ', y='cty', hue='drv')

In [None]:
# One facet column per model year, coloured by gear type.
# `height` replaces the `size` argument, which seaborn renamed in v0.9.
sns.lmplot(x='displ', y='cty', data=df,
           hue='gear',
           col='year',
           height=4,
           scatter_kws={'alpha': .3})

In [None]:
# Full facet grid: one row per vehicle class, one column per model year,
# coloured by gear type.
# `height` replaces the `size` argument, which seaborn renamed in v0.9.
sns.lmplot(x='displ', y='cty', data=df,
           hue='gear',
           col='year',
           row='class',
           height=4,
           scatter_kws={'alpha': .3})

## only regression line

In [None]:
# Regression lines only (per gear type, one facet per year).
# `ci=None` is the documented way to disable the confidence band; with
# `ci=False` seaborn still runs the bootstrap and draws a 0%-wide band.
sns.lmplot(x='displ', y='cty', data=df,
           hue='gear',
           col='year',
           ci=None,
           scatter=False)

In [None]:
# One facet per vehicle class, laid out in a single row, coloured by gear.
sns.lmplot(data=df, x='displ', y='cty', hue='gear', col='class')

## matplotlib markers
https://matplotlib.org/api/markers_api.html

In [None]:
from IPython.display import IFrame

# Embed the matplotlib marker reference page in the cell output.
IFrame(
    src='https://matplotlib.org/api/markers_api.html',
    width=640,
    height=240,
)

### column wrap and size

In [None]:
# Wrap the per-class facets onto rows of three, with a distinct marker per
# gear type and a named colour palette.
# `height` replaces the `size` argument, which seaborn renamed in v0.9.
sns.lmplot(x='displ', y='cty', data=df,
           hue='gear',
           markers=["x", "v"],  # one marker per hue level
           col='class',
           col_wrap=3,
           height=4,
           palette="Set2",
           scatter_kws={'alpha': .5})

In [None]:
# palette as a dict: map each hue level to an explicit color

In [None]:
# Facet by drive type and give each gear level an explicit colour through a
# {hue level: colour} dict.
# `height` replaces the `size` argument, which seaborn renamed in v0.9.
sns.lmplot(x='displ', y='cty', data=df,
           hue='gear',
           markers=["x", "v"],  # one marker per hue level
           col='drv',
           col_wrap=3,
           height=3, aspect=1,
           palette={'auto': 'tomato', 'manual': '#00ddcc'},
           scatter_kws={'alpha': .5})

## logistic regression
https://en.wikipedia.org/wiki/Logistic_regression  
A group of 20 students spends between 0 and 6 hours studying for an exam. How does the number of hours spent studying affect the probability that the student will pass the exam?

In [None]:
# Load the study-hours dataset used for the logistic regression example.
df2 = pd.read_csv(
    'https://github.com/prasertcbs/basic-dataset/raw/master/study_hours.csv'
)

# Show five randomly chosen rows.
df2.sample(5)

In [None]:
# Fit a logistic regression of Pass on Hours.
# `height` replaces the `size` argument, which seaborn renamed in v0.9.
sns.lmplot(x='Hours', y='Pass', data=df2,
           logistic=True, height=4)
# Relabel the y axis of the current axes (the single facet drawn above).
plt.ylabel('Probability of passing exam')

## lmplot vs regplot

In [None]:
# Reload the mpg dataset so this section starts from a fresh DataFrame.
url = 'https://github.com/prasertcbs/tutorial/raw/master/mpg.csv'
df = pd.read_csv(filepath_or_buffer=url)

# Preview the first rows (last expression, so the notebook renders it).
df.head()

In [None]:
# Recode terse codes into readable labels stored as categoricals.
# `gear` comes from the first character of `trans`; `drv` is relabelled in place.
# Values missing from a mapping become NaN, so run this once per fresh load.
df['gear'] = (
    df['trans']
    .str[0]
    .map({'a': 'auto', 'm': 'manual'})
    .astype('category')
)
df['drv'] = (
    df['drv']
    .map({'f': 'front', 'r': 'rear', '4': '4-wheel'})
    .astype('category')
)
df.head()

In [None]:
# Demonstration: try to place three lmplot variants in a 1x3 subplot layout.
# NOTE: lmplot is a figure-level function (it builds its own FacetGrid and
# has no `ax` parameter), so each call below opens a new figure and the three
# axes added to `fig` are left empty.
fig = plt.figure(figsize=(14, 4))
x_col = 'displ'
y_col = 'cty'
fig.add_subplot(131)
sns.lmplot(x=x_col, y=y_col, data=df)
fig.add_subplot(132)
sns.lmplot(x=x_col, y=y_col, data=df, fit_reg=False)
fig.add_subplot(133)
# ci=None is the documented way to disable the confidence band
# (ci=False still triggers the bootstrap and a 0%-wide band).
sns.lmplot(x=x_col, y=y_col, data=df, scatter=False, ci=None)

In [None]:
# Same three variants with regplot, which draws onto the current axes when
# no `ax` is given, so each plot lands in the subplot added just before it.
fig = plt.figure(figsize=(14, 4))
x_col = 'displ'
y_col = 'cty'
fig.add_subplot(131)
sns.regplot(x=x_col, y=y_col, data=df)
fig.add_subplot(132)
sns.regplot(x=x_col, y=y_col, data=df, fit_reg=False)
fig.add_subplot(133)
# ci=None is the documented way to disable the confidence band
# (ci=False still triggers the bootstrap and a 0%-wide band).
sns.regplot(x=x_col, y=y_col, data=df, scatter=False, ci=None)

In [None]:
# Create a 1x3 grid with shared axes and hand each regplot its target axes.
fig, ax = plt.subplots(1, 3, figsize=(14, 4), sharey=True, sharex=True)
print(ax.shape)  # shape of the returned axes array
x_col = 'displ'
y_col = 'cty'

sns.regplot(x=x_col, y=y_col, data=df, ax=ax[0])
sns.regplot(x=x_col, y=y_col, data=df, fit_reg=False, ax=ax[1])
# ci=None is the documented way to disable the confidence band
# (ci=False still triggers the bootstrap and a 0%-wide band).
sns.regplot(x=x_col, y=y_col, data=df, scatter=False, ci=None, ax=ax[2])

In [None]:
# 2x3 grid with independent axes: regression variants on the top row,
# univariate views of `cty` on the bottom row.
fig, ax = plt.subplots(2, 3, figsize=(14, 6), sharey=False, sharex=False)
print(ax.shape)  # shape of the returned axes array
x_col = 'displ'
y_col = 'cty'

sns.regplot(x=x_col, y=y_col, data=df, ax=ax[0, 0])
sns.regplot(x=x_col, y=y_col, data=df, fit_reg=False, ax=ax[0, 1])
# ci=None is the documented way to disable the confidence band
# (ci=False still triggers the bootstrap and a 0%-wide band).
sns.regplot(x=x_col, y=y_col, data=df, scatter=False, ci=None, ax=ax[0, 2])

# distplot is deprecated; histplot with a KDE overlay on a density scale is
# the axes-level replacement recommended by seaborn.
sns.histplot(data=df, x='cty', kde=True, stat='density', ax=ax[1, 0])
# Pass the variable as `x=` explicitly: in newer seaborn the only positional
# argument is `data`, so a bare vector is no longer bound to `x`.
sns.violinplot(data=df, x='cty', ax=ax[1, 1])
sns.boxplot(data=df, x='cty', ax=ax[1, 2])