# Appendix 3: Python Libraries Crash Course

## Part 7: Data Visualization with Seaborn

## First Steps

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import warnings
# Silence every warning for the rest of the session to keep the cell output clean.
# NOTE(review): this also hides deprecation notices from pandas/seaborn/matplotlib.
warnings.filterwarnings("ignore")

In [None]:
# Load the Titanic data set; the relative path means the CSV is looked up in the
# current working directory.
titanic = pd.read_csv("titanic.csv")

In [None]:
# Show the first rows of the frame as a quick sanity check of the loaded columns.
titanic.head()

In [None]:
# Store the passenger class as text labels. Using it as `hue` while it is still
# numeric fails with:
#   AttributeError: 'numpy.int64' object has no attribute 'startswith'
titanic["pclass"] = titanic["pclass"].astype(str)

In [None]:
# Passenger counts per sex, with one bar per passenger class.
plt.figure(figsize=(12, 8))
# Double the font size and switch the colour palette to "viridis".
sns.set_theme(font_scale=2, palette="viridis")
sns.countplot(
    data=titanic,
    x="sex",
    hue="pclass",
)
plt.show()

## Categorical Plots

In [None]:
# One jittered point per passenger: age by sex, separated by passenger class.
plt.figure(figsize=(12, 8))
sns.set_theme(font_scale=1.5)
sns.stripplot(
    data=titanic,
    x="sex",
    y="age",
    jitter=True,
    hue="pclass",
    dodge=True,
)
plt.show()

In [None]:
# Same data as a swarm plot: age by sex, one swarm per passenger class.
plt.figure(figsize=(12, 8))
sns.set_theme(font_scale=1.5)
sns.swarmplot(
    data=titanic,
    x="sex",
    y="age",
    hue="pclass",
    dodge=True,
)
plt.show()

In [None]:
# Age distributions (violins) per sex and passenger class, with the individual
# observations drawn on top as a swarm.
plt.figure(figsize=(12, 8))
sns.set_theme(font_scale=1.5)
sns.violinplot(data=titanic, x="sex", y="age", hue="pclass", dodge=True)
# When `hue` is given, seaborn treats `color="black"` as a request for a
# dark-to-black gradient palette and flags that shortcut as deprecated. Asking
# for the palette explicitly yields the same colours without relying on the
# deprecated path.
# legend=False keeps the swarm layer from adding a second set of `pclass`
# entries to the legend already created for the violins.
sns.swarmplot(
    data=titanic,
    x="sex",
    y="age",
    hue="pclass",
    dodge=True,
    palette="dark:black",
    legend=False,
)
plt.show()

In [None]:
# Split violins: one violin per passenger class, with the two sexes drawn as
# its two halves.
plt.figure(figsize=(12, 8))
sns.set_theme(font_scale=1.5)
sns.violinplot(
    data=titanic,
    x="pclass",
    y="age",
    hue="sex",
    dodge=True,
    split=True,
)
plt.show()

In [None]:
# Bar chart of age per passenger class, with side-by-side bars for each sex.
plt.figure(figsize=(12, 8))
sns.set_theme(font_scale=1.5)
sns.barplot(
    data=titanic,
    x="pclass",
    y="age",
    hue="sex",
    dodge=True,
)
plt.show()

In [None]:
# Point plot of age per passenger class, one line per sex; dodge offsets the
# two series so they do not overlap.
plt.figure(figsize=(12, 8))
sns.set_theme(font_scale=1.5)
sns.pointplot(
    data=titanic,
    x="pclass",
    y="age",
    hue="sex",
    dodge=True,
)
plt.show()

## Jointplots / Regression

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Reload the data from disk so this section starts from the raw CSV; this
# discards the earlier `astype(str)` conversion of `pclass`.
titanic = pd.read_csv("titanic.csv")

In [None]:
# Show the first rows of the freshly loaded frame.
titanic.head()

In [None]:
# Joint distribution of age and fare, drawn with a regression fit (kind="reg").
sns.set_theme(font_scale=1.5)
sns.jointplot(
    data=titanic,
    x="age",
    y="fare",
    height=8,
    kind="reg",
)
plt.show()

In [None]:
# Regression of fare on age, one square facet (column) per sex.
sns.set_theme(font_scale=1.5)
sns.lmplot(
    data=titanic,
    x="age",
    y="fare",
    aspect=1,
    height=8,
    col="sex",
)
plt.show()

In [None]:
# Logistic regression of survival on age, one facet (column) per sex.
sns.set_theme(font_scale=1.5)
sns.lmplot(
    data=titanic,
    x="age",
    y="survived",
    aspect=1,
    height=8,
    col="sex",
    logistic=True,
)
plt.show()

In [None]:
# Logistic regression of survival on age, one facet (column) per passenger class.
sns.set_theme(font_scale=1.5)
sns.lmplot(
    data=titanic,
    x="age",
    y="survived",
    aspect=1,
    height=8,
    col="pclass",
    logistic=True,
)
plt.show()

## Matrixplots / Heatmaps

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Reload the data from disk so this section starts from the raw CSV.
titanic = pd.read_csv("titanic.csv")

In [None]:
# Show the first rows of the freshly loaded frame.
titanic.head()

In [None]:
# Contingency table: number of passengers for each sex / passenger-class pair.
pd.crosstab(index=titanic["sex"], columns=titanic["pclass"])

In [None]:
# Heatmap of the sex x passenger-class counts. Cells are annotated as integers
# (fmt="d") and the colour scale is capped at 150.
plt.figure(figsize=(12, 8))
sns.set_theme(font_scale=1.4)
sns.heatmap(
    pd.crosstab(index=titanic["sex"], columns=titanic["pclass"]),
    annot=True,
    fmt="d",
    cmap="Reds",
    vmax=150,
)
plt.show()

In [None]:
# Mean of `survived` for each sex / passenger-class pair.
pd.crosstab(
    index=titanic["sex"],
    columns=titanic["pclass"],
    values=titanic["survived"],
    aggfunc="mean",
)

In [None]:
# Heatmap of the mean of `survived` per sex / passenger-class pair, with each
# cell annotated with its value.
plt.figure(figsize=(12, 8))
sns.set_theme(font_scale=1.4)
sns.heatmap(
    pd.crosstab(
        index=titanic["sex"],
        columns=titanic["pclass"],
        values=titanic["survived"],
        aggfunc="mean",
    ),
    annot=True,
    cmap="Reds",
)
plt.show()

In [None]:
# Pairwise correlations of the numeric columns.
# New since pandas 2.0: numeric_only is required when the frame mixes data types.
titanic.corr(numeric_only=True)

In [None]:
# Heatmap of the correlation matrix of the numeric columns, with each cell
# annotated with its coefficient.
plt.figure(figsize=(12, 8))
sns.set_theme(font_scale=1.4)
sns.heatmap(
    titanic.corr(numeric_only=True),
    annot=True,
    cmap="Reds",
)
plt.show()