In [None]:
import random

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from scipy.stats import ttest_ind

# Simulation parameters: faces per die, number of throws, and dice per throw.
n_sides = 6
n_rolls = 10000
n_dice = 2

# Fixed seed so that "Restart Kernel -> Run All" reproduces the same rolls (and
# therefore the same figures). A dedicated generator is used so the state of the
# module-level `random` functions is left untouched.
SEED = 42
rng = random.Random(SEED)

# Each entry is the total shown by `n_dice` dice in a single throw.
results = [
    sum(rng.randint(1, n_sides) for _ in range(n_dice))
    for _ in range(n_rolls)
]

# Two panels side by side: a narrow strip plot of the raw rolls (left) and a
# histogram three times as wide (right).
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(10, 5), width_ratios=[1, 3])
strip_ax, hist_ax = ax

# Left panel: every roll as a nearly transparent point (alpha=.1).
sns.stripplot(results, alpha=.1, ax=strip_ax)

# Right panel: unit-wide bins whose edges sit on the half-integers, so each bin
# is centred on one possible total between n_dice and n_dice * n_sides.
lowest_edge = n_dice - .5
highest_edge = n_dice * n_sides + .5
sns.histplot(
    results,
    binwidth=1,
    binrange=(lowest_edge, highest_edge),
    alpha=1,
    ax=hist_ax,
)
hist_ax.set_xlabel('Roll result')
fig.tight_layout()

In [None]:
# Histogram of the roll totals with the sample mean marked by a dashed line.
# seaborn returns the Axes it drew on; keeping that handle lets every later call
# target this figure explicitly instead of relying on pyplot's "current" state.
hist_ax = sns.histplot(
    results,
    binwidth=1,
    binrange=(n_dice - .5, n_dice * n_sides + .5),
    alpha=1,
)
hist_ax.set_xlabel('Roll result')

mean_roll = sum(results) / len(results)
hist_ax.axvline(mean_roll, linestyle='dashed', color='black')

# The original called `fig.tight_layout()`, but `fig` is the figure created in an
# earlier cell; tighten the figure that owns this histogram instead.
hist_ax.figure.tight_layout()

In [None]:
# Scrape the HTML tables from the Wikipedia article on US local government.
LOCAL_GOV_URL = 'https://en.wikipedia.org/wiki/Local_government_in_the_United_States'
RATIO_COLUMN = 'Constituents per Councilmember'

# Select the table by the column this cell needs rather than by position: the
# original took table index 2, which breaks as soon as a table is added or
# removed earlier on the page.
matching_tables = [
    table for table in pd.read_html(LOCAL_GOV_URL)
    if RATIO_COLUMN in table.columns
]
if not matching_tables:
    raise ValueError(f'No table with a {RATIO_COLUMN!r} column found at {LOCAL_GOV_URL}')
towns = matching_tables[0]

towns[RATIO_COLUMN].hist()

In [None]:
# Share attributed to each pie style; the ten values sum to 100.
pie_styles = ['Strawberry', 'Blueberry', 'Cherry', 'Lemon meringue', 'Chocolate',
              'Sweet potato', 'Apple', 'Pecan', 'Pumpkin', 'Others']
pie_shares = [2, 3, 3, 4, 9, 10, 14, 17, 36, 2]
df = pd.DataFrame({'style': pie_styles, 'share': pie_shares})

# One wedge per style, annotated with its percentage rounded to a whole number;
# wedge labels come from the index, so no legend is drawn.
df.set_index('style').plot.pie(subplots=True, autopct='%.0f%%', legend=False)
plt.ylabel('')  # blank out the y-axis label
plt.savefig('pie.svg')

In [None]:
# Horizontal bar chart of `df` (one bar per style) on seaborn's "whitegrid" style.
# Note: set_style changes the style for every plot drawn after this cell as well.
sns.set_style("whitegrid")
bar_ax = sns.barplot(data=df, x='share', y='style')

bar_ax.set_ylabel('')  # the style names on the ticks speak for themselves
plt.savefig('bar.svg')

In [None]:
# Load the SAT report CSV: the 'index' column becomes the row index and cells
# containing '*' are read as missing values.
sat_report_path = 'SAT Report 2015-2016.csv'
df = pd.read_csv(sat_report_path, index_col='index', na_values='*')

In [None]:
# Distribution of the values in the AvgScrMath column.
sns.displot(df, x='AvgScrMath')

In [None]:
# Load seaborn's example "penguins" dataset and preview the first rows
# (a preview keeps the cell output short instead of rendering the whole frame).
penguins = sns.load_dataset("penguins")
penguins.head()

In [None]:
# Kernel density estimate of flipper length, coloured by species,
# with one facet (column) per sex.
sns.displot(
    penguins,
    x="flipper_length_mm",
    hue="species",
    col="sex",
    kind="kde",
)

In [None]:
# Empirical CDF of flipper length per species, one facet per sex. Flipper length
# is mapped to the y-axis, so the cumulative proportion runs along x.
sns.displot(
    penguins,
    y="flipper_length_mm",
    hue="species",
    col="sex",
    kind="ecdf",
)

# Put the x ticks on the quarter marks of the proportion axis.
quarter_marks = [0.0, .25, .5, .75, 1.]
plt.xticks(quarter_marks);

In [None]:
# Mean flipper length of each species (one value per group, in groupby order).
species_means = penguins.groupby(by='species')['flipper_length_mm'].mean()

# Density curve of flipper length for each species.
sns.displot(penguins, x="flipper_length_mm", hue="species", kind="kde")

# Reuse the colours of the curves already on the axes. The list of lines is
# reversed so that it lines up with the order of the group means — presumably
# seaborn draws the hue levels in reverse order; verify if seaborn is upgraded.
curve_colors = [curve.get_color() for curve in plt.gca().lines[::-1]]

# Dashed vertical marker at each mean; the height (0.028) is hard-coded.
plt.vlines(
    species_means,
    ymin=0,
    ymax=0.028,
    linestyles='dashed',
    colors=curve_colors,
)


In [None]:
# Histogram of prices in seaborn's example "diamonds" dataset.
diamonds = sns.load_dataset('diamonds')
sns.displot(data=diamonds, x='price')
plt.xlabel('Sale price')

# Mark the mean and then the median price with dashed vertical lines that share
# the same hard-coded height (6500).
marker_style = dict(ymin=0, ymax=6500, linestyles='dashed')
plt.vlines(diamonds['price'].mean(), **marker_style)
plt.vlines(diamonds['price'].median(), **marker_style)

In [None]:
# Keep only the rows whose species is "Adelie"; later cells reuse `adelie`.
adelie = penguins[penguins['species'] == 'Adelie']

# Strip plot of the individual flipper-length measurements.
sns.stripplot(adelie, x="flipper_length_mm")
plt.xlabel('Flipper length [mm]');

In [None]:
# Distribution of Adelie flipper length (displot's default kind).
adelie_grid = sns.displot(adelie, x="flipper_length_mm")
adelie_grid.ax.set_xlabel('Flipper length [mm]');

In [None]:
# Summary statistics of Adelie flipper length.
adelie['flipper_length_mm'].describe()

In [None]:
# Interquartile range of Adelie flipper length, computed from the data.
# The original cell hard-coded `195-186`, which look like the 75% and 25% values
# read off a describe() summary — TODO confirm that was the intent.
flipper_q1, flipper_q3 = adelie['flipper_length_mm'].quantile([.25, .75])
flipper_q3 - flipper_q1

In [None]:
# Mean Adelie flipper length.
adelie['flipper_length_mm'].mean()

In [None]:
# Median Adelie flipper length.
adelie['flipper_length_mm'].median()

In [None]:
# Flipper length one standard deviation above the Adelie mean.
adelie_flipper = adelie['flipper_length_mm']
adelie_flipper.mean() + adelie_flipper.std()

In [None]:
# Variance of Adelie flipper length (pandas' default `var()`).
adelie['flipper_length_mm'].var()

In [None]:
# Bivariate density of flipper length against bill length for Adelie penguins,
# with rug marks for the individual observations.
joint_grid = sns.displot(
    adelie,
    x="flipper_length_mm",
    y="bill_length_mm",
    kind="kde",
    rug=True,
)
joint_grid.ax.set_xlabel("Flipper length [mm]")
joint_grid.ax.set_ylabel("Bill length [mm]");

In [None]:
# Scatter plot of bill length against flipper length for Adelie penguins.
sns.scatterplot(
    data=adelie,
    x='flipper_length_mm',
    y='bill_length_mm',
)

In [None]:
# Two-sample t-test on flipper length: Adelie vs. Chinstrap.
# `ttest_ind` is imported together with the notebook's other imports in the
# first cell, so a fresh kernel shows every dependency up front.
chinstrap = penguins.query('species == "Chinstrap"')

# Missing measurements are dropped before the samples are passed to the test.
ttest_ind(
    adelie['flipper_length_mm'].dropna(),
    chinstrap['flipper_length_mm'].dropna(),
)

In [None]:
# Restrict to the two species being compared; the subset is computed once and
# shared by the histogram and the mean markers.
adelie_chinstrap = penguins.query('species in ["Adelie", "Chinstrap"]')
pair_means = adelie_chinstrap.groupby(by='species')['flipper_length_mm'].mean()

# Overlaid flipper-length histograms, one colour per species.
sns.displot(adelie_chinstrap, x='flipper_length_mm', hue='species')

# Take the marker colours from the bars already drawn, through the public
# `get_facecolor()` accessor (the original read the private `_facecolor`
# attribute), and force the alpha channel to 1.0.
# Containers are indexed 1 then 0 to line up with the order of the group means —
# presumably seaborn draws the hue levels in reverse order; verify on upgrade.
bar_groups = plt.gca().containers
mean_line_colors = [
    (*bar_groups[1].patches[0].get_facecolor()[:3], 1.0),
    (*bar_groups[0].patches[0].get_facecolor()[:3], 1.0),
]

# Dashed vertical marker at each species mean; the height (30) is hard-coded.
plt.vlines(
    pair_means,
    ymin=0,
    ymax=30,
    linestyles='dashed',
    colors=mean_line_colors,
)
plt.xlabel("Flipper length [mm]");