In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline

In [None]:
# Read the Pokemon table from the CSV that sits next to this notebook,
# report its (rows, columns) shape, and preview the first five records.
pokemon = pd.read_csv('pokemon.csv')
print(pokemon.shape)
pokemon.head(n=5)

## Playing with graphs and charts

### Chart: Pokemon counts per Generation

In [None]:
# Bar chart of row counts per generation_id, using seaborn's default
# palette (one colour per bar).
sns.countplot(x='generation_id', data=pokemon);

In [None]:
# Take the first colour of the current seaborn palette and use it for every
# bar, so colour does not encode anything in the chart.
base_color = sns.color_palette()[0]
sns.countplot(x='generation_id', data=pokemon, color=base_color);

In [None]:
# value_counts() sorts by descending frequency, so its index gives the
# generations ordered from most to least common.
bar_order = pokemon['generation_id'].value_counts().index
sns.countplot(
    x='generation_id',
    data=pokemon,
    color=base_color,
    order=bar_order,
);

### Chart: Pokemon count by Type 1

In [None]:
# First pass: count of rows per primary type, default palette, vertical bars.
sns.countplot(x='type_1', data=pokemon);

In [None]:
# Same counts drawn as horizontal bars (category on the y axis) in a single colour.
sns.countplot(y='type_1', data=pokemon, color=base_color);

In [None]:
# Order the primary types from most to least frequent, then draw vertical
# bars in that order.
type_order = pokemon['type_1'].value_counts().index
sns.countplot(x='type_1', data=pokemon, order=type_order, color=base_color);
# Turn the tick labels vertical (90 degrees).
plt.xticks(rotation=90);

### Chart: Absolute Frequency turned into Relative Frequency 

In [None]:
# Reshape to long form: one row per (pokemon, type slot). The type_1 and
# type_2 columns are stacked into a single 'type' column, 'type_level'
# records which column each value came from, and rows with a missing
# type are dropped.
pkmn_types = pd.melt(
    pokemon,
    id_vars=['id', 'species'],
    value_vars=['type_1', 'type_2'],
    var_name='type_level',
    value_name='type',
).dropna()
# Show ten rows by position (positions 802 through 811) of the long frame.
pkmn_types.iloc[802:812]

In [None]:
# Frequency of each type across both type slots, sorted descending by
# value_counts(); the index doubles as the bar order.
type_counts = pkmn_types['type'].value_counts()
type_order2 = type_counts.index
sns.countplot(y='type', data=pkmn_types, order=type_order2, color=base_color);

In [None]:
# Need to create ticks that represent proportions / percentages.
# Start by finding the largest proportion - water type:
n_pokemon = pokemon.shape[0]
# type_counts is indexed by type name, so the first (largest) entry must be
# fetched by position with .iloc. A bare integer key (type_counts[0]) is
# deprecated as a positional lookup on a non-integer index and is treated as
# a label in newer pandas.
max_type_count = type_counts.iloc[0]
# Proportions are relative to the number of pokemon, not the number of
# (pokemon, type) rows.
max_prop = max_type_count / n_pokemon
print(max_prop)

In [None]:
# Tick positions expressed as proportions: 0, 0.02, 0.04, ... up to (but not
# including) the largest proportion, plus matching two-decimal labels.
tick_props = np.arange(0, max_prop, 0.02)
tick_names = [f'{v:0.2f}' for v in tick_props]

In [None]:
# Bars are still drawn as counts; only the axis is relabelled. Each tick is
# placed at proportion * n_pokemon (a count) and labelled with the proportion.
sns.countplot(y='type', data=pkmn_types, order=type_order2, color=base_color);
plt.xticks(ticks=tick_props * n_pokemon, labels=tick_names)
plt.xlabel('proportion');

In [None]:
# Horizontal count plot with the x axis relabelled as proportions, plus a
# percentage annotation at the end of every bar.
sns.countplot(data=pkmn_types, y='type', color=base_color, order=type_order2);
plt.xticks(tick_props * n_pokemon, tick_names)
plt.xlabel('proportion');

# Bars are drawn in type_order2 order (the index of type_counts), so the i-th
# value of type_counts belongs to the bar at y position i. Iterating over the
# values avoids integer-key lookups (type_counts[i]) on a string-labelled
# Series, which are deprecated as positional access in newer pandas.
for i, count in enumerate(type_counts):
    pct_string = '{:0.1f}%'.format(100*count/n_pokemon)
    # Place the label just past the end of the bar, vertically centred on it.
    plt.text(count+1, i, pct_string, va = 'center');

### Histogram: Pokemon Speed

In [None]:
# Histogram of the speed column with matplotlib's default binning.
plt.hist(x='speed', data=pokemon);

In [None]:
# Explicit bin edges every 5 units from 0; the "+ 5" on the stop value makes
# sure the last edge lies at or beyond the maximum speed.
bins = np.arange(0, pokemon['speed'].max() + 5, 5)
plt.hist(x='speed', data=pokemon, bins=bins);

In [None]:
# Histogram of speed with a kernel density estimate drawn on top.
# sns.distplot is deprecated and scheduled for removal; histplot with
# kde=True and stat='density' is the documented replacement and, like
# distplot, puts the bars on a density scale.
sns.histplot(data=pokemon, x='speed', kde=True, stat='density');

In [None]:
# Plain count histogram of speed without a density curve.
# sns.distplot(..., kde=False) is deprecated and scheduled for removal;
# sns.histplot is the documented replacement and plots counts by default.
sns.histplot(data=pokemon, x='speed');

## Outliers & Axis Limits

### Histograms of Pokemon Heights

In [None]:
# Height histogram over the full data range, using bins 0.5 wide from 0.
bins = np.arange(0, pokemon.height.max() + 0.5, 0.5)
plt.hist(x='height', data=pokemon, bins=bins);

In [None]:
# Finer bins (0.2 wide) over the full range, then restrict the visible x
# axis to [0, 6]; values beyond 6 are still binned but not shown.
bins = np.arange(0, pokemon.height.max() + 0.2, 0.2)
plt.hist(x='height', data=pokemon, bins=bins);
plt.xlim(0, 6);

In [None]:
# One figure, two panels in a single row, so the full-range and zoomed-in
# height histograms can be compared directly.
plt.figure(figsize=(10, 5))

# Left panel: bins 0.5 wide, full x range.
plt.subplot(1, 2, 1)
bins = np.arange(0, pokemon.height.max() + 0.5, 0.5)
plt.hist(x='height', data=pokemon, bins=bins);

# Right panel: bins 0.2 wide, x axis limited to [0, 6].
plt.subplot(1, 2, 2)
bins = np.arange(0, pokemon.height.max() + 0.2, 0.2)
plt.hist(x='height', data=pokemon, bins=bins);
plt.xlim((0, 6));

## Scales & Transformations

### Histograms of Pokemon Weights

In [None]:
# Weight histogram on a linear axis with bins 40 wide starting at 0.
bins = np.arange(0, pokemon.weight.max() + 40, 40)
plt.hist(x='weight', data=pokemon, bins=bins);

In [None]:
# Same linearly spaced bins (40 wide), but drawn on a logarithmic x axis.
# The bin edges are not changed by the axis transform, so they are evenly
# spaced in data units, not in log units.
bins = np.arange(0, pokemon.weight.max() + 40, 40)
plt.hist(x='weight', data=pokemon, bins=bins);
plt.xscale('log');

In [None]:
# Summary statistics of the weight column, used to pick axis limits.
pokemon['weight'].describe()

In [None]:
# The same summary statistics on a log10 scale; the min and max rows give
# the exponent range that log-spaced bins need to cover.
np.log10(pokemon['weight'].describe())

In [None]:
# Bin edges evenly spaced in log10 units: exponents start at -1 and step by
# 0.1, with the stop value of 3 + 0.1 chosen so the edge at 10**3 is included.
bins = 10 ** np.arange(-1, 3 + 0.1, 0.1)
plt.hist(x='weight', data=pokemon, bins=bins);
plt.xscale('log');

In [None]:
# Log-spaced bins as before, with hand-picked tick positions (1 and 3 per
# decade) labelled with their plain values.
bins = 10 ** np.arange(-1, 3 + 0.1, 0.1)
ticks = [0.1, 0.3, 1, 3, 10, 30, 100, 300, 1000]
labels = [str(v) for v in ticks]
plt.hist(x='weight', data=pokemon, bins=bins);
plt.xscale('log');
plt.xticks(ticks, labels);