## Exercises Following Along with the Multivariate Visualization Lesson

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [None]:
# Load the fuel-economy data set used by the cells that follow.
fuel_econ = pd.read_csv('fuel-econ.csv')

# Convert vehicle class to an ordered categorical running from the smallest
# to the largest sedan class. Values outside this list become NaN under
# pandas' categorical conversion.
sedan_classes = ['Minicompact Cars', 'Subcompact Cars', 'Compact Cars', 'Midsize Cars', 'Large Cars']
vclasses = pd.api.types.CategoricalDtype(ordered = True, categories = sedan_classes)
fuel_econ['VClass'] = fuel_econ['VClass'].astype(vclasses)

# Keep only the first whitespace-separated word of the transmission
# description as the transmission type. The vectorized .str accessor leaves
# missing or empty entries as NaN, whereas calling x.split()[0] row by row
# raises on them.
fuel_econ['trans_type'] = fuel_econ['trans'].str.split().str[0]

### Examples of Visualization with Non-Positional Encodings for Third Variables

In [None]:
# Draw 200 distinct rows at random; the fixed seed makes the sample
# reproducible from run to run.
np.random.seed(2018)
sample = np.random.choice(fuel_econ.shape[0], 200, replace = False)
# np.random.choice returns row positions (0 .. n-1), so select with the
# positional indexer. .loc would treat them as index labels, which is only
# equivalent while the frame keeps its default RangeIndex.
fuel_econ_subset = fuel_econ.iloc[sample]

In [None]:
# Baseline scatter of combined fuel efficiency against displacement for the
# sampled cars: regression fit switched off, x values jittered.
scatter_ax = sns.regplot(x = 'displ', y = 'comb', data = fuel_econ_subset,
                         fit_reg = False, x_jitter = 0.04)
scatter_ax.set_xlabel('Displacement')
scatter_ax.set_ylabel('Combined Fuel Eff. (mpg)');

In [None]:
# Pair each transmission type with the marker shape that represents it.
ttype_markers = [
    ['Automatic', 'o'],
    ['Manual', '^'],
]

# Draw one scatter layer per transmission type, each with its own marker.
for trans_label, shape in ttype_markers:
    is_this_type = fuel_econ_subset['trans_type'] == trans_label
    sns.regplot(x = 'displ', y = 'comb', data = fuel_econ_subset.loc[is_this_type],
                marker = shape, fit_reg = False, x_jitter = 0.04)
plt.gca().set_xlabel('Displacement')
plt.gca().set_ylabel('Combined Fuel Eff. (mpg)');

In [None]:
# Marker-encoded scatter with a legend. Each layer is labelled with its own
# transmission type as it is drawn, so the legend is built from the plotted
# artists rather than from a second hard-coded list that would have to be
# kept in the same order as ttype_markers.
for ttype, marker in ttype_markers:
    plot_data = fuel_econ_subset.loc[fuel_econ_subset['trans_type'] == ttype]
    sns.regplot(data = plot_data, x = 'displ', y = 'comb', x_jitter = 0.04, fit_reg = False,
                marker = marker, label = ttype)
plt.xlabel('Displacement')
plt.ylabel('Combined Fuel Eff. (mpg)')
plt.legend();

In [None]:
# Encode CO2 as marker size: the raw 'co2' column is passed straight through
# to the underlying scatter call as its `s` argument.
sns.regplot(x = 'displ', y = 'comb', data = fuel_econ_subset,
            fit_reg = False, x_jitter = 0.04,
            scatter_kws = dict(s = fuel_econ_subset['co2']))
plt.gca().set(xlabel = 'Displacement', ylabel = 'Combined Fuel Eff. (mpg)');

In [None]:
# Same size encoding, with every CO2 value halved before it is used as a
# marker size.
halved_co2 = fuel_econ_subset['co2'] / 2
size_ax = sns.regplot(x = 'displ', y = 'comb', data = fuel_econ_subset,
                      fit_reg = False, x_jitter = 0.04,
                      scatter_kws = {'s': halved_co2})
size_ax.set_xlabel('Displacement')
size_ax.set_ylabel('Combined Fuel Eff. (mpg)');

In [None]:
# Size-encoded scatter (CO2 / 2 as marker size) plus a hand-built size legend.
sns.regplot(x = 'displ', y = 'comb', data = fuel_econ_subset,
            fit_reg = False, x_jitter = 0.04,
            scatter_kws = dict(s = fuel_econ_subset['co2'] / 2))
plt.gca().set_xlabel('Displacement')
plt.gca().set_ylabel('Combined Fuel Eff. (mpg)')

# Build the legend from empty scatters: one dummy artist per reference CO2
# value, sized with the same /2 scaling as the plotted points.
base_color = sns.color_palette()[0]
sizes = [200, 350, 500]
legend_obj = [plt.scatter([], [], s = ref_size / 2, color = base_color)
              for ref_size in sizes]
plt.legend(legend_obj, sizes, title = 'CO2 (g/mi)');

### Examples of Using Color for Encoding

In [None]:
# Plain, uncoloured scatter of efficiency against displacement, used as the
# starting point for the colour encodings.
sns.regplot(
    x = 'displ',
    y = 'comb',
    data = fuel_econ_subset,
    fit_reg = False,
    x_jitter = 0.04,
).set(xlabel = 'Displacement', ylabel = 'Combined Fuel Eff. (mpg)');

In [None]:
# Colour the points by transmission type by routing the scatter through a
# FacetGrid whose hue variable is 'trans_type'.
g = sns.FacetGrid(fuel_econ_subset, hue = 'trans_type')
g.map(sns.regplot, 'displ', 'comb', fit_reg = False, x_jitter = 0.04)
plt.gca().set_xlabel('Displacement')
plt.gca().set_ylabel('Combined Fuel Eff. (mpg)');

In [None]:
# Same hue-coloured scatter, with the order of the hue levels pinned
# explicitly (Automatic first, then Manual).
g = sns.FacetGrid(
    fuel_econ_subset,
    hue = 'trans_type',
    hue_order = ['Automatic', 'Manual'],
)
g.map(sns.regplot, 'displ', 'comb', fit_reg = False, x_jitter = 0.04)
plt.gca().set(xlabel = 'Displacement', ylabel = 'Combined Fuel Eff. (mpg)');

In [None]:
# Hue-coloured scatter on a larger facet: height 4 with a 1.5 aspect ratio.
g = sns.FacetGrid(fuel_econ_subset, aspect = 1.5, height = 4,
                  hue = 'trans_type', hue_order = ['Automatic', 'Manual'])
g.map(sns.regplot, 'displ', 'comb', fit_reg = False, x_jitter = 0.04)
plt.gca().set_xlabel('Displacement')
plt.gca().set_ylabel('Combined Fuel Eff. (mpg)');

In [None]:
# Hue-coloured scatter on the enlarged facet, now with a legend for the
# transmission types.
g = sns.FacetGrid(
    fuel_econ_subset,
    hue = 'trans_type',
    hue_order = ['Automatic', 'Manual'],
    height = 4,
    aspect = 1.5,
)
g.map(sns.regplot, 'displ', 'comb', fit_reg = False, x_jitter = 0.04)
g.add_legend()
plt.gca().set(xlabel = 'Displacement', ylabel = 'Combined Fuel Eff. (mpg)');

In [None]:
# Colour the points by vehicle class instead of transmission type, using the
# default palette.
g = sns.FacetGrid(fuel_econ_subset, hue = 'VClass', aspect = 1.5, height = 4)
g.map(sns.regplot, 'displ', 'comb', fit_reg = False, x_jitter = 0.04)
g.add_legend()
plt.gca().set_xlabel('Displacement')
plt.gca().set_ylabel('Combined Fuel Eff. (mpg)');

In [None]:
# Vehicle-class colouring again, switched to the reversed viridis palette.
g = sns.FacetGrid(
    fuel_econ_subset,
    hue = 'VClass',
    palette = 'viridis_r',
    height = 4,
    aspect = 1.5,
)
g.map(sns.regplot, 'displ', 'comb', fit_reg = False, x_jitter = 0.04)
g.add_legend()
plt.gca().set(xlabel = 'Displacement', ylabel = 'Combined Fuel Eff. (mpg)');

In [None]:
# Use the 'co2' column as the hue variable with the reversed viridis palette.
# NOTE(review): 'co2' looks numeric; FacetGrid treats hue as discrete levels,
# so presumably this yields one legend entry per distinct value -- confirm
# that is the intended demonstration.
g = sns.FacetGrid(fuel_econ_subset, hue = 'co2', palette = 'viridis_r',
                  aspect = 1.5, height = 4)
g.map(sns.regplot, 'displ', 'comb', fit_reg = False, x_jitter = 0.04)
g.add_legend()
plt.gca().set_xlabel('Displacement')
plt.gca().set_ylabel('Combined Fuel Eff. (mpg)');

In [None]:
# Colour each point by its 'co2' value on the reversed viridis colormap and
# attach a colorbar for that mapping.
co2_points = plt.scatter(x = 'displ', y = 'comb', c = 'co2',
                         cmap = 'viridis_r', data = fuel_econ_subset)
plt.colorbar(co2_points, label = 'CO2 (g/mi)')
plt.gca().set_xlabel('Displacement')
plt.gca().set_ylabel('Combined Fuel Eff. (mpg)');

### Examples of Faceting

In [None]:
# Histogram of combined efficiency in one column of facets per vehicle class.
# Bin edges run from 12 to 58 in steps of 2 (the stop value 60 is exclusive).
bins = np.arange(start = 12, stop = 60, step = 2)
g = sns.FacetGrid(fuel_econ, col = 'VClass')
g.map(plt.hist, 'comb', bins = bins);

In [None]:
# Efficiency-vs-displacement scatter, one facet column per vehicle class.
g = sns.FacetGrid(fuel_econ, col = 'VClass')
g.map(plt.scatter, 'displ', 'comb');

In [None]:
# Two-way faceting: transmission type down the rows, vehicle class across
# the columns.
g = sns.FacetGrid(
    fuel_econ,
    row = 'trans_type',
    col = 'VClass',
)
g.map(plt.scatter, 'displ', 'comb');

In [None]:
# Same two-way grid with margin_titles enabled on the FacetGrid.
g = sns.FacetGrid(
    fuel_econ,
    row = 'trans_type',
    col = 'VClass',
    margin_titles = True,
)
g.map(plt.scatter, 'displ', 'comb');

### Examples of Other Adaptations of Bivariate Plots

In [None]:
# Point plot of combined efficiency per vehicle class with standard-deviation
# error bars; the empty linestyle keeps the class points from being joined.
# NOTE(review): recent seaborn releases replace `ci` with `errorbar` --
# confirm against the installed version before changing it.
sns.pointplot(data = fuel_econ, x = 'VClass', y = 'comb', ci = 'sd', linestyles = '')
plt.xticks(rotation = 15)
# Label corrected: 'Ag.' was a typo for 'Avg.'.
plt.ylabel('Avg. combined efficiency (mpg)');

In [None]:
# Point plot per vehicle class, split by transmission type via hue, with
# standard-deviation error bars and no connecting lines.
sns.pointplot(data = fuel_econ, x = 'VClass', y = 'comb', ci = 'sd',
              hue = 'trans_type', linestyles = '')
plt.xticks(rotation = 15)
# Label corrected: 'Ag.' was a typo for 'Avg.'.
plt.ylabel('Avg. combined efficiency (mpg)');

In [None]:
# Same hue-split point plot with dodge enabled, so the transmission types
# within a vehicle class are offset along x instead of drawn on top of each
# other.
sns.pointplot(data = fuel_econ, x = 'VClass', y = 'comb', ci = 'sd',
              hue = 'trans_type', linestyles = '', dodge = True)
plt.xticks(rotation = 15)
# Label corrected: 'Ag.' was a typo for 'Avg.'.
plt.ylabel('Avg. combined efficiency (mpg)');

In [None]:
# Clustered bar chart of combined efficiency per vehicle class, one bar per
# transmission type, with standard-deviation error bars.
sns.barplot(data = fuel_econ, x = 'VClass', y = 'comb', ci = 'sd',
            hue = 'trans_type')
plt.xticks(rotation = 15)
# Label corrected: 'Ag.' was a typo for 'Avg.'.
plt.ylabel('Avg. combined efficiency (mpg)');

In [None]:
# Clustered box plot of combined efficiency per vehicle class, split by
# transmission type.
sns.boxplot(data = fuel_econ, x = 'VClass', y = 'comb', hue = 'trans_type')
plt.xticks(rotation = 15)
# A box plot shows the distribution of 'comb', not an average, so the axis
# carries the plain efficiency label used by the file's other 'comb' axes
# rather than the previous "Ag. combined efficiency" text.
plt.ylabel('Combined Fuel Eff. (mpg)');

### Examples of Plot Matrices

In [None]:
# Load the Pokémon data set and preview its first five rows.
pokemon = pd.read_csv(filepath_or_buffer = 'pokemon.csv')
pokemon.head(n = 5)

In [None]:
# pandas scatter matrix over the six stat columns, drawn on a 12x12 figure.
pd.plotting.scatter_matrix(
    pokemon.loc[:, ['hp', 'attack', 'defense', 'speed',
                    'special-attack', 'special-defense']],
    figsize = (12, 12),
);

In [None]:
# Stat columns shared by the plot-matrix cells.
pkmn_stats = [
    'hp',
    'attack',
    'defense',
    'speed',
    'special-attack',
    'special-defense',
]
# Set up the PairGrid over those columns; nothing is mapped onto it yet.
g = sns.PairGrid(pokemon, vars = pkmn_stats);

In [None]:
# Plot matrix of the stats: scatterplots in the off-diagonal cells and
# histograms on the diagonal.
g = sns.PairGrid(pokemon, vars = pkmn_stats)
g.map_offdiag(func = plt.scatter)
g.map_diag(func = plt.hist);

In [None]:
# Heat map of the pairwise correlations between the stat columns, with
# default styling.
stat_corr = pokemon[pkmn_stats].corr()
sns.heatmap(stat_corr);

In [None]:
# Correlation heat map on the reversed rocket colormap, with each cell
# annotated to two decimal places.
sns.heatmap(
    pokemon[pkmn_stats].corr(),
    annot = True,
    fmt = '.2f',
    cmap = 'rocket_r',
);

In [None]:
# Annotated correlation heat map with the lower end of the colour scale
# pinned at 0.
corr_heatmap_opts = dict(cmap = 'rocket_r', annot = True, fmt = '.2f', vmin = 0)
sns.heatmap(pokemon[pkmn_stats].corr(), **corr_heatmap_opts);