- [Seaborn Cheatsheet](https://datacamp-community-prod.s3.amazonaws.com/f9f06e72-519a-4722-9912-b5de742dbac4)
- [Color palletes](https://chrisalbon.com/python/data_visualization/seaborn_color_palettes/)


In [1]:
import pandas as pd
import numpy as np
from statistics import *
import matplotlib.pyplot as plt
from matplotlib import rcParams
%matplotlib notebook
import seaborn as sns
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)
warnings.filterwarnings("ignore", category=DeprecationWarning)

#from pylab import rcParams
#rcParams['figure.figsize'] = 5,7

In [2]:
crash_df = sns.load_dataset('car_crashes')

In [9]:
sns.set_theme(context='paper', 
                  style='white',      # darkgrid, whitegrid, dark, white, ticks
                  palette='PuOr_r',   # pastel Blues RdPu
                  #font='sans-serif', 
                  font_scale=1, 
                  rc=None)

plt.rcParams["font.family"] = "Times New Roman"
plt.rcParams['figure.figsize']=(10,5)


#fig = plt.figure(figsize=(10, 1))
#ax = fig.add_subplot(1,1,1)
sns.jointplot(x = 'speeding',
              y = 'alcohol',
              data = crash_df, 
             kind = 'reg')

<IPython.core.display.Javascript object>

<seaborn.axisgrid.JointGrid at 0x7fe003c832e0>

In [None]:
wide = 7; tall = 7
fig = plt.figure(figsize = [wide, tall])
fig.suptitle("Wine Type - Quality", fontsize=14)


ax1 = fig.add_subplot(2,2,1)
ax1.hist(np.random.randn(100), bins = 20, color = 'k', alpha = .3)
ax1.set_title("Red Wine")
ax1.set_xlabel("Quality")
ax1.set_xticks(range(-4,5,2)) 
ax1.set_xticklabels(range(-4,5,2), fontsize=12)
ax1.set_ylabel("Frequency")
#ax1.annotate("something",xy=(2006,fy[2006]))
#.legend('ABCDEF', ncol=2, loc='upper left');


ax2 = fig.add_subplot(2,2,2)
ax2.scatter(np.arange(30), np.arange(30) + 3 * np.random.randn(30))
ax2.set_title("Red Wine")
ax2.set_xlabel("Quality")
ax2.set_ylabel("Frequency")


ax3 = fig.add_subplot(2,2,3)
ax3.plot(np.random.randn(50).cumsum(), 'k--')
ax3.set_title("Red Wine")
ax3.set_xlabel("Quality")
ax3.set_ylabel("Frequency")


fig.subplots_adjust(hspace=0.5, wspace=0.3)

In [None]:
# Background
sns.set_style() # 'white', 'dark', 'whitegrid', 'darkgrid', 'ticks'

# Colors
sns.palplot(sns.color_palette("coolwarm", 10))        # ordinal
sns.palplot(sns.color_palette("Blues", 10))       # interval
sns.palplot(sns.color_palette("colorblind", 10))  # categorical

--- 

# Figure with Subplots

In [26]:
wide = 7; tall = 7
fig = plt.figure(figsize = [wide, tall])
fig.suptitle("Wine Type - Quality", fontsize=14)


ax1 = fig.add_subplot(2,2,1)
ax1.hist(np.random.randn(100), bins = 20, color = 'k', alpha = .3)
ax1.set_title("Red Wine")
ax1.set_xlabel("Quality")
ax1.set_xticks(range(-4,5,2)) 
ax1.set_xticklabels(range(-4,5,2), fontsize=12)
ax1.set_ylabel("Frequency")
#ax1.annotate("something",xy=(2006,fy[2006]))


ax2 = fig.add_subplot(2,2,2)
ax2.scatter(np.arange(30), np.arange(30) + 3 * np.random.randn(30))
ax2.set_title("Red Wine")
ax2.set_xlabel("Quality")
ax2.set_ylabel("Frequency")


ax3 = fig.add_subplot(2,2,3)
ax3.plot(np.random.randn(50).cumsum(), 'k--')
ax3.set_title("Red Wine")
ax3.set_xlabel("Quality")
ax3.set_ylabel("Frequency")


fig.subplots_adjust(hspace=0.5, wspace=0.3)

<IPython.core.display.Javascript object>

---

**Histogram and KDE**

In [38]:
df = pd.read_csv('fortune500.csv', usecols=['Year', 'Company', 'Revenue (in millions)'])
df['Revenue (in millions)'] = pd.to_numeric(df['Revenue (in millions)'], errors = 'coerce')
df = df.dropna()

In [37]:
wide = 3; tall = 3
fig = plt.figure(figsize = [wide, tall])
sns.distplot(df["Revenue (in millions)"], 
                hist = False, 
                kde = True, 
                kde_kws = {'shade':True}, 
                rug = False, 
                bins = 10,
                hist_kws = dict(alpha=1))
plt.show()

<IPython.core.display.Javascript object>

**3d plot**

In [None]:
from mpl_toolkits.mplot3d import Axes3D
fig = plt.figure(figsize=(8,6))
ax = fig.add_subplot(111, projection='3d')
ax.scatter(pca_3d[:,0], 
           pca_3d[:,1], 
           pca_3d[:,2],
           c=kmeans.labels_,
           edgecolors='b')

ax.set_xlabel('X - What they bet')
ax.set_ylabel('Y - What they earn')
ax.set_zlabel('Z - Frequency of visits')
ax.set_xticks([])
ax.set_yticks([])
ax.set_zticks([])
plt.show()

**Regression plot, mean, median and facets**

In [None]:
sns.lmplot(data=df,
           x="insurance_losses",
           y="premiums",
           #hue="Region",    to make each region its own line
           #row="Region",    to make each region its own row
           #col="Region")    to make each region its own column
           
ax2.axvline(x=median, color='m', label='Median', linestyle='--', linewidth=2)
ax2.axvline(x=mean, color='b', label='Mean', linestyle='-', linewidth=2)

# Examples

In [2]:
# Set up figure 
wide = 9; tall = 3
fig = plt.figure(figsize = (wide, tall))


# Read in file, drop na's
fortune500 = pd.read_csv('fortune500.csv', usecols = ['Year', 'Profit (in millions)', 'Company', 'Revenue (in millions)'], na_values = ['N.A.'])
fortune500 = fortune500.dropna()
fortune500['Profit (in millions)'] = (fortune500['Profit (in millions)'].str.split()).apply(lambda x: float(x[0].replace(',', '')))


# Sub 1 --> median by year
fy = fortune500.groupby('Year')['Profit (in millions)'].median()
ax1 = fig.add_subplot(1,2,1)
ax1.plot(fy, 'b-')
ax1.title.set_text('Profit by Year')
ax1.set_ylabel("Median Profit (millions)")
ax1.set_xlabel('Year')
ax1.annotate("something",xy=(2006,fy[2006]))
fig.show()


# Sub 2 --> avg profit and rev by year
ax2 = fig.add_subplot(1,2,2)
fortune500['Year'] = pd.to_datetime(fortune500['Year'], format = "%Y")
fortune500['Revenue (in millions)'] = pd.to_numeric(fortune500['Revenue (in millions)'], errors = 'coerce')
fortune500['Profit (in millions)'] = pd.to_numeric(fortune500['Profit (in millions)'], errors = 'coerce')
fortune500 = fortune500[fortune500['Year'].dt.year > 1999]
fortune500.dropna(inplace = True)
new = fortune500.groupby(['Year']).agg('mean')
ax2.plot(new)

<IPython.core.display.Javascript object>


To register the converters:
	>>> from pandas.plotting import register_matplotlib_converters
	>>> register_matplotlib_converters()


[<matplotlib.lines.Line2D at 0x24203c59f48>,
 <matplotlib.lines.Line2D at 0x24203c77088>]

In [5]:
yvals = pd.read_csv('digits.txt', squeeze = True, header = None)

xvals = list(yvals)
d_series = Series(Series.value_counts(xvals, normalize=False), index = xvals)   

wide = 2
tall = 2
fig = plt.figure(figsize = (wide, tall))

plt.hist(yvals)
plt.axis([0, 10, 0, 5])
plt.xticks(np.arange(0., 10., 2))
plt.title('Digit Frequency')
plt.ylabel('Frequency')
plt.xlabel('Digits')
plt.show()

<IPython.core.display.Javascript object>

## Lines

In [8]:
wide = 8 ; tall = 4
fig = plt.figure(figsize = (wide, tall))



def plot_tax_rates(file, countries):

    tax = pd.read_csv(file, usecols = ['LOCATION', 'Value', 'TIME'])

    # Only the states we want
    tax = tax[tax['LOCATION'].isin(countries)]

    ##################################################################
    # First picture - multiple lines on one axis
    # Grouping by both time and by country
    each_c = tax.groupby(['TIME', 'LOCATION']).mean()['Value']

    ax1 = fig.add_subplot(1,2,1)
    each_c.groupby('LOCATION').plot()
    ax1.set_xlabel('Year')
    ax1.legend()
    ax1.set_title('Country Tax Rate')


    ##################################################################
    # Second picture - group avg
    total_avg = tax.groupby('TIME').mean()['Value']

    ax2 = fig.add_subplot(1,2,2)
    total_avg.plot()
    ax2.set_xlabel('Year')
    ax2.set_title('Group Avg Tax Rate')
    
plot_tax_rates('corp_tax.csv', ['USA', 'FRA', 'CAN'])

<IPython.core.display.Javascript object>