# Pandas

In [8]:
%matplotlib notebook

In [9]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

In [19]:
from numpy.random import randn,rand
s=pd.Series(np.random.randn(10).cumsum(),
index=np.arange(0, 100, 10))
s.plot()

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x1f5e2e9d588>

In [20]:
df = pd.DataFrame(np.random.randn(10,4).cumsum(0),
                 columns=['A','B','C','D'],
                 index = np.arange(0 ,100 ,10))
df.plot()

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x1f5e4d2b048>

In [21]:
fig , axes = plt.subplots(2,1)
data = pd.Series(np.random.rand(16),index = list('abcdefghijklmnop'))
data.plot.bar(ax=axes[0],color='k',alpha=0.3) # alpha sets transparency
data.plot.barh(ax=axes[1],color='k',alpha=0.7)

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x1f5e5406208>

In [22]:
np.random.seed(12348)

In [23]:
# 6x4 frame of uniform random values in [0, 1); rows are labelled 'one'..'six'
# and the column index carries the name 'Genus'.
df = pd.DataFrame(
    data=np.random.rand(6, 4),
    index=['one', 'two', 'three', 'four', 'five', 'six'],
    columns=pd.Index(['A', 'B', 'C', 'D'], name='Genus'),
)
df

Genus,A,B,C,D
one,0.37067,0.602792,0.229159,0.486744
two,0.420082,0.571653,0.049024,0.880592
three,0.814568,0.27716,0.880316,0.431326
four,0.37402,0.89942,0.460304,0.100843
five,0.43327,0.125107,0.494675,0.961825
six,0.601648,0.478576,0.20569,0.560547


In [25]:
df.plot.bar() # bar plot

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x1f5e58a65c0>

In [26]:
df.plot.bar(stacked=True) # Stacked bar plot.

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x1f5e5f35cc0>

# Case Study: Tipping Dataset

Suppose we wanted to make a stacked bar plot showing the percentage of data points for each party size on each day.

In [32]:
# Load the data in this cell so the preview does not depend on a later cell
# having been run first (a fresh kernel would otherwise raise NameError).
tips = pd.read_csv('tips.csv')
# Show only the first rows rather than dumping the whole frame.
tips.head(10)

Unnamed: 0,total_bill,tip,smoker,day,time,size
0,16.99,1.01,No,Sun,Dinner,2
1,10.34,1.66,No,Sun,Dinner,3
2,21.01,3.50,No,Sun,Dinner,3
3,23.68,3.31,No,Sun,Dinner,2
4,24.59,3.61,No,Sun,Dinner,4
5,25.29,4.71,No,Sun,Dinner,4
6,8.77,2.00,No,Sun,Dinner,2
7,26.88,3.12,No,Sun,Dinner,4
8,15.04,1.96,No,Sun,Dinner,2
9,14.78,3.23,No,Sun,Dinner,2


In [33]:
tips = pd.read_csv('tips.csv')
# Cross-tabulate day (rows) against party size (columns); cells are counts.
party_counts = pd.crosstab(tips['day'], tips['size'])
# Not many 1- and 6-person parties, so keep only party sizes 2 through 5
# (.loc label slicing includes both endpoints).
party_counts = party_counts.loc[:, 2:5]

In [40]:
# Normalize each row to sum to 1: sum(axis=1) gives the per-row totals, and
# div(..., axis=0) aligns those totals on the row index before dividing.
party_pcts = party_counts.div(party_counts.sum(axis=1), axis=0)

# NOTE(review): the section text describes a stacked bar plot, but this call
# draws grouped bars. Pass stacked=True if a stacked plot is intended.
party_pcts.plot.bar()

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x1f5e7bb0e48>

# Histogram plot

In [38]:
tips['total_bill'].plot.hist(bins=50)

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x1f5e6e92320>

In [39]:
tips['total_bill'].plot.density()

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x1f5e6e019b0>

In [41]:
plt.close('all')

# Seaborn

In [43]:
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns

np.random.seed(sum(map(ord,"aesthetics")))


def sinplot(flip=1):
    """Draw six phase-shifted sine curves on the current matplotlib axes.

    Curve k (k = 1..6) is sin(x + 0.5 * k) scaled by (7 - k), evaluated at
    100 evenly spaced points on [0, 14].

    Parameters
    ----------
    flip : number, default 1
        Extra multiplier applied to every curve; -1 mirrors the plot
        vertically.
    """
    xs = np.linspace(0, 14, 100)
    for k in range(1, 7):
        wave = np.sin(xs + k * .5)
        plt.plot(xs, wave * (7 - k) * flip)

In [44]:
sns.set()
fig = plt.figure()
sinplot(1)

<IPython.core.display.Javascript object>

# Seaborn figure Style

In [46]:
sns.set_style('whitegrid')
data = np.random.normal(size=(20,6)) + np.arange(6)/2

sns.boxplot(data=data)

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x1f5e87222e8>

In [47]:
fig = plt.figure()
sns.set_style("dark")
sinplot()

<IPython.core.display.Javascript object>

In [48]:
fig = plt.figure()
sns.set_style("white")
sinplot()

<IPython.core.display.Javascript object>

In [49]:
fig = plt.figure()
sns.set_style("ticks")
sinplot()

<IPython.core.display.Javascript object>

In [50]:
# despine function
fig = plt.figure()
sinplot()
sns.despine()

<IPython.core.display.Javascript object>

In [51]:
plt.close('all')
f , ax = plt.subplots()
sns.violinplot(data=data)
sns.despine(offset=10,trim = True)

<IPython.core.display.Javascript object>

# Temporarily setting Figure Style using axes_style()

In [52]:
fig = plt.figure()
# Used as a context manager, axes_style() applies "darkgrid" only to axes
# created inside the with-block.
with sns.axes_style("darkgrid"):
    plt.subplot(211)
    sinplot()
    
# This subplot is created outside the with-block, so it is drawn with
# whatever style is currently set globally.
plt.subplot(212)
sinplot(-1)

<IPython.core.display.Javascript object>

In [53]:
sns.axes_style()

{'axes.facecolor': 'white',
 'axes.edgecolor': '.15',
 'axes.grid': False,
 'axes.axisbelow': True,
 'axes.linewidth': 1.25,
 'axes.labelcolor': '.15',
 'figure.facecolor': 'white',
 'grid.color': '.8',
 'grid.linestyle': '-',
 'text.color': '.15',
 'xtick.color': '.15',
 'ytick.color': '.15',
 'xtick.direction': 'out',
 'ytick.direction': 'out',
 'xtick.major.size': 6.0,
 'ytick.major.size': 6.0,
 'xtick.minor.size': 3.0,
 'ytick.minor.size': 3.0,
 'legend.frameon': False,
 'legend.numpoints': 1,
 'legend.scatterpoints': 1,
 'lines.solid_capstyle': 'round',
 'image.cmap': 'rocket',
 'font.family': ['sans-serif'],
 'font.sans-serif': ['Arial',
  'DejaVu Sans',
  'Liberation Sans',
  'Bitstream Vera Sans',
  'sans-serif']}

In [54]:
fig = plt.figure()
sns.set_style("darkgrid" ,{"axes.facecolor" : ".1"})
sinplot()

<IPython.core.display.Javascript object>

# Scaling plot Elements

In [56]:
fig = plt.figure()
sns.set()
sns.set_context("paper")
sinplot()

<IPython.core.display.Javascript object>

In [57]:
sns.set_context("talk")
sinplot()

<IPython.core.display.Javascript object>

In [58]:
sns.set_context("poster")
sinplot()

<IPython.core.display.Javascript object>

In [59]:
sns.set_context("notebook" ,font_scale = 1.5 ,rc = {"lines.linewidth":2.5})
sinplot()

<IPython.core.display.Javascript object>

# Color Palettes

In [60]:
current_palette = sns.color_palette()
sns.palplot(current_palette)

<IPython.core.display.Javascript object>

In [61]:
sns.palplot(sns.color_palette("hls",8))

<IPython.core.display.Javascript object>

In [62]:
sns.palplot(sns.hls_palette(8,l=.3,s=.8))

<IPython.core.display.Javascript object>

In [67]:
colors = ['windows blue','amber','faded green','dusty purple']

In [68]:
sns.palplot(sns.xkcd_palette(colors))

<IPython.core.display.Javascript object>

In [73]:
fig = plt.figure()
plt.plot([0, 1], [0, 1], sns.xkcd_rgb["pale red"], lw=3)
plt.plot([0, 1], [0, 2], sns.xkcd_rgb["medium green"], lw=3)
plt.plot([0, 1], [0, 3], sns.xkcd_rgb["denim blue"], lw=3);

<IPython.core.display.Javascript object>

# Sequential Color Palette

In [74]:
sns.choose_colorbrewer_palette(data_type="sequential")

interactive(children=(Dropdown(description='name', options=('Greys', 'Reds', 'Greens', 'Blues', 'Oranges', 'Pu…

[(0.9575547866205305, 0.9575547866205305, 0.9575547866205305),
 (0.9012072279892349, 0.9012072279892349, 0.9012072279892349),
 (0.8328950403690888, 0.8328950403690888, 0.8328950403690888),
 (0.7502191464821223, 0.7502191464821223, 0.7502191464821223),
 (0.6434140715109573, 0.6434140715109573, 0.6434140715109573),
 (0.5387158785082661, 0.5387158785082661, 0.5387158785082661),
 (0.440322952710496, 0.440322952710496, 0.440322952710496),
 (0.342883506343714, 0.342883506343714, 0.342883506343714),
 (0.22329873125720878, 0.22329873125720878, 0.22329873125720878),
 (0.10469819300269129, 0.10469819300269129, 0.10469819300269129)]

In [75]:
sns.palplot(sns.color_palette("Blues"))

<IPython.core.display.Javascript object>

In [76]:
sns.palplot(sns.color_palette("BuGn_r"))

<IPython.core.display.Javascript object>

In [77]:
sns.palplot(sns.color_palette("GnBu_d"))

<IPython.core.display.Javascript object>

# Diverging Color Palette

In [78]:
sns.palplot(sns.color_palette("BrBG",7))

<IPython.core.display.Javascript object>

In [79]:
sns.palplot(sns.color_palette("RdBu_r",7))

<IPython.core.display.Javascript object>

In [80]:
sns.palplot(sns.color_palette("coolwarm",7))

<IPython.core.display.Javascript object>

# Plotting Functions in Seaborn

# Univariate Distribution

In [81]:
import numpy as np
import pandas as pd
from scipy import stats,integrate
import matplotlib.pyplot as plt
import seaborn as sns
sns.set(color_codes=True)
np.random.seed(sum(map(ord,"distribution")))

In [92]:
plt.close('all')
fig = plt.figure()
x = np.random.normal(size=100)
sns.distplot(x);

<IPython.core.display.Javascript object>



In [93]:
sns.distplot(x,bins=20,kde=False,rug=True);

<IPython.core.display.Javascript object>



In [94]:
sns.distplot(x,hist=False,rug=True);

<IPython.core.display.Javascript object>

In [95]:
sns.kdeplot(x,shade=True);

<IPython.core.display.Javascript object>

In [96]:
sns.kdeplot(x)

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x1f5ea767160>

In [98]:
sns.kdeplot(x, bw=0.2, label="bw:0.2")
sns.kdeplot(x,  bw=2,    label="bw:2")
plt.legend()

<IPython.core.display.Javascript object>

<matplotlib.legend.Legend at 0x1f5ea021080>

In [99]:
sns.kdeplot(x,shade=True,cut=0);

<IPython.core.display.Javascript object>

In [100]:
#Gamma plot
x = np.random.gamma(6,size=200)
sns.distplot(x,kde=False,fit=stats.gamma)

<IPython.core.display.Javascript object>



<matplotlib.axes._subplots.AxesSubplot at 0x1f5ea18cc88>

# Bivariate Distribution

In [None]:
sns.set()

In [102]:
plt.close('all')
mean , cov = [0,1],[(1,.5),(.5,1)]
data = np.random.multivariate_normal(mean,cov,200)
df = pd.DataFrame(data,columns=["x","y"])
sns.jointplot(x="x",y="y",data=df)

<IPython.core.display.Javascript object>



<seaborn.axisgrid.JointGrid at 0x1f5e9e70fd0>

In [103]:
#hex-bin plot
x,y = np.random.multivariate_normal(mean,cov,1000).T
#Temporary set white
with sns.axes_style("white"):
    sns.jointplot(x=x,y=y,kind="hex",color="k");

<IPython.core.display.Javascript object>



In [104]:
#contour plot
sns.jointplot(x="x",y="y",data=df,kind="kde");

<IPython.core.display.Javascript object>

In [106]:
#contour + rugplot
f, ax = plt.subplots(figsize=(6, 6))
sns.kdeplot(df.x, df.y, ax=ax)
sns.rugplot(df.x, color="g", ax=ax)
sns.rugplot(df.y, vertical=True, ax=ax);

<IPython.core.display.Javascript object>

In [111]:
sns.set()

In [114]:
iris = sns.load_dataset("iris")
# pairplot is figure-level: it builds its own figure, so no plt.figure() call
# is made here (one would only leave an empty extra figure behind).
sns.pairplot(iris);

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

# Plotting Categorical Data

In [115]:
sns.set()

In [118]:
sns.set(style="whitegrid",color_codes=True)
np.random.seed(sum(map(ord,"categorical")))
titanic = sns.load_dataset("titanic")
tips    = sns.load_dataset("tips")
iris    = sns.load_dataset("iris")

In [119]:
sns.stripplot(x="day",y="total_bill",data=tips)

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x1f5ead3c9e8>

In [121]:
sns.stripplot(x="day",y="total_bill",data=tips,jitter=True)

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x1f5ea70b978>

In [122]:
#no overlap
sns.swarmplot(x="day",y="total_bill",data=tips)

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x1f5eb14cc88>

In [123]:
sns.swarmplot(x="day",y="total_bill",hue="sex",data=tips)

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x1f5eb191ef0>

In [124]:
#boxplot 
sns.boxplot(x="day",y="total_bill",hue="time",data=tips)

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x1f5eb198128>

In [127]:
#violin plot
sns.violinplot(x="total_bill",y="day",hue="time",data = tips, bw =.1,scale = "count",scale_hue = False)

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x1f5eb977588>

In [130]:
#pointplot
sns.pointplot(x="class",y="survived",hue="sex",data=titanic,
palette = {"male" : "g" ,"female" : "m"},
markers = ["^","o"],linestyles = ["-","--"]);

<IPython.core.display.Javascript object>

In [131]:
#factorplot
sns.factorplot(x="day",y="total_bill",hue="smoker",data=tips)

<IPython.core.display.Javascript object>

<seaborn.axisgrid.FacetGrid at 0x1f5eba1af60>

# Plotting Linear Relationships

In [133]:
tips = sns.load_dataset("tips")

In [135]:
fig = plt.figure()
sns.regplot(x="total_bill",y="tip",data=tips)

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x1f5ef9ea400>

In [140]:
# lmplot is figure-level: it creates its own FacetGrid figure, so no
# plt.figure() call is made here (one would only produce an empty extra figure).
sns.lmplot(x="total_bill", y="tip", data=tips)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<seaborn.axisgrid.FacetGrid at 0x1f5efcff2e8>

# Plotting Data-Aware Grids

In [142]:
#Facetgrid
g = sns.FacetGrid(tips,col="time")
g.map(plt.hist,"tip")

<IPython.core.display.Javascript object>

<seaborn.axisgrid.FacetGrid at 0x1f5efd6eb38>