In [1]:
%matplotlib notebook

In [2]:
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt

## Pandas Visualization

pandas uses matplotlib under the hood, and provides some convenient functions for visualizing data.

Matplotlib comes with a number of predefined styles, which we can choose from, to change the default look of our plots.

In [3]:
plt.style.available

['Solarize_Light2',
 '_classic_test_patch',
 'bmh',
 'classic',
 'dark_background',
 'fast',
 'fivethirtyeight',
 'ggplot',
 'grayscale',
 'seaborn',
 'seaborn-bright',
 'seaborn-colorblind',
 'seaborn-dark',
 'seaborn-dark-palette',
 'seaborn-darkgrid',
 'seaborn-deep',
 'seaborn-muted',
 'seaborn-notebook',
 'seaborn-paper',
 'seaborn-pastel',
 'seaborn-poster',
 'seaborn-talk',
 'seaborn-ticks',
 'seaborn-white',
 'seaborn-whitegrid',
 'tableau-colorblind10']

In [8]:
plt.style.use('seaborn-paper')

In [6]:
df = pd.DataFrame({'A': np.random.randn(365).cumsum(0), 
                   'B': np.random.randn(365).cumsum(0) + 20,
                   'C': np.random.randn(365).cumsum(0) - 20}, 
                  index=pd.date_range('1/1/2017', periods=365))
df.head()

Unnamed: 0,A,B,C
2017-01-01,0.954655,18.225419,-19.954826
2017-01-02,0.747559,17.57328,-20.452488
2017-01-03,-1.128735,17.187857,-20.230341
2017-01-04,-2.269584,16.932171,-19.446233
2017-01-05,-3.241426,18.016018,-18.782643


In [10]:
df.plot(); #Simple wrapper around plt.plot
#Semicolon -> Show only plots

<IPython.core.display.Javascript object>

In [11]:
df.plot('A','B',kind='bar')

<IPython.core.display.Javascript object>

<AxesSubplot:xlabel='A'>

In [12]:
df.plot.scatter('A','C',c='B',s=df['B'],colormap='plasma')
#Return value is matplotlib.subplots -> Can do more work by matplotlib


<IPython.core.display.Javascript object>

<AxesSubplot:xlabel='A', ylabel='C'>

In [15]:
df.plot.hist(bins=100)

<IPython.core.display.Javascript object>

<AxesSubplot:ylabel='Frequency'>

In [18]:
ax = df.plot.kde();

<IPython.core.display.Javascript object>

Pandas also has plotting tools that help with visualizing large amounts of data or high dimensional data.

## Visualize large amount or high dimensional data

In [30]:
df = pd.read_csv('iris.csv')
df

Unnamed: 0,SepalLength,SepalWidth,PetalLength,PetalWidth,Name
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,Iris-virginica
146,6.3,2.5,5.0,1.9,Iris-virginica
147,6.5,3.0,5.2,2.0,Iris-virginica
148,6.2,3.4,5.4,2.3,Iris-virginica


In [28]:
pd.plotting.scatter_matrix(df);

<IPython.core.display.Javascript object>

## Parallel Coordinates

Pandas also has plotting tools that help with visualizing large amounts of data or high dimensional data.

Coloring the lines by class, in this example the species of flower,

It shows patterns or clustering

In [29]:
plt.figure()
pd.plotting.parallel_coordinates(df,'Name')
#

<IPython.core.display.Javascript object>

<AxesSubplot:>

## Seaborn

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib notebook

In [2]:
sns.__version__

'0.11.1'

In [3]:
v1 = pd.Series(np.random.normal(0,10,1000),name='v1')
v2 = pd.Series(2*v1 + np.random.normal(60,15,1000),name='v2')
v1

0      -5.427694
1      13.232388
2       2.932788
3      11.170147
4       3.240976
         ...    
995     3.213353
996    -7.020063
997   -13.710539
998     6.908957
999   -10.728499
Name: v1, Length: 1000, dtype: float64

In [4]:
plt.figure()
plt.hist(v1, bins=np.arange(-100,150,3),alpha=0.5)
plt.hist(v2, bins=np.arange(-100,150,3),alpha=0.5)
plt.legend()

<IPython.core.display.Javascript object>

No handles with labels found to put in legend.


<matplotlib.legend.Legend at 0x2471ef4b610>

In [5]:
plt.figure()
plt.hist([v1,v2],histtype='barstacked',bins=100,density=True)
v3 = np.concatenate((v1,v2))
sns.kdeplot(v3)

<IPython.core.display.Javascript object>

<AxesSubplot:ylabel='Density'>

In [6]:
plt.figure()
sns.distplot([v1,v2],hist_kws={'color':'Teal'},kde_kws={'color':'Blue'})

<IPython.core.display.Javascript object>



<AxesSubplot:ylabel='Density'>

In [7]:
ax = sns.jointplot(v1,v2,alpha=0.4)




<IPython.core.display.Javascript object>

Seaborn is also based on matplotlib -> Can do manipulation through matplotlib

In [8]:
dir(ax.ax_joint)

['__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__setstate__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_add_text',
 '_adjustable',
 '_agg_filter',
 '_alias_map',
 '_alpha',
 '_anchor',
 '_animated',
 '_aspect',
 '_autoscaleXon',
 '_autoscaleYon',
 '_autotitlepos',
 '_axes',
 '_axes_class',
 '_axes_locator',
 '_axisbelow',
 '_box_aspect',
 '_clipon',
 '_clippath',
 '_contains',
 '_convert_dx',
 '_current_image',
 '_default_contains',
 '_facecolor',
 '_fill_between_x_or_y',
 '_frameon',
 '_gci',
 '_gen_axes_patch',
 '_gen_axes_spines',
 '_get_axis_list',
 '_get_axis_map',
 '_get_clipping_extent_bbox',
 '_get_lines',
 '_get_patches_for_fill',
 '_get_view',
 '_gid',
 '_gridOn',
 '_in_layout',

In [9]:
sns.jointplot(v1,v2,kind='hex')



<IPython.core.display.Javascript object>

<seaborn.axisgrid.JointGrid at 0x24720d86c70>

In [10]:
sns.jointplot(v1,v2,kind='kde',space=0)



<IPython.core.display.Javascript object>

<seaborn.axisgrid.JointGrid at 0x24720f97520>

## Seaborn high dimensional data

In [12]:
df = pd.read_csv('iris.csv')
df

Unnamed: 0,SepalLength,SepalWidth,PetalLength,PetalWidth,Name
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,Iris-virginica
146,6.3,2.5,5.0,1.9,Iris-virginica
147,6.5,3.0,5.2,2.0,Iris-virginica
148,6.2,3.4,5.4,2.3,Iris-virginica


In [24]:
sns.pairplot(df,hue='Name',diag_kind='kde')

<IPython.core.display.Javascript object>

<seaborn.axisgrid.PairGrid at 0x24723a71d00>

### Violin&Swarm plot -> Improved version of boxplot

In [25]:
plt.figure()
plt.subplot(121)
sns.swarmplot('Name','PetalLength',data=df)
plt.subplot(122)
sns.violinplot('Name','PetalLength',data=df)

<IPython.core.display.Javascript object>



<AxesSubplot:xlabel='Name', ylabel='PetalLength'>

Violin plots convey more information than box plots and are able to show particular phenomena within the distribution that box plots are unable to convey such as multi modality.