## Interactive Data Visualization with Plotly

[Plotly Express for Python Documentation](https://plotly.com/python/plotly-express/)

### Loading Sample Data

In [2]:
import pandas as pd

# Read the processed episode table and discard the 'Unnamed: 0' column
# in one chained expression instead of mutating the frame in place.
df = pd.read_csv('data/Processed.csv').drop(columns=['Unnamed: 0'])

# Preview the first five rows.
df.head()

Unnamed: 0,episode_id,episodenbr,title,views,share,appreciation,chart,summary,date,doctorid,...,season,has_9,has_10,has_11,has_12,has_13,day_Mon,day_Sat,day_Sun,day_Tue
0,1.01,697,Rose,10.81,44.8,76.0,7,No Summary Available,2005-03-26,9,...,1,1,0,0,0,0,0,1,0,0
1,1.02,698,The End of the World,7.97,37.8,76.0,19,No Summary Available,2005-04-02,9,...,1,1,0,0,0,0,0,1,0,0
2,1.03,699,The Unquiet Dead,8.86,37.8,80.0,15,No Summary Available,2005-04-09,9,...,1,1,0,0,0,0,0,1,0,0
3,1.04,700,Aliens of London,7.63,35.7,82.0,18,No Summary Available,2005-04-16,9,...,1,1,0,0,0,0,0,1,0,0
4,1.05,701,World War Three,7.98,40.2,81.0,20,No Summary Available,2005-04-23,9,...,1,1,0,0,0,0,0,1,0,0


### Global Theme Settings

In [3]:
import plotly.express as px

# Plotly Express defaults applied to every figure created after this cell:
# dark template, Prism palette for discrete colors, Agsunset for continuous colors.
px.defaults.template = 'plotly_dark'
px.defaults.color_discrete_sequence = px.colors.qualitative.Prism
px.defaults.color_continuous_scale = px.colors.sequential.Agsunset

### Scatter Plots

In [10]:
# Rating against season, colored by appreciation; the episode title is added to the hover box.
px.scatter(
    df,
    x='season',
    y='rating',
    color='appreciation',
    hover_data=['title'],
)

### 3D Scatter Plots

In [14]:
# Season / rating / votes on the three axes, colored by doctorid; title shown on hover.
px.scatter_3d(
    df,
    x='season',
    y='rating',
    z='votes',
    color='doctorid',
    hover_data=['title'],
)

### Histograms

In [40]:
# Distribution of rating in up to 50 bins, split by the has_13 flag,
# with a violin plot drawn in the margin.
px.histogram(
    df,
    x='rating',
    nbins=50,
    color='has_13',
    marginal='violin',
)

### Box Plots and Violin Plots

In [27]:
# One horizontal box per doctorid over rating; points='all' also draws every observation.
px.box(
    df,
    x='rating',
    color='doctorid',
    points='all',
)

In [28]:
# Same view as a violin plot: rating per doctorid, with every observation drawn.
px.violin(
    df,
    x='rating',
    color='doctorid',
    points='all',
)

### Bar Charts

In [44]:
# Share per row (no x given, so bars are laid out along the frame's index),
# colored by season; title shown on hover.
px.bar(
    df,
    y='share',
    color='season',
    hover_data=['title'],
)

In [54]:
# Polar bar chart of appreciation, colored by season; title shown on hover.
# The figure title names the column actually plotted on the radial axis
# (it previously said "Votes" while r was 'appreciation').
px.bar_polar(
    df,
    r='appreciation',
    color='season',
    hover_data=['title'],
    title='Episode Appreciation by Season',
)

### Line Charts and Area Charts

In [72]:
# Rating over date, one line per doctorid.
px.line(
    df,
    x='date',
    y='rating',
    color='doctorid',
)

In [76]:
# 3D line of date / rating / appreciation, one line per doctorid.
px.line_3d(
    df,
    x='date',
    y='rating',
    z='appreciation',
    color='doctorid',
)

In [68]:
# Area chart of rating over date, one colored area per season.
px.area(
    df,
    x='date',
    y='rating',
    color='season',
)

### Pie Charts

In [80]:
# Pie slices named by doctorid and sized by views.
px.pie(
    df,
    values='views',
    names='doctorid',
)

### Tree Maps

In [88]:
# Treemap sized and colored by rating.
# Hierarchy, outermost first: constant root -> doctorid -> constant level -> season -> title.
px.treemap(
    df,
    path=[
        px.Constant('All Doctors'),
        'doctorid',
        px.Constant('All Seasons'),
        'season',
        'title',
    ],
    values='rating',
    color='rating',
)


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.



### Sunbursts

In [92]:
# Sunburst sized and colored by views.
# Hierarchy, innermost ring first: constant root -> doctorid -> title.
px.sunburst(
    df,
    path=[
        px.Constant('All Doctors'),
        'doctorid',
        'title',
    ],
    values='views',
    color='views',
)


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.



### Heatmaps and Correlation Analysis

In [93]:
# Pairwise correlation matrix of the numeric (and boolean) columns.
# The columns are selected explicitly because df also holds text columns
# (e.g. title, summary): older pandas dropped those silently inside corr(),
# while pandas >= 2.0 raises on them instead.
corr = df.select_dtypes(include=['number', 'bool']).corr()
corr

Unnamed: 0,episode_id,episodenbr,views,share,appreciation,chart,doctorid,rating,votes,season,has_9,has_10,has_11,has_12,has_13,day_Mon,day_Sat,day_Sun,day_Tue
episode_id,1.0,0.996921,-0.357616,-0.724653,-0.329043,0.168053,0.966445,-0.304684,-0.395395,0.999922,-0.490662,-0.588435,0.036907,0.603687,0.471761,-0.078638,-0.383035,0.406322,0.06793
episodenbr,0.996921,1.0,-0.388559,-0.739967,-0.31695,0.194821,0.964009,-0.283154,-0.395498,0.995919,-0.486152,-0.603353,0.054345,0.608256,0.454978,-0.090732,-0.357784,0.380753,0.061823
views,-0.357616,-0.388559,1.0,0.726807,0.16736,-0.780737,-0.317449,0.045729,0.329494,-0.352477,0.106552,0.236464,0.143525,-0.511289,0.083302,0.116756,-0.318849,0.190427,0.281545
share,-0.724653,-0.739967,0.726807,1.0,0.325683,-0.636914,-0.680867,0.190566,0.44903,-0.722579,0.286955,0.53332,-0.003023,-0.687832,-0.075659,0.045405,-0.048612,-0.006939,0.104642
appreciation,-0.329043,-0.31695,0.16736,0.325683,1.0,-0.260721,-0.347308,0.549926,0.235551,-0.330742,-0.254506,0.379954,0.352814,-0.375185,-0.358403,-0.014896,0.313893,-0.323525,-0.02598
chart,0.168053,0.194821,-0.780737,-0.636914,-0.260721,1.0,0.11298,-0.038217,-0.294992,0.164042,0.118854,-0.252066,-0.110871,0.417973,-0.200637,-0.054962,0.320307,-0.264831,-0.132194
doctorid,0.966445,0.964009,-0.317449,-0.680867,-0.347308,0.11298,1.0,-0.337997,-0.341335,0.96655,-0.554591,-0.547767,0.031755,0.570571,0.534884,-0.071582,-0.417708,0.454248,0.051266
rating,-0.304684,-0.283154,0.045729,0.190566,0.549926,-0.038217,-0.337997,1.0,0.465623,-0.307224,0.042146,0.162622,0.103112,0.019207,-0.533155,-0.027511,0.427622,-0.464023,-0.078775
votes,-0.395395,-0.395498,0.329494,0.44903,0.235551,-0.294992,-0.341335,0.465623,1.0,-0.395378,0.132802,0.292113,-0.027256,-0.339602,-0.032979,0.004547,0.051488,-0.016931,-0.05665
season,0.999922,0.995919,-0.352477,-0.722579,-0.330742,0.164042,0.96655,-0.307224,-0.395378,1.0,-0.490878,-0.588004,0.03642,0.603657,0.472139,-0.07684,-0.385165,0.408561,0.068342


In [99]:
# Heatmap of the full correlation matrix; text_auto prints each value inside its cell
# and aspect='auto' lets the cells stretch to fill the figure.
px.imshow(
    corr,
    text_auto=True,
    aspect='auto',
)

In [103]:
import numpy as np

# Boolean mask that is True on the diagonal and above it (the upper triangle).
# A correlation matrix is symmetric, so those cells duplicate the lower triangle.
mask = np.zeros_like(corr, dtype=bool)
mask[np.triu_indices_from(mask)] = True

# Blank out the masked cells (they become NaN), then drop the rows and columns
# that are left entirely empty. `axis` is passed by keyword: positional arguments
# to DataFrame.dropna are deprecated and rejected by pandas >= 2.0.
corr_mask = (
    corr.mask(mask)
    .dropna(axis='index', how='all')
    .dropna(axis='columns', how='all')
)

# Show the remaining lower-triangle values in a heatmap.
px.imshow(corr_mask, text_auto=True, aspect='auto')


In a future version of pandas all arguments of DataFrame.dropna will be keyword-only.

