# Visualization in Plotly Python

**Author:** Iftikhar Ud Din <br>
**Portfolio:** [DinIftikhar](https://diniftikhar.github.io/) <br>
**LinkedIn Profile:** [DinIftikhar](https://www.linkedin.com/in/diniftikhar/) 

## Objective:

This notebook contains all the Plotly plots, plus the pie charts, that I learned in Jose Portilla's course on [Udemy](https://www.udemy.com/course/interactive-python-dashboards-with-plotly-and-dash/). The objective was to practice my visualisation skills in Plotly.


In [None]:
#!pip install kaleido
# Data libraries plus the Plotly graph-object and I/O modules used throughout the notebook.
import numpy as np
import pandas as pd
import plotly.graph_objs as go
import plotly.io as pio
# Make 'notebook_connected' the default renderer used when figures are shown.
pio.renderers.default = 'notebook_connected'

In [None]:
# Seed NumPy's global random generator so the random data drawn below is repeatable.
np.random.seed(42)

## 1. Scatter Plot

In [None]:
# Draw 100 random integers in [1, 100] for each axis (x first, then y).
random_x = np.random.randint(low=1, high=101, size=100)
random_y = np.random.randint(low=1, high=101, size=100)

# Appearance shared by every marker in the scatter trace.
marker_style = {
    'size': 12,
    'color': 'rgb(212,53,108)',
    'symbol': 'circle',
    'line': dict(width=0.5),
}
data = [go.Scatter(x=random_x, y=random_y, mode='markers', marker=marker_style)]

layout = go.Layout(
    title={'text': 'Scatter Plot', 'x': 0.5},  # x=0.5 centres the title over the graph
    xaxis={'title': 'xlabel'},
    yaxis={'title': 'ylabel'},
    hovermode='closest',
)

fig1 = go.Figure(data=data, layout=layout)
fig1.show()

## 2. Line Plot

In [None]:
# Re-seed so the noise drawn in this cell is repeatable.
np.random.seed(56)

x_values = np.linspace(0, 1, 100)
y_values = np.random.randn(100)

# The same noisy series is drawn three times, shifted by +5, 0 and -5,
# each with a different Scatter mode: (mode, legend name, y data).
trace_specs = (
    ('markers', 'markers', y_values + 5),
    ('lines', 'mylines', y_values),
    ('lines+markers', 'myfav', y_values - 5),
)
trace0, trace1, trace2 = (
    go.Scatter(x=x_values, y=series, mode=mode, name=label)
    for mode, label, series in trace_specs
)
data = [trace0, trace1, trace2]

layout = go.Layout(title={'text': 'Line Charts', 'x': 0.5})

fig2 = go.Figure(data=data, layout=layout)
fig2.show()

#### 2.1 Plotting Line Charts using Pandas

In [None]:
# Load the population-estimates CSV and preview its first five rows.
df_nst = pd.read_csv(filepath_or_buffer='SourceData/nst-est2017-alldata.csv')
df_nst.head(n=5)

In [None]:
# Keep only the rows whose DIVISION equals '1' and index them by NAME.
# NOTE(review): DIVISION is compared against the string '1' — presumably the
# column is read as text; confirm against the CSV.
in_division_one = df_nst['DIVISION'] == '1'
df2 = df_nst.loc[in_division_one].set_index('NAME')

In [None]:
# Narrow df2 to the columns whose names begin with 'POP', then display it.
list_of_pop_col = list(filter(lambda col: col.startswith('POP'), df2.columns))
df2 = df2.loc[:, list_of_pop_col]
df2

In [None]:
# One line trace per row of df2: x is the column labels, y is that row's values,
# and the row's index label becomes the legend name.
data = [
    go.Scatter(x=df2.columns, y=row_values, mode='lines', name=row_label)
    for row_label, row_values in df2.iterrows()
]

layout = go.Layout(
    title={'text': 'Population Estimates of Different States', 'x': 0.5},
    yaxis={'title': 'Estimated Values'},
)

fig3 = go.Figure(data=data, layout=layout)
fig3.show()

#### 2.2 Exercise

In [None]:
# Load the Yuma, AZ dataset and preview its first five rows.
df_yuma = pd.read_csv(filepath_or_buffer='Data/2010YumaAZ.csv')
df_yuma.head(n=5)

In [None]:
# Plot one temperature line per weekday from the Yuma dataset.
days = ['TUESDAY', 'WEDNESDAY', 'THURSDAY', 'FRIDAY', 'SATURDAY', 'SUNDAY', 'MONDAY']
data = []
for day in days:
    # Take x and y from the SAME per-day subset. Previously x was the whole
    # LST_TIME column while y was filtered to one day, so the two arrays had
    # different lengths and only lined up by position.
    df_day = df_yuma[df_yuma['DAY'] == day]
    trace = go.Scatter(
        x=df_day['LST_TIME'],
        y=df_day['T_HR_AVG'],
        mode='lines',
        name=day,
    )
    data.append(trace)

layout = go.Layout(
    title=dict(text='Daily Average Temperature from June 1-7, 2010 in Arizona', x=0.5),
    xaxis=dict(title='Time (24hr format)'),
    yaxis=dict(title='Average Temperature'),
)

fig4 = go.Figure(data=data, layout=layout)
fig4.show()

## 3. Bar Chart

In [None]:
# Load the 2018 Winter Olympics medal table and preview its first five rows.
df_winter = pd.read_csv(filepath_or_buffer='Data/2018WinterOlympics.csv')
df_winter.head(n=5)

In [None]:
# A single bar series: the 'Total' column plotted against 'NOC'.
total_bars = go.Bar(x=df_winter['NOC'], y=df_winter['Total'])
data = [total_bars]
layout = go.Layout(title='Bar Chart')
fig5 = go.Figure(data=data, layout=layout)
fig5.show()

#### 3.1 Nested Bar Chart

In [None]:
# Bar colour for each medal column, in plotting order.
medal_colors = {'Gold': '#FFD700', 'Silver': '#9EA0A1', 'Bronze': '#CD7F32'}

# One bar series per medal column, all plotted against NOC.
trace1, trace2, trace3 = (
    go.Bar(
        x=df_winter['NOC'],
        y=df_winter[medal],
        name=medal,
        marker=dict(color=hex_color),
    )
    for medal, hex_color in medal_colors.items()
)

data = [trace1, trace2, trace3]

# No barmode is given here, so Plotly's default bar arrangement is used.
layout = go.Layout(title='Medals Count for Winter Olympics 2018')

fig6 = go.Figure(data=data, layout=layout)
fig6.show()

#### 3.2 Stacked Bar Chart

In [None]:
# Bar colour for each medal column, in stacking order.
medal_colors = {'Gold': '#FFD700', 'Silver': '#9EA0A1', 'Bronze': '#CD7F32'}

# One bar series per medal column; each bar's text is its own medal count.
trace1, trace2, trace3 = (
    go.Bar(
        x=df_winter['NOC'],
        y=df_winter[medal],
        name=medal,
        marker=dict(color=hex_color),
        text=df_winter[medal],
    )
    for medal, hex_color in medal_colors.items()
)

data = [trace1, trace2, trace3]

# barmode='stack' stacks the three series instead of using the default arrangement.
layout = go.Layout(title='Medals Count for Winter Olympics 2018', barmode='stack')

fig7 = go.Figure(data=data, layout=layout)
fig7.show()

#### 3.3 Exercise

In [None]:
# Load the mock survey, using the CSV's first column as the row index, and preview it.
df_mock = pd.read_csv(filepath_or_buffer='Data/mocksurvey.csv', index_col=0)
df_mock.head(n=5)

In [None]:
# One horizontal bar series per column of df_mock; the row index supplies the
# y categories and the column name becomes the legend entry.
data = [
    go.Bar(y=df_mock.index, x=answers, orientation='h', name=column_name)
    for column_name, answers in df_mock.items()
]

layout = go.Layout(
    title={'text': 'Mock Survey Questions', 'x': 0.5},
    barmode='relative',
)

fig8 = go.Figure(data=data, layout=layout)
fig8.show()

## 4. Pie and Donut Charts

In [None]:
# Pie chart of the 'Question 1' row: slice labels are the column names,
# slice sizes are that row's values.
labels = list(df_mock.columns)
values = df_mock.loc['Question 1'].tolist()

data = [go.Pie(labels=labels, values=values)]

# Title text and its font are given as one nested dict; the global font size is set separately.
layout = go.Layout(
    title={
        'text': 'Survey Question 1 Answers',
        'font': {'size': 25, 'family': 'Verdana', 'color': 'darkred'},
    },
    font={'size': 20},
)

fig9 = go.Figure(data=data, layout=layout)
fig9.show()

In [None]:
# Donut chart of the 'Question 2' row: same as a pie, with hole=0.4 cutting out the centre.
labels = list(df_mock.columns)
values = df_mock.loc['Question 2'].tolist()

data = [go.Pie(labels=labels, values=values, hole=0.4)]

# Title text and its font are given as one nested dict; the global font size is set separately.
layout = go.Layout(
    title={
        'text': 'Survey Question 2 Answers',
        'font': {'size': 25, 'family': 'Verdana', 'color': 'darkred'},
    },
    font={'size': 20},
)

fig10 = go.Figure(data=data, layout=layout)
fig10.show()

## 4. Bubble Plots

In [None]:
# Load the mpg dataset, treating '?' in the horsepower column as missing, and preview it.
horsepower_missing_marker = {'horsepower': '?'}
df_mpg = pd.read_csv(filepath_or_buffer='Data/mpg.csv', na_values=horsepower_missing_marker)
df_mpg.head(n=5)

In [None]:
# Bubble chart: marker size is weight / 100 and marker colour follows the
# cylinder count, with the colour scale displayed.
bubble_marker = {
    'size': df_mpg['weight'] / 100,
    'color': df_mpg['cylinders'],
    'showscale': True,
}
mpg_vs_horsepower = go.Scatter(
    x=df_mpg['horsepower'],
    y=df_mpg['mpg'],
    text=df_mpg['name'],  # text attached to each point
    mode='markers',
    marker=bubble_marker,
)
data = [mpg_vs_horsepower]

layout = go.Layout(title='Bubble Chart of mpg vs horsepower', hovermode='closest')

fig11 = go.Figure(data=data, layout=layout)
fig11.show()

#### 4.1 Exercise

In [None]:
# Same bubble encoding as the mpg chart (size = weight / 100, colour = cylinders),
# applied to displacement against acceleration.
bubble_marker = {
    'size': df_mpg['weight'] / 100,
    'color': df_mpg['cylinders'],
    'showscale': True,
}
acceleration_vs_displacement = go.Scatter(
    x=df_mpg['displacement'],
    y=df_mpg['acceleration'],
    text=df_mpg['name'],  # text attached to each point
    mode='markers',
    marker=bubble_marker,
)
data = [acceleration_vs_displacement]

layout = go.Layout(title='Bubble Chart of displacement vs acceleration', hovermode='closest')

fig12 = go.Figure(data=data, layout=layout)
fig12.show()

## 5. Boxplots

In [None]:
# Twenty sample values whose median is 20.
y = [1, 14, 14, 15, 16, 18, 18, 19, 19, 20,
     20, 23, 24, 26, 27, 27, 28, 29, 33, 54]

# boxpoints='outliers' is the point-display setting for this box trace.
# Optional extras left disabled here:
#   jitter=0.3     -> spread the points out so they all appear
#   pointpos=-1.8  -> offset the points to the left of the box
box_trace = go.Box(y=y, boxpoints='outliers')
data = [box_trace]
layout = go.Layout(title='Boxplot')

fig13 = go.Figure(data=data, layout=layout)
fig13.show()

#### 5.1 Mark Twain's analysis

In [None]:
# Two samples of frequencies, compared side by side as box plots.
snodgrass = [0.209, 0.205, 0.196, 0.210, 0.202, 0.207, 0.224, 0.223, 0.220, 0.201]
twain = [0.225, 0.262, 0.217, 0.240, 0.230, 0.229, 0.235, 0.217]

# One box per author, labelled with the author's name.
data = [
    go.Box(y=sample, name=author)
    for author, sample in (('Snodgrass', snodgrass), ('Twain', twain))
]

# The title is split over two lines with <br>; adjacent string literals are
# concatenated into a single string.
layout = go.Layout(
    title=(
        'Comparison of three-letter-word frequencies<br>'
        '    between Quintus Curtius Snodgrass and Mark Twain'
    )
)
fig14 = go.Figure(data=data, layout=layout)
fig14.show()

#### 5.2 Exercise

In [None]:
# Load the abalone dataset and preview its first five rows.
df_abalone = pd.read_csv(filepath_or_buffer='Data/abalone.csv')
df_abalone.head(n=5)

In [None]:
# Draw two random samples (35 and 30 values) from the 'rings' column,
# each without replacement.
ring_counts = df_abalone['rings']
x = np.random.choice(ring_counts, size=35, replace=False)
y = np.random.choice(ring_counts, size=30, replace=False)

In [None]:
# One box per random sample, placed side by side for comparison.
shared_box_options = {'boxpoints': 'outliers'}
box_x = go.Box(y=x, name='Sample x', **shared_box_options)
box_y = go.Box(y=y, name='Sample y', **shared_box_options)
data = [box_x, box_y]

layout = go.Layout(title='Comparison of two samples')

fig15 = go.Figure(data=data, layout=layout)
fig15.show()

## 6. Histograms

In [None]:
# Histogram of mpg with explicit bins: from 0 to 50 in steps of 2.
mpg_bins = {'start': 0, 'end': 50, 'size': 2}
data = go.Histogram(x=df_mpg['mpg'], xbins=mpg_bins)
layout = go.Layout(title='Histogram')
fig16 = go.Figure(data=data, layout=layout)
fig16.show()

#### 6.1 Exercise

In [None]:
# Histogram of abalone length with a fixed bin width.
# NOTE(review): confirm 0.2 is the intended bin width for the range of `length`.
length_bins = {'size': 0.2}
length_histogram = go.Histogram(x=df_abalone['length'], xbins=length_bins)
data = [length_histogram]
layout = go.Layout(title='Histogram')
fig17 = go.Figure(data=data, layout=layout)
fig17.show()

## 7. Dist or Distribution Plots

In [None]:
#!pip install scipy
import scipy
import plotly.figure_factory as ff

In [None]:
# 1000 random draws from np.random.randn, used as the sample for the first distplot.
x = np.random.randn(1000)

In [None]:
# create_distplot takes a list of samples and a matching list of labels;
# here there is a single sample.
hist_data = [x]
group_labels = ['Distplot']
fig18 = ff.create_distplot(hist_data=hist_data, group_labels=group_labels)
fig18.show()

In [None]:
# Four samples of 200 random draws each, shifted by -2, 0, +2 and +4.
# The generator is consumed in order, so the draws happen in the order x1..x4.
x1, x2, x3, x4 = (np.random.randn(200) + shift for shift in (-2, 0, 2, 4))

In [None]:
# Distplot of the four shifted samples, each with its own histogram bin size.
samples_by_label = {'X1': x1, 'X2': x2, 'X3': x3, 'X4': x4}
hist_data = list(samples_by_label.values())
group_labels = list(samples_by_label.keys())

fig19 = ff.create_distplot(
    hist_data=hist_data,
    group_labels=group_labels,
    bin_size=[0.2, 0.3, 0.4, 0.5],
)
fig19.show()

In [None]:
# Distplot of the Snodgrass and Twain samples, both using a bin size of 0.005.
hist_data = [snodgrass, twain]
group_labels = ['Snodgrass', 'Twain']

fig20 = ff.create_distplot(
    hist_data=hist_data,
    group_labels=group_labels,
    bin_size=[0.005, 0.005],
)
fig20.show()

#### 7.1 Exercise 

In [None]:
# Load the iris dataset and preview its first five rows.
df_iris = pd.read_csv(filepath_or_buffer='Data/iris.csv')
df_iris.head(n=5)

In [None]:
# Petal lengths split by class; the class names double as the distplot labels.
group_labels = ['Iris-setosa', 'Iris-versicolor', 'Iris-virginica']
setosa, versicolor, virginica = (
    df_iris.loc[df_iris['class'] == class_name, 'petal_length']
    for class_name in group_labels
)

hist_data = [setosa, versicolor, virginica]
fig21 = ff.create_distplot(
    hist_data=hist_data,
    group_labels=group_labels,
    bin_size=[0.2, 0.2, 0.2],
)
fig21.show()

## 8. Heatmaps

In [None]:
# Load the Santa Barbara, CA dataset and preview its first five rows.
df_ca = pd.read_csv(filepath_or_buffer='Data/2010SantaBarbaraCA.csv')
df_ca.head(n=5)

In [None]:
# Heatmap with DAY on x, LST_TIME on y and T_HR_AVG as the colour value.
# z is converted to a plain Python list before being handed to Plotly.
temperature_values = df_ca['T_HR_AVG'].values.tolist()
santa_barbara_heatmap = go.Heatmap(
    x=df_ca['DAY'],
    y=df_ca['LST_TIME'],
    z=temperature_values,
    colorscale='JET',
)
data = [santa_barbara_heatmap]

layout = go.Layout(
    title='Santa Barbara Daily Hourly Average Temperature Heatmap from June 1-7, 2010'
)

fig22 = go.Figure(data=data, layout=layout)
fig22.show()

## 9. Multiple Subplots in Plotly

In [None]:
# The subplots below reuse df_ca and df_yuma and add a third dataset, 2010SitkaAK.csv.
# Fix: this previously re-read 'Data/2010SantaBarbaraCA.csv', so the frame named
# df_sitka held the Santa Barbara data again instead of Sitka's.
# NOTE(review): assumes 2010SitkaAK.csv sits in Data/ next to the other CSVs — confirm.
df_sitka = pd.read_csv('Data/2010SitkaAK.csv')
df_sitka

In [None]:
# Settings shared by all three heatmaps: the same colour scale and the same
# fixed colour range (zmin/zmax) for every trace.
shared_heatmap_options = {'colorscale': 'JET', 'zmin': 5, 'zmax': 40}

# One heatmap per dataframe: DAY on x, LST_TIME on y, T_HR_AVG as the colour value.
sitka, ca, yuma = (
    go.Heatmap(
        x=frame['DAY'],
        y=frame['LST_TIME'],
        z=frame['T_HR_AVG'],
        **shared_heatmap_options,
    )
    for frame in (df_sitka, df_ca, df_yuma)
)


In [None]:
from plotly import subplots

In [None]:
# Lay the three heatmaps out side by side in a 1x3 grid with a shared y axis.
fig23 = subplots.make_subplots(
    rows=1,
    cols=3,
    subplot_titles=['Sitka AK', 'Santa Barbara CA', 'Yuma AZ'],
    y_title='Hourly Avg Temp',
    shared_yaxes=True,
)

# add_trace(row=, col=) replaces the deprecated append_trace(trace, row, col).
fig23.add_trace(sitka, row=1, col=1)
fig23.add_trace(ca, row=1, col=2)
fig23.add_trace(yuma, row=1, col=3)

# Centre the figure title (x=0.5); the title typo "Tempratues" is corrected here.
fig23.update_layout(title=dict(text='Temperatures for 3 states', x=0.5))

fig23.show()
