# Plotly Tutorial

I am making a data visualization tutorial using the `plotly` library in Python. I will be creating the following graphs:

- Scatter Plots
- Line Plots
- Heatmaps 
- Histograms
- Violin Plots
- Box Plots
- 3D Scatter Plots
- 3D Surface Plots
- Pie Charts
- Bar Graphs

I am using `plotly` instead of `matplotlib.pyplot` or `seaborn` because `plotly` figures are **interactive**. So, with that being said,

## Let's Get Started!

In [1]:
# Importing libraries

import pandas as pd
import plotly.express as px

In [2]:
# Load the iris sample dataset bundled with plotly express and
# preview its first five rows.
df = px.data.iris()
df.head(n=5)

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species,species_id
0,5.1,3.5,1.4,0.2,setosa,1
1,4.9,3.0,1.4,0.2,setosa,1
2,4.7,3.2,1.3,0.2,setosa,1
3,4.6,3.1,1.5,0.2,setosa,1
4,5.0,3.6,1.4,0.2,setosa,1


In [3]:
# Scatter plot of petal length against petal width, with marginal
# distribution plots and a fitted trendline.
color_sequence = px.colors.sequential.Inferno

scatter_options = dict(
    x='petal_length',
    y='petal_width',
    color='petal_width',
    color_continuous_scale=color_sequence,  # scale used for the marker colors
    marginal_x='violin',  # distribution of the x values
    marginal_y='histogram',  # other kinds: 'violin', 'rug', 'box'
    trendline='ols',  # least-squares fit line; its hover text reports R^2
)

# The DataFrame is passed first; the remaining options are unpacked from above
fig = px.scatter(df, **scatter_options)

scatter_layout = {
    'title': 'Iris Vizualization',
    'xaxis_title': 'Petal Length (cm)',
    'yaxis_title': 'Petal Width (cm)',
}
fig.update_layout(**scatter_layout)

fig.show()

In [4]:
# Line Plot: keep only the Canada rows of the gapminder sample dataset
# and preview the first five of them.
gapminder = px.data.gapminder()
canada = gapminder.query("country=='Canada'")
canada.head(n=5)


Unnamed: 0,country,continent,year,lifeExp,pop,gdpPercap,iso_alpha,iso_num
240,Canada,Americas,1952,68.75,14785584,11367.16112,CAN,124
241,Canada,Americas,1957,69.96,17010154,12489.95006,CAN,124
242,Canada,Americas,1962,71.3,18985849,13462.48555,CAN,124
243,Canada,Americas,1967,72.13,20819767,16076.58803,CAN,124
244,Canada,Americas,1972,72.88,22284500,18970.57086,CAN,124


In [5]:
color_sequence = px.colors.sequential.Inferno

# Life expectancy against year for the Canada rows selected in `canada`
fig = px.line(
    canada,
    x='year',
    y='lifeExp',
    color_discrete_sequence=color_sequence,
)

# Chart title and axis captions are applied in a single layout update
line_layout = {
    'title': 'Life Expectancy of Canada',
    'xaxis_title': 'Year',
    'yaxis_title': 'Life Expectancy',
}
fig.update_layout(**line_layout)
fig.show()

In [6]:
# Pie chart: load the restaurant tips sample dataset and preview
# its first five rows.
df_tips = px.data.tips()
df_tips.head(n=5)

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4


In [7]:
color_sequence = px.colors.sequential.Inferno

# Donut chart showing how the tip total is split across days of the week.
fig = px.pie(
    df_tips,
    values='tip',  # slice size comes from the tip amounts
    names='day',  # one slice per day
    hole=0.2,  # cut out the centre to make a donut
    # Color by the same column that defines the slices so every day gets
    # exactly one color from the sequence. Coloring by the numeric 'tip'
    # column assigns colors per row value, which does not line up with the
    # per-day slices.
    color='day',
    color_discrete_sequence=color_sequence,
    title="Tipping in US Restaurants",
)
fig.show()

In [8]:
# Bar graph of Canada's population for each year in the gapminder data

# This name must match the one passed to color_continuous_scale below;
# a misspelled assignment would leave the cell depending on a value left
# over from an earlier cell.
color_sequence = px.colors.sequential.Inferno

canada_pop = px.data.gapminder().query("country == 'Canada'")

fig = px.bar(
    canada_pop,
    x='year',
    y='pop',
    color='lifeExp',  # bars get lighter as life expectancy increases
    color_continuous_scale=color_sequence,
)
fig.update_layout(
    title="Population of Canada Over Time",
    xaxis_title='Years',
    yaxis_title='Population',
)

fig.show()

In [9]:
# Box plot of the total bill for each meal time, split by smoker status
color_sequence = px.colors.sequential.Inferno
tip_data = px.data.tips()

box_options = {
    'x': 'time',
    'y': 'total_bill',
    'color': 'smoker',
    'color_discrete_sequence': color_sequence,
}
fig = px.box(tip_data, **box_options)

# Title and axis captions
fig.update_layout(
    title='Tips Box Plot',
    xaxis_title='Time',
    yaxis_title='Bill Total',
)

fig.show()

In [10]:
# Violin plot of tip amounts by smoker status, split by sex
df_for_tips = px.data.tips()
color_sequence = px.colors.sequential.Inferno

fig = px.violin(
    df_for_tips,
    x='smoker',
    y='tip',
    color='sex',
    color_discrete_sequence=color_sequence,
    box=True,  # draw a box plot inside each violin
)

violin_layout = dict(
    title='Tip Data',
    xaxis_title='Smoker',
    yaxis_title='Tip Amount',
)
fig.update_layout(**violin_layout)

fig.show()

In [11]:
# Histogram of the total bill amounts in the tips dataset

color_sequence = px.colors.sequential.Inferno

tip_data = px.data.tips()

fig = px.histogram(
    tip_data,
    x='total_bill',
    nbins=30,
    color_discrete_sequence=color_sequence,
)

# The title is set once, here, and names the column that is actually
# plotted (total_bill, not tip).
fig.update_layout(
    title='Distribution of Total Bills',
    xaxis_title='Bill Total',
    yaxis_title='Count',
)

fig.show()

In [12]:
# 3D scatter plot of the iris measurements, colored by petal length

color_sequence = px.colors.sequential.Inferno

data = px.data.iris()

fig = px.scatter_3d(
    data,
    x='sepal_length',
    y='sepal_width',
    z='petal_width',
    color='petal_length',
    color_continuous_scale=color_sequence,
)

# The axes of a 3D figure live under `layout.scene`; the top-level
# xaxis_title / yaxis_title keys only apply to 2D cartesian axes and
# would leave the 3D axis captions unchanged.
fig.update_layout(
    title='3D Iris Scatter Plot',
    scene=dict(
        xaxis_title='Sepal Length',
        yaxis_title='Sepal Width',
        zaxis_title='Petal Width',
    ),
)

fig.show()

In [13]:
import plotly.graph_objects as go
import numpy as np

# 3D surface plot of z = cos(x^2 + y^2) on a 30 x 30 grid over [-2, 2]
color_sequence = px.colors.sequential.Inferno

# With indexing='ij', x varies along rows and y along columns
grid_points = np.linspace(-2, 2, 30)
x, y = np.meshgrid(grid_points, grid_points, indexing='ij')
z = np.cos(x**2 + y**2)

# Pass the Inferno colors as the surface colorscale so this figure matches
# the palette used by the other plots in the tutorial.
fig = go.Figure(data=[go.Surface(x=x, y=y, z=z, colorscale=color_sequence)])

fig.show()

In [16]:
# User Inputted Data Scatter Plot

import pandas as pd
import plotly.express as px

def _parse_floats(raw):
    """Parse a comma-separated string into a list of floats.

    Raises:
        ValueError: if any comma-separated entry is blank or not a number.
    """
    return [float(item) for item in raw.split(',')]


def _prompt_xy_values():
    """Prompt for x and y values until both parse and have equal length.

    Returns:
        A ``(x_values, y_values)`` tuple of two equal-length lists of floats.
    """
    while True:
        x_raw = input('Enter x values (use commas to separate): ')
        y_raw = input('Enter y values (Separate Values with commas): ')
        try:
            xs = _parse_floats(x_raw)
            ys = _parse_floats(y_raw)
        except ValueError:
            # Ask again instead of crashing on a typo or an empty entry
            print('Every entry must be a number; please try again.')
            continue
        if len(xs) == len(ys):
            return xs, ys
        print(f'Got {len(xs)} x values but {len(ys)} y values; please try again.')


x_values, y_values = _prompt_xy_values()

print(x_values, y_values)

df_user = pd.DataFrame({
    'x': x_values,
    'y': y_values
})

fig = px.scatter(
    df_user,
    x='x',
    y='y',
    # Trendline options: 'lowess' draws a locally weighted smoothing curve,
    # 'ols' draws a straight least-squares line, and 'ols' combined with
    # trendline_options=dict(log_x=True) fits against log(x).
    trendline='lowess',
    marginal_x='histogram',
    marginal_y='histogram',
    color='y',
    color_continuous_scale=px.colors.sequential.Inferno,
    title='User Input Scatter Plot',
)

fig.show()

In [14]:
# Making a Correlation Heatmap

import numpy as np
# Seed NumPy's global random generator so the columns are reproducible
np.random.seed(42)

# Four columns of 100 standard-normal samples each, drawn in the
# order A, B, C, D
data = {label: np.random.randn(100) for label in ('A', 'B', 'C', 'D')}

df = pd.DataFrame(data)

In [15]:
colors = px.colors.sequential.Inferno

# Pairwise correlations between the columns of df
corr_matrix = df.corr()

# Tick labels are taken from the correlation matrix itself
column_labels = corr_matrix.columns.tolist()
row_labels = corr_matrix.index.tolist()

fig = px.imshow(
    corr_matrix,  # matrix to visualize
    x=column_labels,  # labels for the x-axis
    y=row_labels,  # labels for the y-axis
    labels={'color': "correlation"},  # caption for the color bar
    color_continuous_scale=colors,
    zmin=-1,  # lower end of the color range
    zmax=1,  # upper end of the color range
    text_auto=True,  # write each cell's value on the heatmap
    aspect="auto",  # aspect ratio setting
    origin="lower",  # place the first row at the bottom
)

fig.show()

In [16]:
# Creating a confusion matrix using the plotly and scikit-learn libraries

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn. metrics import confusion_matrix, accuracy_score

In [34]:
# Load scikit-learn's iris dataset and separate features from labels
df = load_iris()
X, y = df.data, df.target

# Hold out 30% of the samples for testing; the fixed seed makes the
# split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Fit a random forest on the training portion
clf = RandomForestClassifier(random_state=42).fit(X_train, y_train)

# Predicted labels for the held-out samples
y_pred = clf.predict(X_test)

# Confusion matrix of true labels against predicted labels
cm = confusion_matrix(y_test, y_pred)

# String version of every cell, for use as heatmap annotations
cm_text = [[str(count) for count in row] for row in cm]

In [35]:
import plotly.figure_factory as ff

# Annotated heatmap of the confusion matrix computed in the previous cell
class_names = list(df.target_names)

fig = ff.create_annotated_heatmap(
    z=cm,  # confusion matrix values
    x=class_names,  # labels for the x-axis
    y=class_names,  # labels for the y-axis
    # Use the pre-built string matrix for the cell annotations; it was
    # created for exactly this purpose and was otherwise unused.
    annotation_text=cm_text,
    colorscale="Inferno",  # color scale for the heatmap
)

fig.update_layout(
    title='Confusion Matrix',
    xaxis=dict(title='Predicted Label'),
    yaxis=dict(title='True Label'),
)

fig.show()