# Introduction to Plotly Python

Plotly Express is the easy-to-use, high-level interface to Plotly, which operates on a variety of types of data and produces easy-to-style figures.

In [1]:
# Uncomment the next line to install Plotly into the kernel's environment.
# %pip install plotly

Plotly Express requires pandas to be installed.

In [2]:
# Third-party libraries used throughout the notebook
import pandas as pd
import plotly.express as px

# Cap the number of columns pandas shows when displaying a DataFrame
pd.set_option('display.max_columns', 10)

## Simple Line Plot

In [3]:
# Sample coordinates used by the first two plots
x = list(range(1, 6))
y = [4, 5, 3, 2, 6]

In [4]:
# Build a line figure from the two lists
fig_ln = px.line(
    x=x,
    y=y,
)

In [5]:
# Print the figure's text representation to inspect its underlying structure
print(fig_ln)

Figure({
    'data': [{'hovertemplate': 'x=%{x}<br>y=%{y}<extra></extra>',
              'legendgroup': '',
              'line': {'color': '#636efa', 'dash': 'solid'},
              'marker': {'symbol': 'circle'},
              'mode': 'lines',
              'name': '',
              'orientation': 'v',
              'showlegend': False,
              'type': 'scatter',
              'x': array([1, 2, 3, 4, 5], dtype=int64),
              'xaxis': 'x',
              'y': array([4, 5, 3, 2, 6], dtype=int64),
              'yaxis': 'y'}],
    'layout': {'legend': {'tracegroupgap': 0},
               'margin': {'t': 60},
               'template': '...',
               'xaxis': {'anchor': 'y', 'domain': [0.0, 1.0], 'title': {'text': 'x'}},
               'yaxis': {'anchor': 'x', 'domain': [0.0, 1.0], 'title': {'text': 'y'}}}
})


Figures are represented as trees whose root node has three top-level attributes: data, layout, and frames.

In [6]:
# Render the line figure
fig_ln.show()

In [7]:
# A figure left as the last expression of a cell is displayed through its rich representation
fig_ln

## Simple Bar Chart

In [8]:
# Bar figure from the same lists, with each bar colored by its x value
fig_b = px.bar(
    x=x,
    y=y,
    color=x,
    width=800,
)
# Render it
fig_b.show()

In [9]:
# Print the bar figure's text representation to inspect its underlying structure
print(fig_b)

Figure({
    'data': [{'alignmentgroup': 'True',
              'hovertemplate': 'x=%{x}<br>y=%{y}<br>color=%{marker.color}<extra></extra>',
              'legendgroup': '',
              'marker': {'color': array([1, 2, 3, 4, 5], dtype=int64), 'coloraxis': 'coloraxis', 'pattern': {'shape': ''}},
              'name': '',
              'offsetgroup': '',
              'orientation': 'v',
              'showlegend': False,
              'textposition': 'auto',
              'type': 'bar',
              'x': array([1, 2, 3, 4, 5], dtype=int64),
              'xaxis': 'x',
              'y': array([4, 5, 3, 2, 6], dtype=int64),
              'yaxis': 'y'}],
    'layout': {'barmode': 'relative',
               'coloraxis': {'colorbar': {'title': {'text': 'color'}},
                             'colorscale': [[0.0, '#0d0887'], [0.1111111111111111,
                                            '#46039f'], [0.2222222222222222,
                                            '#7201a8'], [0.3333333333

## Automobile Dataset

We will use the Automobile Data Set [https://archive.ics.uci.edu/ml/datasets/automobile] from the UCI Machine Learning Repository [https://archive-beta.ics.uci.edu/]. It includes both categorical and continuous variables, and missing values in the raw file are marked with `?`.

In [10]:
# Column names for the data file, in file order
headers = [
    "symboling",
    "normalized_losses",
    "make",
    "fuel_type",
    "aspiration",
    "num_doors",
    "body_style",
    "drive_wheels",
    "engine_location",
    "wheel_base",
    "length",
    "width",
    "height",
    "curb_weight",
    "engine_type",
    "num_cylinders",
    "engine_size",
    "fuel_system",
    "bore",
    "stroke",
    "compression_ratio",
    "horsepower",
    "peak_rpm",
    "city_mpg",
    "highway_mpg",
    "price",
]

In [11]:
# Load the data: no header row is read from the file, column names come from
# `headers`, and "?" entries are treated as missing values
df = pd.read_csv(
    "https://archive.ics.uci.edu/ml/machine-learning-databases/autos/imports-85.data",
    header=None,
    names=headers,
    na_values="?",
)
# Preview the first three rows
df.head(3)

Unnamed: 0,symboling,normalized_losses,make,fuel_type,aspiration,...,horsepower,peak_rpm,city_mpg,highway_mpg,price
0,3,,alfa-romero,gas,std,...,111.0,5000.0,21,27,13495.0
1,3,,alfa-romero,gas,std,...,111.0,5000.0,21,27,16500.0
2,1,,alfa-romero,gas,std,...,154.0,5000.0,19,26,16500.0


In [12]:
# Count the missing values in each column
df.isna().sum()

symboling             0
normalized_losses    41
make                  0
fuel_type             0
aspiration            0
num_doors             2
body_style            0
drive_wheels          0
engine_location       0
wheel_base            0
length                0
width                 0
height                0
curb_weight           0
engine_type           0
num_cylinders         0
engine_size           0
fuel_system           0
bore                  4
stroke                4
compression_ratio     0
horsepower            2
peak_rpm              2
city_mpg              0
highway_mpg           0
price                 4
dtype: int64

In [13]:
# Drop the rows that contain missing values. Rebinding `df` instead of using
# inplace=True keeps the call chainable and makes the data flow explicit.
df = df.dropna()
# Confirm that no missing values remain in any column
df.isnull().sum()

symboling            0
normalized_losses    0
make                 0
fuel_type            0
aspiration           0
num_doors            0
body_style           0
drive_wheels         0
engine_location      0
wheel_base           0
length               0
width                0
height               0
curb_weight          0
engine_type          0
num_cylinders        0
engine_size          0
fuel_system          0
bore                 0
stroke               0
compression_ratio    0
horsepower           0
peak_rpm             0
city_mpg             0
highway_mpg          0
price                0
dtype: int64

## Bar Charts

Using the data in the DataFrame, we can plot several vertical and horizontal bar charts.

In [14]:
# Count the cars per symboling value, without sorting by frequency
symb = df["symboling"].value_counts(sort=False)
symb

 2    29
 1    46
 0    48
-1    20
 3    13
-2     3
Name: symboling, dtype: int64

In [15]:
# Plain bar chart: symboling values on x, their counts on y
fig_b2 = px.bar(
    x=symb.index,
    y=symb.values,
    width=800,
)
fig_b2.show()

In [16]:
# Same chart, with each bar colored by its count
fig_b2 = px.bar(
    x=symb.index,
    y=symb.values,
    color=symb.values,
    width=800,
)
fig_b2.show()

In [17]:
# Same chart again, now with a title
fig_b2 = px.bar(
    x=symb.index,
    y=symb.values,
    color=symb.values,
    width=800,
    title='Risk Factor Symbol (from safe to risky)',
)
fig_b2.show()

In [18]:
# Bar chart built directly from the DataFrame: make on x, symboling on y
fig_b3 = px.bar(
    df,
    x='make',
    y='symboling',
    width=800,
    title='Risk Factor Symbol by Make',
)
fig_b3.show()

In [19]:
# Color the bars by the symboling column
fig_b3 = px.bar(
    df,
    x='make',
    y='symboling',
    color='symboling',
    width=800,
    title='Risk Factor Symbol by Make',
)
fig_b3.show()

In [20]:
# Color the bars by a different column: fuel_type
fig_b3 = px.bar(
    df,
    x='make',
    y='symboling',
    color='fuel_type',
    width=800,
    title='Risk Factor Symbol by Make and Fuel Type',
)
fig_b3.show()

In [21]:
# Same chart with barmode='group'
fig_b3 = px.bar(
    df,
    x='make',
    y='symboling',
    color='fuel_type',
    barmode='group',
    width=800,
    title='Risk Factor Symbol by Make and Fuel Type',
)
fig_b3.show()

## Histograms

A histogram displays the distribution of a variable with bars: each bar covers a range of values (a bin), and its height shows how many observations fall in that range.

Suppose we want a similar graph showing the average of symboling by make. For that, we should use a histogram with an aggregation function. Let's see!

In [22]:
# px.histogram with histfunc='avg': average symboling per make, grouped by fuel type
fig_h1 = px.histogram(
    df,
    x='make',
    y='symboling',
    color='fuel_type',
    barmode='group',
    histfunc='avg',
    width=800,
    title='Average of Risk Factor Symbol by Make and Fuel Type',
)
fig_h1.show()

In [23]:
# Same histogram without barmode='group'
fig_h1 = px.histogram(
    df,
    x='make',
    y='symboling',
    color='fuel_type',
    histfunc='avg',
    width=800,
    title='Average of Risk Factor Symbol by Make and Fuel Type',
)
fig_h1.show()

In [24]:
# Average price for each make
fig_h2 = px.histogram(
    df,
    x='make',
    y='price',
    histfunc='avg',
    width=800,
    title='Price by Make',
)
fig_h2.show()

In [25]:
# Average price for each make, split by fuel_type with grouped bars
fig_h2 = px.histogram(
    df,
    x='make',
    y='price',
    color='fuel_type',
    barmode='group',
    histfunc='avg',
    width=800,
    title='Price by Make',
)
fig_h2.show()

In [26]:
# Average price for each make, colored by body_style
fig_h2 = px.histogram(
    df,
    x='make',
    y='price',
    color='body_style',
    histfunc='avg',
    width=800,
    title='Price by Make',
)
fig_h2.show()

In [27]:
# Basic histogram of the length column
fig_h3 = px.histogram(
    df,
    x="length",
    width=800,
    title="Car Length",
)
fig_h3.show()

In [28]:
# Length histogram with nbins=10
fig_h3 = px.histogram(
    df,
    x="length",
    nbins=10,
    width=800,
    title="Length",
)
fig_h3.show()

In [29]:
# Length histogram colored by body_style
fig_h3 = px.histogram(
    df,
    x="length",
    nbins=10,
    color='body_style',
    width=800,
    title="Car Length & Body Style",
)
fig_h3.show()

In [30]:
# Length histogram colored by body_style, with fuel_type mapped to pattern_shape
fig_h3 = px.histogram(
    df,
    x="length",
    nbins=10,
    color='body_style',
    pattern_shape='fuel_type',
    width=800,
    title="Car Length, Body Style & Fuel Type",
)
fig_h3.show()

In [31]:
# Price histogram (the y-axis shows the counts)
fig_h4 = px.histogram(
    df,
    x="price",
    width=800,
    title="Price",
)
fig_h4.show()

In [32]:
# Price histogram (the y-axis shows the counts), with text labels via text_auto
fig_h4 = px.histogram(
    df,
    x="price",
    text_auto=True,
    width=800,
    title="Price",
)
fig_h4.show()

In [33]:
# Price histogram normalized so the y-axis shows the probability density
fig_h4 = px.histogram(
    df,
    x="price",
    histnorm='probability density',
    width=800,
    title="Price",
)
fig_h4.show()

In [34]:
# Same density histogram drawn in salmon
fig_h4 = px.histogram(
    df,
    x="price",
    histnorm='probability density',
    color_discrete_sequence=['salmon'],
    width=800,
    title="Price",
)
fig_h4.show()

In [35]:
# Histogram of the horsepower column, drawn in seagreen
fig_h5 = px.histogram(
    df,
    x="horsepower",
    color_discrete_sequence=['seagreen'],
    width=800,
    title="Horsepower",
)
fig_h5.show()

In [36]:
# Horsepower histogram with opacity=0.7
fig_h5 = px.histogram(
    df,
    x="horsepower",
    color_discrete_sequence=['seagreen'],
    opacity=0.7,
    width=800,
    title="Horsepower",
)
fig_h5.show()

In [37]:
# Horsepower histogram whose y-axis carries the average price (histfunc='avg')
fig_h5 = px.histogram(
    df,
    x="horsepower",
    y='price',
    histfunc='avg',
    color_discrete_sequence=['olivedrab'],
    opacity=0.7,
    width=800,
    title="Horsepower with Average Price",
)
fig_h5.show()

Notice that the y-axis now shows the average price.

In [38]:
# Horsepower histogram whose y-axis carries the maximum price (histfunc='max')
fig_h5 = px.histogram(
    df,
    x="horsepower",
    y='price',
    histfunc='max',
    color_discrete_sequence=['olivedrab'],
    width=800,
    title="Horsepower with Max Price",
)
fig_h5.show()

Notice that the y-axis now shows the maximum price.

In [39]:
# Horsepower histogram whose y-axis carries the minimum price (histfunc='min')
fig_h5 = px.histogram(
    df,
    x="horsepower",
    y='price',
    histfunc='min',
    color_discrete_sequence=['olivedrab'],
    opacity=0.3,
    width=800,
    title="Horsepower with Min Price",
)
fig_h5.show()

Notice that the y-axis now shows the minimum price.

In [40]:
# City (MPG) histogram colored by drive wheels.
# The title names the column actually passed to `color` (drive_wheels).
fig_h6 = px.histogram(
    df,
    x="city_mpg",
    nbins=30,
    color='drive_wheels',
    width=800,
    title="City (MPG) by Drive Wheels",
)
fig_h6.show()

In [41]:
# City (MPG) histogram colored by drive wheels, with a marginal rug plot.
# The title names the column actually passed to `color` (drive_wheels).
fig_h6 = px.histogram(
    df,
    x="city_mpg",
    nbins=30,
    color='drive_wheels',
    marginal='rug',
    width=800,
    title="City (MPG) by Drive Wheels",
)
fig_h6.show()

In [42]:
# City (MPG) histogram colored by drive wheels, with a marginal box plot.
# The title names the column actually passed to `color` (drive_wheels).
fig_h6 = px.histogram(
    df,
    x="city_mpg",
    nbins=30,
    color='drive_wheels',
    marginal='box',
    width=800,
    title="City (MPG) by Drive Wheels",
)
fig_h6.show()

In [43]:
# City (MPG) histogram colored by drive wheels, with a marginal violin plot.
# The title names the column actually passed to `color` (drive_wheels).
fig_h6 = px.histogram(
    df,
    x="city_mpg",
    nbins=30,
    color='drive_wheels',
    marginal='violin',
    width=800,
    title="City (MPG) by Drive Wheels",
)
fig_h6.show()

## References

- https://plotly.com/python/plotly-express/
- https://plotly.com/python/bar-charts/