# Plotly Tutorial
Taken from [YouTube video](https://www.youtube.com/watch?v=GGL6U0k8WYA) by Derek Banas

## Imports & Setup

* Seaborn only used for default datasets
* `cufflinks` enables more native plotting of Pandas `df` objects in Plotly via the `.iplot()` method
    * Pandas now supports use of a plotly backend

In [None]:
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import chart_studio.plotly as py
import cufflinks as cf
import seaborn as sns

In [None]:
# Setup Plotly as pandas graphing backend
pd.options.plotting.backend = "plotly"

## Basics

### Main Idea

`Seaborn` : `matplotlib` :: `plotly.express` : `plotly.graph_objects`

### Make a basic plot

* Define a numpy array with 50 rows x 4 columns
* `df` from the `np.array`

In [None]:
# 50 rows x 4 columns of samples from the standard normal distribution
arr_1 = np.random.randn(50, 4)
# Wrap the raw array in a DataFrame whose columns are labelled A-D
df_1 = pd.DataFrame(data=arr_1, columns=list('ABCD'))

Create a plot, which will automatically use the Plotly backend

In [None]:
df_1.plot()

## Line plots

Will use Plotly's built-in `stocks` dataset

In [None]:
df_stocks = px.data.stocks()
df_stocks

Make a basic line plot of `$GOOG` price over time

In [None]:
# `labels` is keyed by DataFrame column name (not by axis letter), so the
# axis titles are renamed through the 'date' and 'GOOG' columns.
px.line(
    df_stocks,
    x='date',
    y='GOOG',
    labels={'date': 'Date', 'GOOG': 'Price'},
)

Make a plot w/ lines for both `$GOOG` & `$AAPL`

In [None]:
# With a list of columns for `y` (wide-form input) Plotly Express names the
# y axis 'value', so that is the key `labels` must use to retitle it; the
# x axis is still keyed by its column name, 'date'.
px.line(
    df_stocks,
    x='date',
    y=['GOOG', 'AAPL'],
    labels={'date': 'Date', 'value': 'Price'},
    title='$AAPL vs. $GOOG',
)

Create a similar plot using `graph_objects` rudiments
* Can set different marker settings for each column of data
* Set custom label to appear in legend

In [None]:
def customized_lineplot():
    """Build a three-line stock chart from `graph_objects` primitives.

    Plots the AAPL, AMZN and GOOG columns of `df_stocks` against its `date`
    column, one `go.Scatter` trace per ticker, each with its own drawing
    mode / line style, then applies a detailed custom layout.

    Returns:
        The assembled `go.Figure`.
    """
    dates = df_stocks['date']

    # One entry per trace, in drawing order: (column, Scatter keyword settings)
    trace_specs = [
        # Plain line, no markers
        ('AAPL', dict(mode='lines', name='$AAPL')),
        # Line with a marker on every data point
        ('AMZN', dict(mode='lines+markers', name='$AMZN')),
        # Line with markers plus custom line styling
        (
            'GOOG',
            dict(
                mode='lines+markers',
                name='$GOOG',
                line=dict(color='firebrick', width=2, dash='dashdot'),
            ),
        ),
    ]

    fig = go.Figure()  # Start from a blank figure
    for column, settings in trace_specs:
        # `name` in the settings is the text shown in the legend
        fig.add_trace(go.Scatter(x=dates, y=df_stocks[column], **settings))

    # A simpler alternative to the detailed styling below is to pass only
    # `title`, `xaxis_title` and `yaxis_title` to `fig.update_layout`.

    # x axis: visible axis line and outside ticks, no grid, grey tick labels
    x_axis = dict(
        showline=True,
        showgrid=False,
        linecolor='rgb(204, 204, 204)',
        linewidth=2,
        showticklabels=True,
        ticks='outside',
        tickfont=dict(family='Arial', size=12, color='rgb(82, 82, 82)'),
    )

    # y axis: grid, zero line, axis line and tick labels all switched off
    y_axis = {
        'showgrid': False,
        'zeroline': False,
        'showline': False,
        'showticklabels': False,
    }

    # Fixed 100-unit margins (left, right, top, bottom) with margin
    # auto-expansion turned off
    margins = {'autoexpand': False, 'l': 100, 'r': 100, 't': 100, 'b': 100}

    fig.update_layout(
        xaxis=x_axis,
        yaxis=y_axis,
        autosize=False,  # Automatic sizing is on unless disabled here
        margin=margins,
        showlegend=True,
        plot_bgcolor='white',
    )

    return fig


customized_lineplot()

## Bar charts

Plotly `gapminder` dataset shows population data by country

In [None]:
df_us = px.data.gapminder().query('country == "United States"')
df_us

Graph population by year

In [None]:
px.bar(df_us, x='year', y='pop')

### Stacked bar chart

Using tips dataset

In [None]:
df_tips = px.data.tips()
df_tips

Make stacked bar chart

In [None]:
px.bar(
    df_tips,
    x='day',
    y='tip',
    color='sex',  # Shade by server sex
    title='Daily tips by sex',
    labels={  # Change labels on legend/hover
        'tip': 'Tip amount ($)',
        'day': 'Day of week',
    }
)

Grouped bar chart (bars placed side by side instead of stacked)

In [None]:
px.bar(
    df_tips,
    x='sex',
    y='total_bill',
    color='smoker',
    barmode='group',
)

### Another example

Population by country in Europe for 2007, for countries w/ at least 2M people

In [None]:
df_euro = px.data.gapminder().query('continent == "Europe" and year == 2007 and pop >= 2e6')
df_euro

In [None]:
def customized_barplot():
    """Bar chart of `df_euro` population per country with value labels.

    Returns:
        The Plotly Express bar figure after the label and layout tweaks.
    """
    bar_fig = px.bar(
        df_euro,
        x='country',
        y='pop',
        color='country',  # One colour per country
        text='pop',  # Population value used as each bar's text label
    )

    # Label formatting: '.2s' is the d3-format spec for an SI-prefixed
    # number with 2 significant digits; labels sit outside the bars.
    label_style = dict(texttemplate='%{text:.2s}', textposition='outside')
    bar_fig.update_traces(**label_style)

    bar_fig.update_layout(
        xaxis_tickangle=-45,  # Rotate the x tick labels
        uniformtext_minsize=8,  # Smallest font size allowed for bar text
        uniformtext_mode='hide',  # Hide text that would not fit at that size
    )

    return bar_fig


customized_barplot()

## Scatterplots

Using `iris` dataset

In [None]:
df_iris = px.data.iris()
df_iris

Scatterplot of
* $\texttt{sepal\_length} = f(\texttt{sepal\_width})$
* color by `species`
* $size \sim \texttt{petal\_length}$
* `petal_width` shown on cursor hover

In [None]:
px.scatter(
    df_iris,
    x='sepal_width',
    y='sepal_length',
    color='species',
    size='petal_length',
    hover_data=['petal_width']
)

Additional customization options

In [None]:
def customized_scatterplot():
    """Scatter `sepal_length` against `sepal_width` from `df_iris`.

    Markers are coloured by `sepal_width` with a colour bar shown, and the
    `species` column is attached as each point's text.

    Returns:
        A `go.Figure` holding the single scatter trace.
    """
    widths = df_iris['sepal_width']

    points = go.Scatter(
        x=widths,
        y=df_iris['sepal_length'],
        mode='markers',
        # Colour follows the x value; showscale adds the colour bar
        marker={'color': widths, 'showscale': True},
        text=df_iris['species'],
    )
    fig = go.Figure(data=[points])

    # Marker size and outline width, applied to every trace in the figure
    fig.update_traces(marker_size=10, marker_line_width=2)

    return fig


customized_scatterplot()

Use `go.Scattergl()` when doing scatterplots for large volume of data

In [None]:
def big_data_scatterplot(n_points=100_000):
    """Scatter a large cloud of random points with `go.Scattergl`.

    The point count used to be hard-coded three times (x, y and colour),
    which had to be kept in sync by hand; it is now a single parameter.

    Args:
        n_points: Number of random points to draw. Defaults to 100_000,
            the size that was previously hard-coded.

    Returns:
        A `go.Figure` containing one `Scattergl` markers trace.
    """
    # Same draw order as before (x, then y, then colour) so a seeded run
    # still produces the same figure.
    xs = np.random.randn(n_points)
    ys = np.random.randn(n_points)
    shades = np.random.randn(n_points)

    fig = go.Figure(
        data=go.Scattergl(
            x=xs,
            y=ys,
            mode='markers',
            marker=dict(
                color=shades,
                colorscale='Viridis',  # Custom colour map
                line_width=1,
            ),
        )
    )

    return fig


big_data_scatterplot()

## Pie charts

Use `gapminder` dataset to make chart of largest nations in Asia as of 2007

Built-in color scales can be found in [Plotly docs](https://plotly.com/python/builtin-colorscales/)

In [None]:
df_asia = px.data.gapminder().query('year == 2007').query('continent == "Asia"')
df_asia

Create pie chart

In [None]:
px.pie(
    df_asia,
    values='pop',
    names='country',
    title='Asia: population by country',
    color_discrete_sequence=px.colors.sequential.RdBu,
)

Custom pie chart

In [None]:
def customized_piechart():
    """Pie chart of six hard-coded categories with custom slice styling.

    Returns:
        A `go.Figure` holding one `go.Pie` trace.
    """
    # Slice label -> slice value, in drawing order
    slice_sizes = {
        'Water': 110,
        'Grass': 90,
        'Normal': 80,
        'Psychic': 80,
        'Fire': 70,
        'Ground': 60,
    }
    # Custom colour sequence, one entry per slice
    slice_colors = ['blue', 'green', 'black', 'purple', 'red', 'brown']

    pie = go.Pie(
        labels=list(slice_sizes),
        values=list(slice_sizes.values()),
    )
    fig = go.Figure(data=[pie])  # Traces are passed as a list

    # Outline drawn around every slice
    outline = dict(color='#FFFFFF', width=2)

    fig.update_traces(
        hoverinfo='label+percent',  # Label & percentage on cursor hover
        textinfo='label+percent',  # Label & percentage inside the slices
        textfont_size=20,
        # Pull slices away from the pie: 0.1 for the 1st (Water), 0.2 for
        # the 3rd (Normal); the others stay attached.
        pull=[0.1, 0, 0.2, 0, 0, 0],
        # `marker` holds the slice colours and outline settings
        marker=dict(colors=slice_colors, line=outline),
    )

    return fig


customized_piechart()

## Histograms

Generate rolls of two 6-sided dice

In [None]:
# Simulate 5000 throws of each die; `high` is exclusive, so faces are 1..6
die_1 = np.random.randint(low=1, high=7, size=5000)
die_2 = np.random.randint(low=1, high=7, size=5000)
# Element-wise total of the two dice for every throw
dice_sum = np.add(die_1, die_2)

In [None]:
def customized_histogram():
    """Histogram of the two-dice totals in `dice_sum` with a violin margin.

    The histogram bins the dice totals along the x axis and counts the
    rolls in each bin on the y axis, so the axis titles are set to match
    (they were previously swapped).

    Returns:
        The Plotly Express histogram figure after layout tweaks.
    """
    fig = px.histogram(
        dice_sum,
        nbins=11,  # 11 possible 2-dice sums (2 through 12)
        labels={'value': 'Dice roll'},
        title='5000 dice roll histogram',
        marginal='violin',  # Violin plot in the margin
        color_discrete_sequence=['green'],
    )

    fig.update_layout(
        xaxis_title_text='Dice sum',  # x axis holds the binned totals
        yaxis_title_text='Number of rolls',  # y axis holds the counts
        bargap=0.2,  # Space between bins; atypical for a histogram
        showlegend=False,
    )

    return fig


customized_histogram()

Stacked histogram

In [None]:
px.histogram(
    df_tips,
    x='total_bill',
    color='sex'
)

## Boxplots

In [None]:
px.box(
    df_tips,
    x='sex',
    y='tip',
    points='all'  # Vertical scatterplot of all tip amounts next to boxes
)

**Categorical boxplots**

Tip ranges by day

In [None]:
px.box(
    df_tips,
    x='day',
    y='tip',
    color='sex'
)

Custom boxplot

In [None]:
def customized_boxplot_tips():
    """Box plot of `tip` grouped by `sex` from `df_tips`.

    Returns:
        A `go.Figure` holding the single `go.Box` trace.
    """
    tip_boxes = go.Box(
        x=df_tips['sex'],
        y=df_tips['tip'],
        marker_color='blue',
        boxmean='sd',  # Also draw the mean and standard deviation
    )

    fig = go.Figure()
    fig.add_trace(tip_boxes)
    return fig


customized_boxplot_tips()

More complex custom boxplot

In [None]:
def customized_boxplot_stocks():
    """Side-by-side box plots of the GOOG and AAPL columns of `df_stocks`.

    Returns:
        A `go.Figure` with one `go.Box` trace per ticker and custom
        background / grid colours.
    """
    fig = go.Figure()

    # (column, box fill colour), in drawing order
    for column, fill in (('GOOG', 'blue'), ('AAPL', 'red')):
        fig.add_trace(
            go.Box(
                y=df_stocks[column],
                boxpoints='all',  # Show every underlying data point
                fillcolor=fill,
                jitter=0.5,  # Spread the points so they overlap less
                whiskerwidth=0.2,
            )
        )

    # White, 3-wide grid lines on the y axis
    y_axis = dict(gridcolor='rgb(255, 255, 255)', gridwidth=3)

    fig.update_layout(
        title='$GOOG vs. $AAPL',
        yaxis=y_axis,
        paper_bgcolor='rgb(243, 243, 243)',  # Background of the paper around the plot
        plot_bgcolor='lightgray',  # Background of the plotting area
    )

    return fig


customized_boxplot_stocks()

## Violin Plots

Basic plot

In [None]:
px.violin(
    df_tips,
    y='total_bill',
    box=True,  # Embeds a boxplot within the violin
    points='all',
)

More complex plot

In [None]:
px.violin(
    df_tips,
    x='smoker',
    y='tip',
    color='sex',
    box=True,
    points='all',
    hover_data=df_tips.columns  # Add every DataFrame column to each point's hover tooltip
)

### Custom plot

Note that for the filtering, the tutorial uses a suboptimal method that omits `.loc`:
* **No:**
    * ~~`df_tips['day'][df_tips['smoker'] == 'Yes']`~~
* **Yes:**
    * `df_tips.loc[df_tips['smoker'] == 'Yes', 'day']`

In [None]:
def customized_violinplot():
    """Split violin plot of `total_bill` per `day`, smokers vs. non-smokers.

    Each half-violin is its own `go.Violin` trace built from the rows of
    `df_tips` whose `smoker` value matches; smokers are drawn on the
    'negative' side and non-smokers on the 'positive' side.

    Returns:
        A `go.Figure` with the two half-violin traces.
    """
    # (smoker value, legend name, violin side, line colour), in drawing order
    halves = (
        ('Yes', 'Smoker', 'negative', 'red'),
        ('No', 'Non-smoker', 'positive', 'blue'),
    )

    fig = go.Figure()
    for flag, legend_name, side, colour in halves:
        rows = df_tips['smoker'] == flag  # Boolean row filter for this half
        fig.add_trace(
            go.Violin(
                x=df_tips.loc[rows, 'day'],
                y=df_tips.loc[rows, 'total_bill'],
                legendgroup=flag,
                scalegroup=flag,
                name=legend_name,
                side=side,
                line=dict(color=colour),
            )
        )

    return fig


customized_violinplot()

In [None]:
df_tips['day'][df_tips['smoker'] == 'No']

## Density Heatmaps

Using Seaborn `flights` dataset

While it will not be shown, the # of bins can be edited with the following keywords:
* x-axis bins: `nbinsx`
* y-axis bins: `nbinsy`

In [None]:
flights = sns.load_dataset('flights')
flights

Basic heatmap
* x-axis: year of flight
* y-axis: month of flight
* heat color: # of pax

In [None]:
px.density_heatmap(
    flights,
    x='year',
    y='month',
    z='passengers',
    color_continuous_scale='Viridis',
)

Add histograms in each margin
* `plotly.express` API does not allow both marginal histograms & custom color scale

In [None]:
px.density_heatmap(
    flights,
    x='year',
    y='month',
    z='passengers',
    marginal_x='histogram',  # Show x-axis
    marginal_y='histogram',
)

## 3-D Scatterplots

In [None]:
px.scatter_3d(
    flights,
    x='year',
    y='month',
    z='passengers',
    color='year',
    opacity=0.7  # Scatter points won't fully occlude each other
)

## 3-D Lineplots

In [None]:
# 3-D line plot of passengers over year and month, coloured by year
px.line_3d(
    flights,
    x='year',
    y='month',
    z='passengers',
    color='year',
)

## Scatter matrix

In [None]:
px.scatter_matrix(flights, color='month')

## Map scatter plots

In [None]:
px.data.gapminder().query('year == 2007')

In [None]:
px.scatter_geo(
    px.data.gapminder().query('year == 2007'),
    locations='iso_alpha',
    color='continent',
    hover_name='country',
    size='pop',  # Marker size correlated to country pop
    projection='orthographic'  # Map projection; 'orthographic' gives a spinnable globe
)

## Choropleth Map

Map w/ shaded regions as a heatmap of magnitude

* *choros* = area
* *pleth* = multitude

Will use unemployment data via a JSON download from Plotly GitHub

In [None]:
import json
from urllib.request import urlopen

# US county geometry data (GeoJSON)
with urlopen('https://raw.githubusercontent.com/plotly/datasets/refs/heads/master/geojson-counties-fips.json') as response:
    counties = json.load(response)

# US county unemployment data (FIPS = numerical county code).
# `fips` must be read as a string: parsed as an integer it loses its leading
# zeros, and the codes are later matched against the county ids in the
# GeoJSON (the Plotly choropleth docs load this file with the same dtype).
df_unemp = pd.read_csv(
    'https://raw.githubusercontent.com/plotly/datasets/refs/heads/master/fips-unemp-16.csv',
    dtype={'fips': str},
)

df_unemp

Draw basic choropleth

In [None]:
px.choropleth(
    df_unemp,
    geojson=counties,
    locations='fips',
    color='unemp',
    color_continuous_scale='Viridis',
    range_color=(0, 12),
    scope='usa',
    labels={'unemp': 'Unemployment rate'},
)

## Polar Charts

Will display wind data using polar coordinates

In [None]:
df_wind = px.data.wind()
df_wind

Draw basic polar plot

In [None]:
px.scatter_polar(
    df_wind,
    r='frequency',
    theta='direction',
    color='strength',
    size='frequency',
    symbol='strength'
)

In [None]:
px.line_polar(
    df_wind,
    r='frequency',
    theta='direction',
    color='strength',
    line_close=True,  # Lines should form a closed polygon 360 degrees
    template='plotly_dark'
)

## Ternary Plots

To represent data across 3 groups, will use the `experiment` dataset
* Consists of 3 experimental groups in a repeated-measures design

In [None]:
df_exp = px.data.experiment()
df_exp

In [None]:
px.scatter_ternary(
    df_exp,
    a='experiment_1',
    b='experiment_2',
    c='experiment_3',
    hover_name='group',
    color='gender'
)

## Facet plots

In [None]:
px.scatter(
    df_tips,
    x='total_bill',
    y='tip',
    color='smoker',
    facet_col='sex'
)

In [None]:
px.histogram(
    df_tips,
    x='total_bill',
    y='tip',
    color='sex',
    facet_row='time',
    facet_col='day',
    category_orders={'day': ['Thur', 'Fri', 'Sat', 'Sun'],
                     'time': ['Lunch', 'Dinner']}
)

Seaborn `attention` dataset

In [None]:
df_att = sns.load_dataset('attention')
df_att

In [None]:
px.line(
    df_att,
    x='solutions',
    y='score',
    facet_col='subject',
    facet_col_wrap=5,  # Wrap every 5 subjects
    title='Scores vs. Attention'
)

## Animated Plots

In [None]:
df_country = px.data.gapminder()
df_country

In [None]:
px.scatter(
    df_country,
    x='gdpPercap',
    y='lifeExp',
    animation_frame='year',
    animation_group='country',
    size='pop',
    color='continent',
    hover_name='country',
    log_x=True,  # log-scale GDP
    size_max=55,
    range_x=[100, 1e5],
    range_y=[25, 90]
)

In [None]:
px.bar(
    df_country,
    x='continent',
    y='pop',
    color='continent',
    animation_frame='year',
    animation_group='country',
    hover_name='country',
    range_y=[0, 4e9]
)