# Plotly



# Data Visualization
Data Visualization is the graphical representation of information and data. By using visual elements like charts, graphs, and maps, data visualization tools provide an accessible way to see and understand trends, outliers, and patterns in data.

# Plotly
Plotly is a Python library which is used to design graphs, especially interactive graphs.

It can plot various graphs and charts like histogram, barplot, boxplot, spreadplot, and many more.\
It is mainly used in data analysis as well as financial analysis. Plotly is an interactive visualization library.
# Benefits of using Plotly:
Plotly is interactive, the graphs can be zoomed in and out, downloaded as a PNG file, hovered over to see the data points, and much more.\
Plotly is compatible with Pandas DataFrames, making it easy to plot data directly from CSVs.\
Plotly can help make animated plots, which can be very useful for visualizing data over time.\
Plotly can be used to style interactive graphs with Jupyter notebook.\
Plotly with Dash is a great choice for creating interactive dashboards that can be deployed on the web.

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go

In [None]:
# Silence every Python warning for the rest of the session so library warnings
# do not clutter the cell outputs. Note that this blanket 'ignore' filter also
# hides warnings that may be worth reading.
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Load the diamonds dataset through seaborn and print its (rows, columns) shape
df = sns.load_dataset('diamonds')
print(df.shape)
# Keep a 10% random sample; random_state=10 fixes the seed so the same rows are
# drawn on every run. The second print shows the reduced shape.
df = df.sample(frac=0.1, random_state=10) # 10% of data
print(df.shape)

In [None]:
df.head()

In [None]:
# scatter plot
fig = px.scatter(df, x='carat', y='price')
fig.show()

In [None]:
# line plot
# px.line joins points in row order, so sort by the x column first; the
# randomly sampled frame would otherwise draw a tangled zig-zag.
fig = px.line(df.sort_values('carat'), x='carat', y='price')
fig.show()

In [None]:
# scatter plot with points coloured by the 'cut' column
fig = px.scatter(df, x='carat', y='price', color='cut')
fig.show()

In [None]:
# scatter plot coloured by 'cut', now with a figure title
fig = px.scatter(df, x='carat', y='price', color='cut',
                 title='Diamond Price vs Carat Weight',
                 )
fig.show()

In [None]:
# scatter plot coloured by 'cut', with a title and human-readable labels:
# `labels` maps column names to the text used for them in the figure
fig = px.scatter(df, x='carat', y='price', color='cut',
                 title='Diamond Price vs Carat Weight',
                 labels={'carat':'Carat Weight', 'price':'Price ($)'}
                 )
fig.show()

In [None]:
# Plot a line chart (example: mean price by carat)
# Collapse the sample to one row per carat value (mean price) so the line has
# a single y value for each x. groupby sorts the result by carat.
df_mean = df.groupby('carat')['price'].mean().reset_index()
fig = px.line(df_mean, x='carat', y='price',
              title='Mean Price by Carat')
fig.show()

In [None]:
sns.barplot(x='cut', y='price', data=df)

In [None]:
# Bar plot for average price per cut category
# px.bar draws one segment per input row and stacks them, so plotting the raw
# frame would show the *sum* of prices per cut. Aggregate to the mean first so
# the bars match the "Average Price" title.
df_cut_mean = df.groupby('cut', observed=True)['price'].mean().reset_index()
fig = px.bar(df_cut_mean, x='cut', y='price',
             title='Average Price by Cut', color='cut')
fig.show()

In [None]:
# Mean and standard deviation of price for each clarity grade
df_stats = df.groupby('clarity')['price'].agg(['mean', 'std']).reset_index()

# Bar plot of the average price per clarity category; the standard deviation
# is drawn as the error bar on each bar
fig = px.bar(df_stats, x='clarity', y='mean', error_y='std',
             title='Average Price by Clarity', color='clarity')
fig.show()

In [None]:
# histogram
fig = px.histogram(df, x='price', nbins=50,  
                   title='Price Distribution')
fig.show()

In [None]:
# histogram of price (nbins=50), with the bars split and coloured by 'cut'
fig = px.histogram(df, x='price', nbins=50, color='cut', 
                   title='Price Distribution')
fig.show()

In [None]:
# density contour plot
fig = px.density_contour(df, x='carat', y='price', 
                         title='Density Contour Plot')
fig.show()

In [None]:
# box plot
fig = px.box(df, x='cut', y='price', 
             title='Price Distribution by Cut')
fig.show()

In [None]:
# box plot of price per cut, drawing only the outlier points
fig = px.box(df, x='cut', y='price',
             points='outliers',  # alternatives include 'suspectedoutliers'
             title='Price Distribution by Cut')
# The figure was built but never displayed; show it like the other cells do
fig.show()

In [None]:
# box plot
fig = px.box(df, x='cut', y='price', color='clarity',
             # outliers
            points='suspectedoutliers', # 'outliers', 'suspectedoutliers'
            title='Price Distribution by Cut',
            labels={'price':'Price ($)', 'cut':'Cut', 'clarity':'Clarity'}
            )
fig.show()

In [None]:
# violin plot
fig = px.violin(df, x='cut', y='price', 
                # color='clarity',
             # outliers
            points='suspectedoutliers', # 'outliers', 'suspectedoutliers'
            title='Price Distribution by Cut',
            labels={'price':'Price ($)', 'cut':'Cut', 
                    # 'clarity':'Clarity'
                    }
            )
fig.show()

In [None]:
# facet scatter plot by clarity
fig = px.scatter(df, x='carat', y='price', 
                 color='cut', 
                 facet_col='clarity',
                 title='Price vs Carat by Cut and Clarity',)
fig.show()

In [None]:
# Animated, faceted scatter plot: one panel per clarity grade and one
# animation frame per cut; marker colour follows cut, marker size follows price
fig = px.scatter(
    df,
    x='carat',
    y='price',
    size='price',
    color='cut',
    facet_col='clarity',
    animation_frame='cut',  # animate by cut
    title='Price vs Carat by Cut and Clarity',
)
fig.show()

In [None]:
# Density heatmap of carat and price
fig = px.density_heatmap(df, x='carat', y='price', 
                         title='Density Heatmap of Carat vs Price')
fig.show()

# Subplots

In [None]:
from plotly.subplots import make_subplots

# Create subplots
# One row, two columns; each panel gets its own title
fig = make_subplots(rows=1, cols=2, 
                    subplot_titles=('Carat vs Price', 
                                    'Density Heatmap of Carat vs Price'))

# Scatter plot of carat vs price
# Build the chart with Plotly Express, then take its first trace (.data[0])
# so it can be placed into the left panel of the shared figure
scatter = px.scatter(df, x='carat', y='price').data[0]
fig.add_trace(scatter, row=1, col=1)

# Density heatmap of carat vs price
# Same approach: reuse the first trace of the Express figure in the right panel
heatmap = px.density_heatmap(df, x='carat', y='price').data[0]
fig.add_trace(heatmap, row=1, col=2)

# Update layout
# Set one overall title for the combined figure
fig.update_layout(title_text='Carat vs Price and Density Heatmap of Carat vs Price')
fig.show()

In [None]:
# Compute the correlation matrix of the numerical columns only and display it
# as the cell output (it is plotted as a heatmap in a later cell)
corr = df[['carat', 'depth', 'table', 'price', 'x', 'y', 'z']].corr()
corr

In [None]:
# Create a heatmap of the correlation matrix of the numerical columns only
corr = df[['carat', 'depth', 'table', 'price', 'x', 'y', 'z']].corr()
# In go.Heatmap, z[i][j] is drawn at (x[j], y[i]): the matrix columns label the
# x axis and the matrix index labels the y axis. A correlation matrix is
# symmetric, so the picture is the same either way, but this mapping stays
# correct if the frame is ever replaced by a non-symmetric one.
fig = go.Figure(data=go.Heatmap(x=corr.columns.values,
                                y=corr.index.values,
                                z=corr.values,
                                colorscale='Viridis'))
# colorscale can be 'Viridis', 'Cividis', 'Blues', 'Greens', 'Reds', 'Oranges',
# 'YlOrRd', 'YlGnBu', 'RdBu', 'Picnic', 'Rainbow', 'Portland', 'Jet', 'Hot',
# 'Blackbody', 'Earth', 'Electric', 'Plasma', 'Magma', 'Inferno'
fig.show()

# 3D plots

In [None]:
# 3D scatter plot
fig = px.scatter_3d(df, 
                    x='carat', 
                    y='depth', 
                    z='price', 
                    color='cut', 
                    title='3D Scatter Plot of Carat, Depth, and Price')
fig.show()

In [None]:
# 3d line plot
fig = px.line_3d(df, x='carat', y='depth', z='price', color='cut')
fig.show()

In [None]:
# 3D scatter plot with the categorical 'cut' on the z axis and price as colour
fig = px.scatter_3d(df,
                    x='carat',
                    y='depth',
                    z='cut',
                    color='price',
                    # Title now names the axes actually plotted (z is cut;
                    # price is only the colour scale)
                    title='3D Scatter Plot of Carat, Depth, and Cut (Colored by Price)')
fig.show()

# Other plots

In [None]:
# Bubble chart with carat and price, using depth as size
fig = px.scatter(df,
                 x='carat',
                 y='price',
                 # Size by depth, as the comment and title state; the previous
                 # size='price' merely repeated the y axis
                 size='depth',
                 color='cut',
                 title='Bubble Chart: Carat vs Price with Depth')
fig.show()

In [None]:
# Area chart of cumulative price over carat
# Sort by carat, then accumulate price so the y axis really is a running
# total; plotting the raw 'price' column would not be cumulative.
df_sorted = df.sort_values(by='carat')
df_sorted['cumulative_price'] = df_sorted['price'].cumsum()
fig = px.area(df_sorted,
              x='carat',
              y='cumulative_price',
            #   color='cut',
              labels={'cumulative_price': 'Cumulative Price'},
              title='Cumulative Price by Carat')
fig.show()

In [None]:
# Area chart of cumulative price over carat, one stacked band per cut
# Sort by carat, then accumulate price separately within each cut so that
# every band shows that cut's own running total.
df_sorted = df.sort_values(by='carat')
df_sorted['cumulative_price'] = (
    df_sorted.groupby('cut', observed=True)['price'].cumsum()
)
fig = px.area(df_sorted,
              x='carat',
              y='cumulative_price',
              color='cut',
              labels={'cumulative_price': 'Cumulative Price'},
              title='Cumulative Price by Carat')
fig.show()

In [None]:
# pie chart of how many diamonds fall into each clarity grade
fig = px.pie(df, names='clarity',
             # palette
             color_discrete_sequence=px.colors.sequential.RdBu,
             # The slices come from 'clarity', so the title says so
             title='Diamond Clarity Distribution')
fig.show()

In [None]:
# treemap: nested rectangles for cut -> clarity -> color, sized by price
fig = px.treemap(df, path=['cut', 'clarity', 'color'],
                 values='price',
                 # Title lists all three levels of the hierarchy in `path`
                 title='Diamond Price by Cut, Clarity, and Color')
fig.show()

In [None]:
# parallel coordinates plot, with each line coloured by price
fig = px.parallel_coordinates(df, color='price',
                                # dimensions=['carat', 'depth'], # choose columns 
                              title='Parallel Coordinates Plot')
fig.show()

In [None]:
# Scatter matrix for carat, depth, table, and price
fig = px.scatter_matrix(df, 
                        dimensions=['carat', 'depth', 'table', 'price'], 
                        color='cut', 
                        title='Scatter Matrix')
fig.show()

In [None]:
# Gantt chart of five made-up tasks scheduled between 2024-01-01 and 2024-01-10
# Each row is one task with its start and finish timestamps
df_gantt = pd.DataFrame({
    'Task': ['Groceries', 'Shopping', 'School', 'Travel', 'Learning'],
    'Start': pd.to_datetime(['2024-01-01', '2024-01-03', '2024-01-02', '2024-01-05', '2024-01-06']),
    'Finish': pd.to_datetime(['2024-01-03', '2024-01-06', '2024-01-05', '2024-01-08', '2024-01-10'])
})
print(df_gantt.head())
# px.timeline draws one horizontal bar per row from x_start to x_end
fig = px.timeline(df_gantt, 
                  x_start='Start', 
                  x_end='Finish', 
                  y='Task', 
                  title='Gantt Chart of Tasks')
fig.show()

In [None]:
# strip plot
fig = px.strip(df, x='cut', y='price', 
               title='Strip Plot of Price by Cut')
fig.show()

In [None]:
# strip plot
fig = px.strip(df, x='cut', y='price', 
               title='Strip Plot of Price by Cut')
fig.update_traces(jitter=0.5, marker=dict(size=5)) # jitter is the amount of noise mean how far the points are spread 
# out and marker size is the size of the points
fig.show()

In [None]:
# strip plot of price per cut, coloured by clarity, with carat in the hover box
fig = px.strip(df, x='cut', y='price',
               color='clarity',
               hover_data=['carat'],
               title='Strip Plot of Price by Cut')
# The x axis shows the 'cut' column, so label it "Cut" (it was mislabelled
# "Carat Size")
fig.update_layout(title_font_size=20,
                  xaxis_title='Cut',
                  yaxis_title='Diamond Price')
fig.show()

In [None]:
# sunburst chart

fig = px.sunburst(df, 
                path=['cut', 'clarity', 'color'],
                values='price',
                title='Sunburst Chart of Diamond Price by Cut, Clarity, and Color')
fig.show()

In [None]:
# Rebuild the sunburst figure so it can be exported to a file.
# The result must be bound to `fig` (it was misspelled `ig`), otherwise the
# export calls below would act on whatever figure an earlier cell left in `fig`.
fig = px.sunburst(df,
                  path=['cut', 'clarity', 'color'],
                  values='price',
                  title='Sunburst Chart of Diamond Price by Cut, Clarity, and Color')
# save plot as html
#fig.write_html('sunburst_chart.html')

import plotly.io as pio
# NOTE(review): static image export (png/svg/pdf) presumably needs an image
# export engine such as kaleido to be installed -- confirm before enabling.
# save a png with high dpi 
#fig.write_image('sunburst_chart.png', scale=2)

# save as svg
#fig.write_image('sunburst_chart.svg')

# save as pdf
# fig.write_image('sunburst_chart.pdf')

# Plotly Express
## Marginal plots using Plotly



In [None]:
# scatter plot with marginal histograms
fig = px.scatter(df, x='carat', y='price', 
                 marginal_x='histogram', 
                 marginal_y='histogram',
                 title='Marginal Histograms of Carat and Price')
fig.show()

In [None]:
# scatter plot with marginal box plots on both axes
fig = px.scatter(df, x='carat', y='price',
                 marginal_x='box',
                 marginal_y='box',
                 # Title matches the marginal type actually drawn
                 title='Marginal Box Plots of Carat and Price')
fig.show()

In [None]:
# scatter plot with marginal violin plots on both axes
fig = px.scatter(df, x='carat', y='price',
                 marginal_x='violin',
                 marginal_y='violin',
                 # Title matches the marginal type actually drawn
                 title='Marginal Violin Plots of Carat and Price')
fig.show()

In [None]:
# scatter plot with a marginal box plot for carat (x) and a marginal
# histogram for price (y)
fig = px.scatter(df, x='carat', y='price',
                 marginal_x='box',
                 marginal_y='histogram',
                 # Title names both marginal types actually drawn
                 title='Marginal Box Plot (Carat) and Histogram (Price)')
fig.show()

In [None]:
# scatter plot with a marginal box plot for carat (x) and a marginal
# violin plot for price (y)
fig = px.scatter(df, x='carat', y='price',
                 marginal_x='box',
                 marginal_y='violin',
                 # Title names both marginal types actually drawn
                 title='Marginal Box Plot (Carat) and Violin Plot (Price)')
fig.show()

In [None]:
# scatter plot coloured by cut, with a marginal box plot for carat (x) and a
# marginal violin plot for price (y)
fig = px.scatter(df, x='carat', y='price',
                 marginal_x='box',
                 marginal_y='violin',
                 color='cut',
                 # Title names both marginal types and the colour grouping
                 title='Marginal Box Plot (Carat) and Violin Plot (Price) by Cut')
fig.show()

In [None]:
# scatter plot coloured by cut and faceted into a grid (one column per clarity,
# one row per color), requesting a box marginal on x and a violin marginal on y
# NOTE(review): the Plotly Express docs describe marginals as combinable with
# facets only along the same direction (marginal_x with facet_col, marginal_y
# with facet_row). With both facet_row and facet_col set, confirm that the
# marginals are actually drawn; the title still says "Histograms" regardless.
fig = px.scatter(df, x='carat', y='price', 
                 marginal_x='box', 
                 marginal_y='violin',
                 color='cut',
                 facet_col='clarity',
                 facet_row='color',
                 title='Marginal Histograms of Carat and Price')
# figure size
# A tall canvas so the many facet rows stay readable
fig.update_layout(height=1200, width=800)
fig.show()

# Animated Plots (Video-Style Plots)

In [None]:
# Animated scatter plot: one panel per diamond color and one animation frame
# per cut; marker colour follows cut and marker size follows price
fig = px.scatter(
    df,
    x='carat',
    y='price',
    color='cut',
    size='price',
    facet_col='color',
    # facet_row='clarity',
    animation_frame='cut',
    title='Diamond Price vs Carat Weight',
)
# Hide the legend and fix the figure size in a single layout update
fig.update_layout(showlegend=False, height=400, width=1000)
fig.show()

# save as html
# fig.write_html('diamond_price_carat_animated.html')

# More Plots in Plotly

In [4]:
# scatter plot
df = sns.load_dataset('diamonds')
fig = px.scatter(df, x='carat', y='price',
                 title='Diamond Price vs Carat Weight',
                 log_y=True,
                 log_x=True,
                 )
fig.show()

In [5]:
# Scatter plot coloured by cut, with explicit limits on both axes
fig = px.scatter(
    df,
    x='carat',
    y='price',
    color='cut',
    title='Custom Axes Ranges',
)
# Each update_* call returns the figure, so the two range updates can be chained
fig.update_xaxes(range=[0, 5]).update_yaxes(range=[0, 10000])
fig.show()

In [6]:
# Reload the diamonds dataset and take a 10% random sample again
# (random_state=10 fixes the seed); the following cells plot from this frame
df = sns.load_dataset('diamonds')
df = df.sample(frac=0.1, random_state=10)

In [9]:
# Polar chart using cut as a category
fig = px.scatter_polar(df, r='carat', theta='cut', 
                       title='Polar Chart of Carat by Cut')
fig.show()

In [10]:
df[['carat', 'depth', 'table', 'price']].corr()

Unnamed: 0,carat,depth,table,price
carat,1.0,0.025077,0.167961,0.921045
depth,0.025077,1.0,-0.290898,-0.0088
table,0.167961,-0.290898,1.0,0.116569
price,0.921045,-0.0088,0.116569,1.0


In [11]:
# Heatmap with annotations

fig = px.imshow(df[['carat', 'depth', 'table', 'price']].corr(), 
                text_auto=True, 
                title='Correlation Heatmap with Annotations')
fig.show() # correlation heatmap

In [14]:
from plotly.subplots import make_subplots
# Creating subplots
fig = make_subplots(rows=1, cols=2)
fig.add_trace(go.Scatter(x=df['carat'], y=df['price'], mode='markers', name='Scatter'), row=1, col=1)
fig.add_trace(go.Box(y=df['price'], name='Box Plot'), row=1, col=2)
fig.update_layout(title_text='Subplots Example')
fig.show()

In [15]:
# scatter plot with dual y-axes
fig = make_subplots(specs=[[{"secondary_y": True}]])
fig.add_trace(go.Scatter(x=df['carat'], y=df['price'], mode='markers', name='Carat vs Price'), secondary_y=False)
fig.add_trace(go.Scatter(x=df['carat'], y=df['depth'], mode='markers', name='Carat vs Depth'), secondary_y=True)
fig.update_layout(title_text='Scatter Plot with Dual Y-Axes')
# label each y axis
fig.update_yaxes(title_text='Price', secondary_y=False)
fig.update_yaxes(title_text='Depth', secondary_y=True)
fig.show()

In [16]:
# Waterfall chart of price by cut
# Mean price for every cut category, as a two-column frame (cut, price)
cut_avg_price = df.groupby('cut')['price'].mean().reset_index()

# Build the figure from a single vertical Waterfall trace.
# NOTE(review): go.Waterfall appears to treat y values as relative steps by
# default, so the bars accumulate -- confirm that is the intended reading for
# per-cut averages.
fig = go.Figure(
    data=[
        go.Waterfall(
            name="Average Price",
            orientation="v",
            x=cut_avg_price['cut'],
            y=cut_avg_price['price'],
        )
    ]
)
fig.update_layout(title='Waterfall Chart of Average Price by Cut')
fig.show()





In [17]:
df['clarity'].value_counts().sort_values(ascending=False)

clarity
SI1     1277
VS2     1192
SI2      926
VS1      869
VVS2     493
VVS1     353
IF       184
I1       100
Name: count, dtype: int64

In [18]:
# Funnel chart for clarity distribution
# Count diamonds per clarity grade and turn the result into a two-column
# frame named ('clarity', 'count'), most frequent grade first
clarity_counts = (
    df['clarity']
    .value_counts()
    .rename_axis('clarity')
    .reset_index(name='count')
)
fig = px.funnel(
    clarity_counts,
    x='count',
    y='clarity',
    title='Funnel Chart of Clarity Distribution',
)
fig.show()

In [19]:
# Gauge indicator showing the average diamond price on a 0..max-price scale
# (mode "gauge+number" draws a gauge with the numeric value, not a bullet chart)
fig = go.Figure(go.Indicator(
    mode="gauge+number",
    value=df['price'].mean(),
    title={"text": "Average Diamond Price"},
    # Series.max() is the vectorised equivalent of the builtin max() that was
    # iterating the column element by element
    gauge={"axis": {"range": [0, df['price'].max()]}}
))
fig.show()

In [20]:
# Candlestick chart for price trends (using carat as a time-like variable)
# A candlestick needs distinct open/high/low/close values per x position;
# passing the same column for all four collapses every candle into a flat
# tick. Summarise the prices observed at each carat weight instead:
# first/last row as open/close, max/min as high/low.
ohlc = (
    df.sort_values('carat')
      .groupby('carat')['price']
      .agg(['first', 'max', 'min', 'last'])
      .reset_index()
)
fig = go.Figure(data=[go.Candlestick(
    x=ohlc['carat'],
    open=ohlc['first'],
    high=ohlc['max'],
    low=ohlc['min'],
    close=ohlc['last'],
)])
fig.update_layout(title='Candlestick Chart: Price Trends by Carat')
fig.show()

In [21]:
# Reload the diamonds dataset without sampling: from here on `df` holds every
# row again, so this and any later cell plots the full data
df = sns.load_dataset('diamonds')
# Ternary plot of carat, depth, and table
# The three columns are mapped to the a, b and c axes of the ternary diagram
fig = px.scatter_ternary(df, a='carat', b='depth', c='table', title='Ternary Plot: Carat, Depth, and Table')
fig.show()

In [22]:
# Stacked area chart of price by cut
# Mean price for every (cut, carat) pair. 'cut' is categorical, so pass
# observed=True: without it, pandas versions whose default is observed=False
# emit a row for every cut x carat combination, including ones that never
# occur, and those rows carry NaN means that punch gaps into the areas.
df_cut_price = (
    df.groupby(['cut', 'carat'], observed=True)['price']
      .mean()
      .reset_index()
)
fig = px.area(df_cut_price, x='carat', y='price', color='cut',
              title='Stacked Area Chart of Price by Cut')
fig.show()



