import pandas as pd
import numpy as np
import plotly.graph_objects as go
import plotly.express as px
import chart_studio.plotly as py
import seaborn as sns
import cufflinks as cf
%matplotlib inline
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
# Initialise Plotly's offline notebook rendering. NOTE(review): connected=True
# is understood to load plotly.js from the CDN instead of embedding it in the
# notebook -- confirm against the installed plotly version.
init_notebook_mode(connected=True)
# Put cufflinks into offline mode so the DataFrame .iplot() calls below do not
# need a Chart Studio account.
cf.go_offline()
# Demo data: 50 rows x 4 columns of standard-normal samples, one column per
# letter A-D, wrapped in a DataFrame for the cufflinks example that follows.
arr_1 = np.random.randn(50, 4)
df_1 = pd.DataFrame(data=arr_1, columns=list('ABCD'))

# Preview the first rows (only rendered when this is the last line of a cell).
df_1.head()
# Plot every column of df_1 with the .iplot() accessor (not a pandas built-in;
# presumably attached to DataFrame by the cufflinks import above).
df_1.iplot()
# Sample stock-price table bundled with Plotly Express: a 'date' column plus
# one column per ticker.
df_stocks = px.data.stocks()

# Single-series line chart. The keys of `labels` must be the *column names*
# being plotted (not the literal strings 'x'/'y'), otherwise the relabelling
# is silently ignored and the raw column names are shown.
px.line(df_stocks, x='date', y='GOOG',
        labels={'date': 'Date', 'GOOG': 'Price'})

# Several columns at once (wide-form input). Plotly Express stacks the y
# columns into a synthetic 'value' column, so that is the key to relabel for
# the y axis.
px.line(df_stocks, x='date', y=['GOOG', 'AAPL'],
        labels={'date': 'Date', 'value': 'Price'})
# Build the same kind of chart by hand with graph objects so that every trace
# and every layout property can be styled individually.
fig = go.Figure()

# One Scatter trace per ticker; `mode` selects plain lines or lines + markers.
fig.add_trace(go.Scatter(x=df_stocks.date, y=df_stocks.AAPL,
                         mode='lines', name='Apple'))
# The trace name appears in hover text; it was previously misspelled 'Amazone'.
fig.add_trace(go.Scatter(x=df_stocks.date, y=df_stocks.AMZN,
                         mode='lines+markers', name='Amazon'))
# Custom line styling: colour, width and dash pattern.
fig.add_trace(go.Scatter(x=df_stocks.date, y=df_stocks.GOOG,
                         mode='lines+markers', name='Google',
                         line=dict(color='firebrick', width=2, dash='dashdot')))

fig.update_layout(
    # X axis: visible grey axis line with outside ticks, no grid.
    xaxis=dict(
        showline=True,
        showgrid=False,
        showticklabels=True,
        linecolor='rgb(204, 204, 204)',
        linewidth=2,
        ticks='outside',
        tickfont=dict(
            family='Arial',
            size=12,
            color='rgb(82,82,82)',
        ),
    ),
    # Y axis: hidden entirely (no grid, zero line, axis line or tick labels).
    yaxis=dict(
        showgrid=False,
        zeroline=False,
        showline=False,
        showticklabels=False,
    ),
    # Fixed sizing with explicit margins instead of automatic layout.
    autosize=False,
    margin=dict(
        autoexpand=False,
        l=100,
        r=20,
        t=110,
    ),
    showlegend=False,
    plot_bgcolor='White',
)
fig.show()
# Bar chart of United States population per year, taken from the gapminder
# sample data.
df_us = px.data.gapminder().query('country == "United States"')
px.bar(data_frame=df_us, x='year', y='pop')

# Restaurant tips sample data.
df_tips = px.data.tips()

# Tip totals per day, split by the customer's sex, with a title and
# human-readable axis labels.
px.bar(
    df_tips,
    x='day',
    y='tip',
    color='sex',
    title='Tips by Sex on Each Day',
    labels={'tip': 'Tip Amount', 'day': 'Day of the Week'},
)

# Same data, with the bars for each sex placed side by side.
px.bar(
    df_tips,
    x='day',
    y='tip',
    color='sex',
    barmode='group',
)
# Iris sample data; preview the first rows.
df_iris = px.data.iris()
df_iris.head()

# Sepal width vs. sepal length: colour encodes the species, marker size
# encodes petal length, and petal width is added to the hover tooltip.
px.scatter(
    df_iris,
    x='sepal_width',
    y='sepal_length',
    color='species',
    size='petal_length',
    hover_data=['petal_width'],
)
# px.scatter(df_iris, x='sepal_width', y='sepal_length', color='species', size='petal_length')
# Hand-built scatter with graph objects: markers are coloured by sepal width
# (with the colour scale shown beside the plot) and carry the species as
# hover text. Afterwards every trace gets marker size 10 and an outline of
# width 2.
fig = go.Figure()
fig.add_trace(
    go.Scatter(
        x=df_iris.sepal_width,
        y=df_iris.sepal_length,
        mode='markers',
        text=df_iris.species,
        marker=dict(color=df_iris.sepal_width, showscale=True),
    )
)
fig.update_traces(marker=dict(size=10, line=dict(width=2)))
# For large point counts use Scattergl instead of Scatter.
# Here: 100000 random points, coloured by a third random vector on the
# Viridis scale, each marker drawn with an outline of width 1.
fig = go.Figure(
    data=go.Scattergl(
        x=np.random.randn(100000),
        y=np.random.randn(100000),
        mode='markers',
        marker={
            'color': np.random.randn(100000),
            'colorscale': 'Viridis',
            'line': {'width': 1},
        },
    )
)
fig
# Pie chart of the 2007 population of the Asian countries in gapminder.
# Built-in colour scales are listed at plotly.com/python/builtin-colorscales/
# NOTE: despite its name, df_samer holds the *Asia* rows.
df_samer = px.data.gapminder().query("year == 2007 and continent == 'Asia'")
df_samer.head()

px.pie(
    df_samer,
    names='country',
    values='pop',
    title='Population of Asian continent',
    color_discrete_sequence=px.colors.sequential.RdBu,
)
# Fully customised pie chart built from graph objects.
# One colour per slice, in the same order as the labels below.
colors = ['blue', 'green', 'black', 'purple', 'red', 'brown']

fig = go.Figure()
fig.add_trace(
    go.Pie(
        labels=['Water', 'Grass', 'Normal', 'Psychic', 'Fire', 'Ground'],
        values=[110, 90, 80, 80, 70, 60],
    )
)

# Hover and slice text both show label + percent; text size is 20; the first
# and third slices are pulled out of the pie; slices use the custom colours
# with a white outline of width 2.
fig.update_traces(
    hoverinfo='label+percent',
    textinfo='label+percent',
    textfont_size=20,
    pull=[0.1, 0, 0.2, 0, 0, 0],
    marker={
        'colors': colors,
        'line': {'color': '#FFFFFF', 'width': 2},
    },
)
fig.show()
# Histogram of the sum of two dice over 5000 simulated rolls.
# randint's upper bound is exclusive, so (1, 7) draws faces 1..6.
dice_1 = np.random.randint(1, 7, 5000)
dice_2 = np.random.randint(1, 7, 5000)
dice_sum = dice_1 + dice_2

# nbins requests the number of bars (sums range over 11 values, 2..12).
# labels renames the auto-generated 'value' column used for the hover text,
# marginal adds a second plot above the histogram (violin, box or rug), and
# color_discrete_sequence sets the bar colour.
fig = px.histogram(dice_sum, nbins=11, labels={'value': 'Dice Roll'},
                   title='5000 Dice Roll Histogram', marginal='violin',
                   color_discrete_sequence=['green'])

# The y axis of a histogram is the number of observations per bin, so it is
# titled 'Count' (it was previously mislabelled 'Dice Sum', which is what the
# x axis shows).
fig.update_layout(
    xaxis_title_text='Dice Roll',
    yaxis_title_text='Count',
    bargap=0.2,
    showlegend=False,
)
# Box plots compare the distribution of a variable across groups: the box
# spans the quartiles, the line inside it is the median, and the whiskers
# cover the remaining data except for points treated as outliers.
df_tips = px.data.tips()

# Tip amount by sex; points='all' also draws every individual observation.
px.box(
    df_tips,
    x='sex',
    y='tip',
    points='all',
)

# Tip amount per day, with one box per sex.
px.box(
    df_tips,
    x='day',
    y='tip',
    color='sex',
)

# Graph-objects version that additionally marks the mean and standard
# deviation (boxmean='sd').
fig = go.Figure()
fig.add_trace(
    go.Box(
        x=df_tips.sex,
        y=df_tips.tip,
        boxmean='sd',
        marker_color='blue',
    )
)
# More elaborate box-plot styling, using the stock sample data.
df_stocks = px.data.stocks()
fig = go.Figure()

# One box per ticker: draw every point (boxpoints='all'), spread the points
# horizontally with jitter so they overlap less, and narrow the whisker caps.
fig.add_trace(
    go.Box(
        y=df_stocks.GOOG,
        name='Google',
        boxpoints='all',
        jitter=0.5,
        whiskerwidth=0.2,
        fillcolor='blue',
    )
)
fig.add_trace(
    go.Box(
        y=df_stocks.AAPL,
        name='Apple',
        boxpoints='all',
        jitter=0.5,
        whiskerwidth=0.2,
        fillcolor='red',
    )
)

# Light-grey paper and plot background with thick white horizontal grid lines.
fig.update_layout(
    title='Google vs. Apple',
    yaxis={'gridcolor': 'rgb(255, 255, 255)', 'gridwidth': 3},
    paper_bgcolor='rgb(243, 243, 243)',
    plot_bgcolor='rgb(243, 243, 243)',
)
# A violin plot combines a box plot with a kernel density estimate (KDE):
# where the box plot summarises the data points, the violin outline shows
# the estimated density of those points.
df_tips = px.data.tips()

# Distribution of the total bill, with an embedded box plot and all points.
px.violin(
    df_tips,
    y="total_bill",
    box=True,
    points='all',
)

# Several violins at once: tip by smoker status, split by sex, with every
# column of the table available in the hover tooltip.
px.violin(
    df_tips,
    x="smoker",
    y="tip",
    color="sex",
    box=True,
    points="all",
    hover_data=df_tips.columns,
)
# Split violins: for each day, the left half shows the total-bill distribution
# of smokers and the right half that of non-smokers.
fig = go.Figure()

# Smokers -> left ('negative') half, blue.
fig.add_trace(go.Violin(x=df_tips['day'][df_tips['smoker'] == 'Yes'],
                        y=df_tips['total_bill'][df_tips['smoker'] == 'Yes'],
                        legendgroup='Yes', scalegroup='Yes', name='Yes',
                        side='negative',
                        line_color='blue'))

# Non-smokers -> right ('positive') half, red. This trace gets its own
# legend/scale group; it previously reused 'Yes' (copy-paste slip), which tied
# the 'No' legend entry and its scaling to the smokers' trace.
fig.add_trace(go.Violin(x=df_tips['day'][df_tips['smoker'] == 'No'],
                        y=df_tips['total_bill'][df_tips['smoker'] == 'No'],
                        legendgroup='No', scalegroup='No', name='No',
                        side='positive',
                        line_color='red'))
# Heatmaps of the seaborn "flights" dataset (columns used here: year, month,
# passengers).
flights = sns.load_dataset("flights")
flights

# Year on x, month on y, colour driven by passengers on the Viridis scale.
# The number of bins can be set with nbinsx and nbinsy.
fig = px.density_heatmap(
    flights,
    x='year',
    y='month',
    z='passengers',
    color_continuous_scale="Viridis",
)
fig

# Same heatmap with marginal histograms along both axes.
fig = px.density_heatmap(
    flights,
    x='year',
    y='month',
    z='passengers',
    marginal_x="histogram",
    marginal_y="histogram",
)
fig
# Reload the seaborn "flights" dataset for the 3D and scatter-matrix examples.
flights = sns.load_dataset("flights")
flights

# 3D scatter: year, month and passengers on the three axes, coloured by year,
# semi-transparent markers, in an 800 x 400 figure.
fig = px.scatter_3d(
    flights,
    x='year',
    y='month',
    z='passengers',
    color='year',
    opacity=0.7,
    width=800,
    height=400,
)
fig

# 3D line chart with one line per year.
fig = px.line_3d(
    flights,
    x='year',
    y='month',
    z='passengers',
    color='year',
)
fig

# Scatter matrix: pairwise comparison of the columns, coloured by month.
fig = px.scatter_matrix(data_frame=flights, color='month')
fig
# Polar charts lay the data out radially. The wind sample data is plotted by
# direction (angle) and frequency (radius); marker size follows frequency and
# both colour and symbol follow wind strength.
df_wind = px.data.wind()
px.scatter_polar(
    df_wind,
    r="frequency",
    theta="direction",
    color="strength",
    symbol="strength",
    size="frequency",
)

# The same data as closed radial lines; the dark template makes the lines
# easier to see. Figure size is 800 x 400.
px.line_polar(
    df_wind,
    r="frequency",
    theta="direction",
    color="strength",
    line_close=True,
    template="plotly_dark",
    width=800,
    height=400,
)
# Facets split one figure into several subplots.
df_tips = px.data.tips()

# One scatter panel per sex (columns), coloured by smoker status.
px.scatter(
    df_tips,
    x="total_bill",
    y="tip",
    color="smoker",
    facet_col="sex",
)

# Grid of histograms: rows by meal time, columns by day, with an explicit
# ordering for both facet variables.
px.histogram(
    df_tips,
    x="total_bill",
    y="tip",
    color="sex",
    facet_row="time",
    facet_col="day",
    category_orders={
        "day": ["Thur", "Fri", "Sat", "Sun"],
        "time": ["Lunch", "Dinner"],
    },
)

# Seaborn "attention" dataset: score against number of solutions, one panel
# per subject, wrapping to a new row after every 5 panels.
att_df = sns.load_dataset("attention")
fig = px.line(
    att_df,
    x='solutions',
    y='score',
    facet_col='subject',
    facet_col_wrap=5,
    title='Scores Based on Attention',
)
fig
# Animated scatter over the gapminder data: one frame per year, showing GDP
# per capita (log scale) against life expectancy. Bubble size is population,
# colour is continent, and fixed axis ranges keep the frames comparable.
df_cnt = px.data.gapminder()
px.scatter(
    df_cnt,
    x="gdpPercap",
    y="lifeExp",
    size="pop",
    size_max=55,
    color="continent",
    hover_name="country",
    animation_frame="year",
    animation_group="country",
    log_x=True,
    range_x=[100, 100000],
    range_y=[25, 90],
)

# Animated bar chart of population per continent, one frame per year.
px.bar(
    df_cnt,
    x="continent",
    y="pop",
    color="continent",
    animation_frame="year",
    animation_group="country",
    range_y=[0, 4000000000],
)