# PLOT INTERACTIVE SCATTERPLOT USING PLOTLY EXPRESS

In [1]:
# Make sure to install plotly 
#!pip install plotly==4.14.3

In [2]:
# The plotly Python package empowers anyone to create, manipulate and render graphical figures.
# The figures are represented by data structures referred to as figures. 
# The rendering process uses the Plotly.js JavaScript library under the hood but you never need to use Java directly. 
# Figures can be represented in Python either as dictionaries or as instances of the plotly.graph_objects

# Note: 
# Plotly Express is the recommended entry-point into the plotly package 
# PLotly Express is the high-level plotly.express module that consists of Python functions which return fully-populated plotly.graph_objects.Figure objects. 
# plotly.express module contains functions that can create interactive figures using a very few lines of code
# Plotly Express is refered to as px. 
# Plotly Express is a built-in part of the plotly library 
# Plotly Express function uses graph objects internally and returns a plotly.graph_objects.Figure instance.
# check out the documentation here: https://plotly.com/python/plotly-express/ 
import plotly.express as px
import pandas as pd
import plotly.io as pio

In [3]:
salary_df = pd.read_csv('employee_salaries.csv')
salary_df

Unnamed: 0,Years_of_Experience,Salary
0,1.000000,40000.00000
1,2.257942,65979.42119
2,2.450875,67253.57549
3,2.498713,67342.43510
4,2.613729,70532.20448
...,...,...
1995,19.178575,421534.69100
1996,19.254499,430478.02650
1997,19.353369,438090.84540
1998,19.842520,482242.16080


In [5]:
# Plot Years of Experience Vs. Salary Using Plolty Express

fig1 = px.scatter(salary_df, x = 'Years_of_Experience', y = 'Salary')
pio.write_html(fig1, 'plot11.html',auto_open=True)

In [6]:
# Let's import another more advanced dataset entitled University admission (university_admission)

# GRE Scores (out of 340)
# TOEFL Scores (out of 120)
# University Rating (out of 5)
# Statement of Purpose (SOP) 
# Letter of Recommendation (LOR) Strength (out of 5)
# Undergraduate GPA (out of 10)
# Research Experience (either 0 or 1)
# Chance of admission (ranging from 0 to 1)

admission_df = pd.read_csv('university_admission.csv')
admission_df

Unnamed: 0,Serial No.,GRE Score,TOEFL Score,University Rating,SOP,LOR,CGPA,Research,Chance of Admit
0,1,337,118,4,4.5,4.5,9.65,1,0.92
1,2,324,107,4,4.0,4.5,8.87,1,0.76
2,3,316,104,3,3.0,3.5,8.00,1,0.72
3,4,322,110,3,3.5,2.5,8.67,1,0.80
4,5,314,103,2,2.0,3.0,8.21,0,0.65
...,...,...,...,...,...,...,...,...,...
495,496,332,108,5,4.5,4.0,9.02,1,0.87
496,497,337,117,5,5.0,5.0,9.87,1,0.96
497,498,330,120,5,4.5,5.0,9.56,1,0.93
498,499,312,103,4,4.0,5.0,8.43,0,0.73



- **Plot the scatter plot for GRE Score vs. chance of admission**
- **What do you infer from that plot?**
- **Use the color attribute to show the university rating as a third dimension**

In [8]:
fig1 = px.scatter(admission_df, x='GRE Score',y='Chance of Admit')
pio.write_html(fig1, 'plot12.html',auto_open=True)
print("By seeing the graph we can conclude theat as the GRE score is high the student chance of entering to that particular university will be high");
fig11 = px.scatter_3d(admission_df, x = 'GRE Score', y = 'Chance of Admit', z = 'University Rating', color='University Rating')
fig11.update_layout(
    template='plotly_dark'
)
pio.write_html(fig11, 'plot13.html',auto_open=True)

By seeing the graph we can conclude theat as the GRE score is high the student chance of entering to that particular university will be high


# PLOT INTERACTIVE BUBBLE CHART (SCATTERPLOT WITH SIZE)

In [9]:
# Let's add a fourth variable "SOP" as the size 
fig2 = px.scatter(admission_df, x = 'GRE Score', y = 'Chance of Admit', color = 'University Rating', size = 'SOP')
pio.write_html(fig2, 'plot2.html',auto_open=True)

In [10]:
# You can also add more data on hover using hover_data
fig2 = px.scatter(admission_df, x = 'GRE Score', y = 'Chance of Admit', color = 'University Rating', size = 'SOP',hover_data = ['LOR'])
pio.write_html(fig2, 'plot21.html',auto_open=True)


- **Modify the SOP column to make the bubble size variations more prominent** 

In [11]:
#admission_df['SOP']=admission_df['SOP']**2
import numpy as np
#admission_df['SOP']=np.sqrt(admission_df['SOP'])
fig2 = px.scatter(admission_df, x = 'GRE Score', y = 'Chance of Admit', color = 'University Rating', size = 'SOP',hover_data = ['LOR'])
pio.write_html(fig2, 'plot22.html',auto_open=True)

# PLOT INTERACTIVE SINGLE LINEPLOT USING PLOTLY EXPRESS

In [12]:
# Import the crypto currency dataset
crypto_df = pd.read_csv('crypto_prices.csv')
crypto_df.head(10)

Unnamed: 0,Date,BTC-USD Price,ETH-USD Price,LTC-USD Price
0,9/17/2014,457.334015,,5.05855
1,9/18/2014,424.440002,,4.68523
2,9/19/2014,394.79599,,4.32777
3,9/20/2014,408.903992,,4.28644
4,9/21/2014,398.821014,,4.24592
5,9/22/2014,402.152008,,4.24235
6,9/23/2014,435.790985,,4.74657
7,9/24/2014,423.204987,,4.66679
8,9/25/2014,411.574005,,4.51465
9,9/26/2014,404.424988,,4.43359


In [13]:
fig3 = px.line(crypto_df, x = 'Date', y = 'BTC-USD Price')
pio.write_html(fig3, 'plot3.html',auto_open=True)


- **Plot interactive line plot for Ethereum and Litecoin.**
- **What is the maximum price of Bitcoin, Ethereum and Litecoin over the specified time period?**
- **Indicate the date when these peak prices took place**


In [14]:
crypto_df_cleaned = crypto_df.dropna(subset=['Date', 'BTC-USD Price', 'ETH-USD Price','LTC-USD Price'])
fig31 = px.line(crypto_df_cleaned, x = 'Date', y = 'BTC-USD Price')
fig32 = px.line(crypto_df_cleaned, x = 'Date', y = 'ETH-USD Price')
fig33 = px.line(crypto_df_cleaned, x = 'Date', y = 'LTC-USD Price')

pio.write_html(fig31, 'plot31.html',auto_open=True)
pio.write_html(fig32, 'plot32.html',auto_open=True)
pio.write_html(fig33, 'plot33.html',auto_open=True)

# Find the dates when prices were at their maximum
max_btc = crypto_df_cleaned['BTC-USD Price'].max()
max_eth = crypto_df_cleaned['ETH-USD Price'].max()
max_ltc = crypto_df_cleaned['LTC-USD Price'].max()

date_max_btc = crypto_df_cleaned.loc[crypto_df_cleaned['BTC-USD Price'].idxmax(), 'Date']
date_max_eth = crypto_df_cleaned.loc[crypto_df_cleaned['ETH-USD Price'].idxmax(), 'Date']
date_max_ltc = crypto_df_cleaned.loc[crypto_df_cleaned['LTC-USD Price'].idxmax(), 'Date']

# Print the maximum values and their corresponding dates
print("Maximum BTC-USD Price:", max_btc, "on", date_max_btc)
print("Maximum ETH-USD Price:", max_eth, "on", date_max_eth)
print("Maximum LTC-USD Price:", max_ltc, "on", date_max_ltc)

Maximum BTC-USD Price: 61243.08594 on 3/13/2021
Maximum ETH-USD Price: 1977.276855 on 4/1/2021
Maximum LTC-USD Price: 358.3359985 on 12/18/2017


# PLOT INTERACTIVE MULTIPLE LINE PLOTS USING PLOTLY EXPRESS

In [16]:
crypto_df = pd.read_csv('crypto_prices.csv')
crypto_df.head()

Unnamed: 0,Date,BTC-USD Price,ETH-USD Price,LTC-USD Price
0,9/17/2014,457.334015,,5.05855
1,9/18/2014,424.440002,,4.68523
2,9/19/2014,394.79599,,4.32777
3,9/20/2014,408.903992,,4.28644
4,9/21/2014,398.821014,,4.24592


In [17]:
crypto_df.columns

Index(['Date', 'BTC-USD Price', 'ETH-USD Price', 'LTC-USD Price'], dtype='object')

In [18]:
fig4 =px.line()

for i in crypto_df.columns[1:]:
    fig4.add_scatter(x = crypto_df['Date'], y = crypto_df[i], name = i)
    
pio.write_html(fig4, 'plot41.html',auto_open=True)


- **Use "compare data on hover" feature to indicate the prices of LTC and ETH when BTC price peaked.**
- **Use Pandas operations to filter out to filter out the DataFrame and confirm your answers**

In [19]:
fig4 = px.line()

# Iterate through the columns except for 'Date'
for i in crypto_df.columns[1:]:
    fig4.add_scatter(x=crypto_df['Date'], y=crypto_df[i], name=i)

# Enable the "compare data on hover" feature
fig4.update_layout(hovermode='x unified')

# Find the date when BTC price peaked
max_btc_price = crypto_df['BTC-USD Price'].max()
btc_peak_date = crypto_df.loc[crypto_df['BTC-USD Price'] == max_btc_price, 'Date'].values[0]
print("Max price is :: ",max_btc_price, ", At location :: ", btc_peak_date)

crypto_df[crypto_df['BTC-USD Price'] == crypto_df['BTC-USD Price'].max()]

crypto_df[crypto_df['Date'] == '3/13/2021']

# Save the plot as an HTML file and open it
pio.write_html(fig4, 'plot42.html', auto_open=True)

Max price is ::  61243.08594 , At location ::  3/13/2021


# PLOT INTERACTIVE PIE CHARTS

In [20]:
# Define a dictionary with all crypto allocation in a portfolio
# Note that total summation = 100% 

my_dict = {'Allocation %' : [20, 20, 20, 20, 20]}
my_dict

{'Allocation %': [20, 20, 20, 20, 20]}

In [21]:
crypto_df = pd.DataFrame(data = my_dict , index = ['ADA','BTC','LTC','ETC','XRP'])
crypto_df

Unnamed: 0,Allocation %
ADA,20
BTC,20
LTC,20
ETC,20
XRP,20


In [22]:
# Use Plotly Express to plot a pie chart 
fig5 = px.pie(crypto_df, values = 'Allocation %', names = ['ADA','BTC','LTC','ETC','XRP'], title = 'Crypto Occupancy')
pio.write_html(fig5, 'plot51.html',auto_open=True)


- **Assume that you became bullish on XRP and decided to allocate 60% of your assets in it. You also decided to equally divide the rest of your assets in other coins (BTC, LTC, ADA, and ETH). Change the allocations and plot the pie chart.**
- **Use 'hole' attribute and see its impact on the pie chart (External Research is Required)**

In [23]:
my_dict = {'Allocation %' : [10, 10, 10, 10, 60]}
crypto_df = pd.DataFrame(data = my_dict , index = ['ADA','BTC','LTC','ETC','XRP'])
fig5 = px.pie(crypto_df, values = 'Allocation %', names = ['ADA','BTC','LTC','ETC','XRP'], title = 'Crypto Occupancy')
pio.write_html(fig5, 'plot52.html',auto_open=True)
fig5 = px.pie(crypto_df, values = 'Allocation %', names = ['ADA','BTC','LTC','ETC','XRP'], title = 'Crypto Occupancy', hole = 0.3)
pio.write_html(fig5, 'plot53.html',auto_open=True)

#  PLOT INTERACTIVE BAR CHART 

In [24]:
# Gapminder combines data from multiple sources in a time-series format
# Check this out: https://www.gapminder.org/data/
data = px.data.gapminder()
data.head(10)

Unnamed: 0,country,continent,year,lifeExp,pop,gdpPercap,iso_alpha,iso_num
0,Afghanistan,Asia,1952,28.801,8425333,779.445314,AFG,4
1,Afghanistan,Asia,1957,30.332,9240934,820.85303,AFG,4
2,Afghanistan,Asia,1962,31.997,10267083,853.10071,AFG,4
3,Afghanistan,Asia,1967,34.02,11537966,836.197138,AFG,4
4,Afghanistan,Asia,1972,36.088,13079460,739.981106,AFG,4
5,Afghanistan,Asia,1977,38.438,14880372,786.11336,AFG,4
6,Afghanistan,Asia,1982,39.854,12881816,978.011439,AFG,4
7,Afghanistan,Asia,1987,40.822,13867957,852.395945,AFG,4
8,Afghanistan,Asia,1992,41.674,16317921,649.341395,AFG,4
9,Afghanistan,Asia,1997,41.763,22227415,635.341351,AFG,4


In [25]:
# Gapminder combines data from multiple sources in a time-series format
# Check this out: https://www.gapminder.org/data/
# You can read the data directly as follows: data = px.data.gapminder()

# Alternatively, you can import the data as follows:

data = pd.read_csv('gapminder.csv')
data

Unnamed: 0.1,Unnamed: 0,country,continent,year,lifeExp,pop,gdpPercap,iso_alpha,iso_num
0,0,Afghanistan,Asia,1952,28.801,8425333,779.445314,AFG,4
1,1,Afghanistan,Asia,1957,30.332,9240934,820.853030,AFG,4
2,2,Afghanistan,Asia,1962,31.997,10267083,853.100710,AFG,4
3,3,Afghanistan,Asia,1967,34.020,11537966,836.197138,AFG,4
4,4,Afghanistan,Asia,1972,36.088,13079460,739.981106,AFG,4
...,...,...,...,...,...,...,...,...,...
1699,1699,Zimbabwe,Africa,1987,62.351,9216418,706.157306,ZWE,716
1700,1700,Zimbabwe,Africa,1992,60.377,10704340,693.420786,ZWE,716
1701,1701,Zimbabwe,Africa,1997,46.809,11404948,792.449960,ZWE,716
1702,1702,Zimbabwe,Africa,2002,39.989,11926563,672.038623,ZWE,716


In [26]:
# Filter the data based on the country of choice
country_df = data[ data['country'] == 'Canada']
country_df

Unnamed: 0.1,Unnamed: 0,country,continent,year,lifeExp,pop,gdpPercap,iso_alpha,iso_num
240,240,Canada,Americas,1952,68.75,14785584,11367.16112,CAN,124
241,241,Canada,Americas,1957,69.96,17010154,12489.95006,CAN,124
242,242,Canada,Americas,1962,71.3,18985849,13462.48555,CAN,124
243,243,Canada,Americas,1967,72.13,20819767,16076.58803,CAN,124
244,244,Canada,Americas,1972,72.88,22284500,18970.57086,CAN,124
245,245,Canada,Americas,1977,74.21,23796400,22090.88306,CAN,124
246,246,Canada,Americas,1982,75.76,25201900,22898.79214,CAN,124
247,247,Canada,Americas,1987,76.86,26549700,26626.51503,CAN,124
248,248,Canada,Americas,1992,77.95,28523502,26342.88426,CAN,124
249,249,Canada,Americas,1997,78.61,30305843,28954.92589,CAN,124


In [27]:
fig6 = px.bar(country_df, x = 'year', y = 'pop', title = 'Population of Canada', labels = {'pop': 'Population of Canada'},height = 700)
pio.write_html(fig6, 'plot61.html',auto_open=True)

In [28]:
# You can add hoverdata and color (third dimension) as follows:
fig6 = px.bar(country_df, x = 'year', y = 'pop', color = 'lifeExp', hover_data = ['gdpPercap'], title = 'Population of Canada', labels = {'pop': 'Population of Canada'},height = 700)
pio.write_html(fig6, 'plot62.html',auto_open=True)

- **Plot similar plot for Egypt instead of Canada**


In [29]:
country_df = data[ data['country'] == 'Egypt']
fig6 = px.bar(country_df, x = 'year', y = 'pop', color = 'lifeExp', hover_data = ['gdpPercap'], title = 'Population of Egypt', labels = {'pop': 'Population of Egypt'},height = 700)
pio.write_html(fig6, 'plot63.html',auto_open=True)

#  PLOT INTERACTIVE GANTT CHART

In [30]:
# Define Job #1 
job_1 = {'Task': 'Development','start': '2023-01-03', 'end': '2023-03-03'}
job_1

{'Task': 'Development', 'start': '2023-01-03', 'end': '2023-03-03'}

In [31]:
# Define Job #2 
job_2 = {'Task': 'QA','start': '2023-03-04', 'end': '2023-04-03'}
job_2

{'Task': 'QA', 'start': '2023-03-04', 'end': '2023-04-03'}

In [32]:
# Define Job #3
job_3 = {'Task': 'Documentation','start': '2023-04-04', 'end': '2023-04-18'}
job_3

{'Task': 'Documentation', 'start': '2023-04-04', 'end': '2023-04-18'}

In [33]:
project = pd.DataFrame([job_1,job_2,job_3])
project

Unnamed: 0,Task,start,end
0,Development,2023-01-03,2023-03-03
1,QA,2023-03-04,2023-04-03
2,Documentation,2023-04-04,2023-04-18


In [34]:
fig6 = px.timeline(project, x_start = 'start', x_end = 'end', y = 'Task')
fig6.update_yaxes(autorange = 'reversed')
pio.write_html(fig6, 'plot71.html',auto_open=True)


- **Add an additional task of "Send the course for approval" that lasts for 1 day and that starts immediately after "Edit Videos & Upload Content"** 

In [35]:
# Define Job #4
job_4 = {'Task': 'DEMO','start': '2023-04-18', 'end': '2023-04-19'}
project = pd.DataFrame([job_1,job_2,job_3,job_4])
fig6 = px.timeline(project, x_start = 'start', x_end = 'end', y = 'Task')
fig6.update_yaxes(autorange = 'reversed')
pio.write_html(fig6, 'plot72.html',auto_open=True)

#  PLOT INTERACTIVE SUNBURST

In [36]:
# A sunburst plot represents hierarchial data as sectors laid out over several levels of concentric rings
restaurant_df = pd.read_csv('restaurant_mini.csv')
restaurant_df.dropna()


Unnamed: 0,Customer ID,Day,Dining or Takeout,Age,Invoice
0,1,Saturday,Dining,23,45
1,2,Saturday,Dining,22,70
2,3,Sunday,Takeout,26,80
3,4,Sunday,Takeout,30,100


In [37]:
fig8 = px.sunburst(restaurant_df, path =['Dining or Takeout','Day','Age'],values ='Invoice')
pio.write_html(fig8, 'plot81.html',auto_open=True)


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.




- **Import the full restaurant dataset "restaurant.csv"**
- **Plot the sunburst plot using the following: path = [day, time, sex] and value = [total bill]** 

In [38]:
restaurant_df = pd.read_csv('restaurant.csv')
restaurant_df.dropna()

Unnamed: 0.1,Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,0,16.99,1.01,Female,No,Sun,Dinner,2
1,1,10.34,1.66,Male,No,Sun,Dinner,3
2,2,21.01,3.50,Male,No,Sun,Dinner,3
3,3,23.68,3.31,Male,No,Sun,Dinner,2
4,4,24.59,3.61,Female,No,Sun,Dinner,4
...,...,...,...,...,...,...,...,...
239,239,29.03,5.92,Male,No,Sat,Dinner,3
240,240,27.18,2.00,Female,Yes,Sat,Dinner,2
241,241,22.67,2.00,Male,Yes,Sat,Dinner,2
242,242,17.82,1.75,Male,No,Sat,Dinner,2


In [39]:
fig8 = px.sunburst(restaurant_df, path =['day','time','sex'],values ='total_bill')
pio.write_html(fig8, 'plot82.html',auto_open=True)


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.

