## MSDS 455 Data Visualization
## Assignment 3 Hierarchical & Part-to-Whole Data
## Melek Mizher
## Data used was sourced from Kaggle from a dataset added by Dan Ofer
## All the data used in the visualizations below focuses on USP Drug Classification data from 2018
##
### https://www.kaggle.com/datasets/danofer/usp-drug-classification

# Required Dependencies

In [1]:
#Required Dependency to get data hosted in GitHub with Requests and Pandas
#!pip install openpyxl
#Plotly is required for the Visuazliation
!pip install plotly



# Import Required Libraries

In [2]:
#Import
# Standard library: used to create the output folder for the saved figures
import os

# For DataFrame Structure and Data Manipulation
import pandas as pd

# Plotly for the representations
import plotly
import plotly.express as px

# Data Setup

### Obtain the data from the GitHub repository so the notebook runs on other computers

In [3]:
# Location of the pre-processed CSV: the raw-file link served by the GitHub repository.
url = (
    'https://github.com/MelekM/Mizher_MSDS455_Assignment3_HierarchicalData/raw/main/Data/usp_drug_classification.csv'
)

### Extract Data of Interest From CSV into a Pandas DataFrame

In [4]:
# Download the CSV behind `url` and parse it into a pandas DataFrame.
df = pd.read_csv(filepath_or_buffer=url)

In [5]:
# Display the freshly loaded DataFrame as the cell output.
df

Unnamed: 0,usp_category,usp_class,usp_drug,kegg_id_drug,drug_example,kegg_id_drug_example,nomenclature
0,Analgesics,Nonsteroidal Anti-inflammatory Drugs,celecoxib,,celecoxib,D00567,(JAN/USAN/INN)
1,Analgesics,Nonsteroidal Anti-inflammatory Drugs,diclofenac,DG00441,diclofenac,D07816,(USAN/INN)
2,Analgesics,Nonsteroidal Anti-inflammatory Drugs,diclofenac,DG00441,diclofenac potassium,D00903,(USAN)
3,Analgesics,Nonsteroidal Anti-inflammatory Drugs,diclofenac,DG00441,diclofenac sodium,D00904,(JP17/USP)
4,Analgesics,Nonsteroidal Anti-inflammatory Drugs,diclofenac,DG00441,diclofenac diethylamine,D07817,
...,...,...,...,...,...,...,...
1798,Therapeutic Nutrients/Minerals/Electrolytes,Electrolyte/Mineral Replacement,sodium,,sodium lactate,D02183,(JAN/USP)
1799,Therapeutic Nutrients/Minerals/Electrolytes,Electrolyte/Mineral Replacement,sodium,,sodium ferric gluconate complex,D05859,(USAN)
1800,Therapeutic Nutrients/Minerals/Electrolytes,Electrolyte/Mineral Replacement,sodium,,"sodium phosphate, monobasic",D04400,(USP)
1801,Therapeutic Nutrients/Minerals/Electrolytes,Electrolyte/Mineral Replacement,sodium,,"sodium phosphate, dibasic",D05869,(USP)


## Removing unnecessary columns from data

In [6]:
# Remove the KEGG identifier and nomenclature columns, which the charts below do not use.
# errors='ignore' keeps this cell idempotent: because it rebinds `df`, running it a
# second time would otherwise raise KeyError for the already-removed columns.
df = df.drop(
    columns=['kegg_id_drug', 'kegg_id_drug_example', 'nomenclature'],
    errors='ignore',
)

In [7]:
# Display the DataFrame again to confirm which columns remain after the drop.
df

Unnamed: 0,usp_category,usp_class,usp_drug,drug_example
0,Analgesics,Nonsteroidal Anti-inflammatory Drugs,celecoxib,celecoxib
1,Analgesics,Nonsteroidal Anti-inflammatory Drugs,diclofenac,diclofenac
2,Analgesics,Nonsteroidal Anti-inflammatory Drugs,diclofenac,diclofenac potassium
3,Analgesics,Nonsteroidal Anti-inflammatory Drugs,diclofenac,diclofenac sodium
4,Analgesics,Nonsteroidal Anti-inflammatory Drugs,diclofenac,diclofenac diethylamine
...,...,...,...,...
1798,Therapeutic Nutrients/Minerals/Electrolytes,Electrolyte/Mineral Replacement,sodium,sodium lactate
1799,Therapeutic Nutrients/Minerals/Electrolytes,Electrolyte/Mineral Replacement,sodium,sodium ferric gluconate complex
1800,Therapeutic Nutrients/Minerals/Electrolytes,Electrolyte/Mineral Replacement,sodium,"sodium phosphate, monobasic"
1801,Therapeutic Nutrients/Minerals/Electrolytes,Electrolyte/Mineral Replacement,sodium,"sodium phosphate, dibasic"


# Data Representation as Total Number of Drugs per Category and Class

In [8]:
# Per-category row count: group on usp_category (kept as a regular column) and
# count the non-null usp_class entries inside each group.
category_count_df = (
    df.groupby(by='usp_category', as_index=False)[['usp_class']]
      .count()
)

In [9]:
# category_count_df holds one row per usp_category. The column that is still named
# 'usp_class' contains the number of rows (non-null usp_class entries) in that category,
# i.e. how many drug-example rows fall under each category.
category_count_df

Unnamed: 0,usp_category,usp_class
0,Analgesics,96
1,Anesthetics,4
2,Anti-Addiction/Substance Abuse Treatment Agents,20
3,Anti-inflammatory Agents,46
4,Antibacterials,196
5,Anticonvulsants,42
6,Antidementia Agents,9
7,Antidepressants,67
8,Antiemetics,52
9,Antimigraine Agents,28


In [10]:
# Per-class row count: group on usp_class (kept as a regular column) and count the
# non-null drug_example entries inside each group.
class_count_df = (
    df.groupby(by='usp_class', as_index=False)[['drug_example']]
      .count()
)

In [11]:
# Display the per-class counts; the 'drug_example' column holds the count for each usp_class.
class_count_df

Unnamed: 0,usp_class,drug_example
0,1st Generation/Typical,34
1,2nd Generation/Atypical,17
2,Alcohol Deterrents/Anti-craving,5
3,Alkylating Agents,11
4,Alpha-adrenergic Agonists,13
...,...,...
141,Treatment-Resistant,1
142,Tricyclics,18
143,Vaccines,22
144,"Vasodilators, Direct-acting Arterial",4


# Data Visualization

In [17]:
# Define the figure as a Plotly Express sunburst.
# `path` lists the hierarchy columns from the root (category) out to the leaves (drug example).
fig = px.sunburst(df, path=['usp_category', 'usp_class', 'usp_drug', 'drug_example'])

# Add a title to the sunburst chart.
fig.update_layout(title_text="All USP Drugs Sunburst Visualization", font_size=16)

# Create the output folder if it is missing; otherwise the save below fails with
# FileNotFoundError on a machine where 'Graphics' does not exist yet.
os.makedirs('Graphics', exist_ok=True)

# Save the sunburst chart as an HTML file.
plotly.offline.plot(fig, filename='Graphics/Sunburst.html')

# Show the sunburst in the cell output.
fig.show()

In [18]:
# Define the figure as a Plotly Express sunburst.
# `path` lists the hierarchy columns from the root (category) out to the leaves (class).
fig = px.sunburst(df, path=['usp_category', 'usp_class'])

# Add a title to the sunburst chart.
fig.update_layout(title_text="USP Categories and Classes Sunburst Visualization", font_size=16)

# Create the output folder if it is missing; otherwise the save below fails with
# FileNotFoundError on a machine where 'Graphics' does not exist yet.
os.makedirs('Graphics', exist_ok=True)

# Save the sunburst chart as an HTML file.
plotly.offline.plot(fig, filename='Graphics/ClassCategory_Level_Sunburst.html')

# Show the sunburst in the cell output.
fig.show()

In [13]:
# Define the figure as a Plotly Express treemap.
# `path` lists the hierarchy columns from the root (category) down to the leaves (drug example).
# `width`/`height` set the size of the visualization in pixels.
fig = px.treemap(df,
                 path=['usp_category', 'usp_class', 'usp_drug', 'drug_example'],
                 width=1920,
                 height=1080
)

# Add a title to the treemap.
fig.update_layout(title_text="All USP Drugs Treemap Visualization", font_size=16)

# Create the output folder if it is missing; otherwise the save below fails with
# FileNotFoundError on a machine where 'Graphics' does not exist yet.
os.makedirs('Graphics', exist_ok=True)

# Save the treemap as an HTML file.
plotly.offline.plot(fig, filename='Graphics/All_Treemap.html')

# Show the treemap in the cell output.
fig.show()

## Simplified Treemap

In [14]:
# Define the figure as a Plotly Express treemap.
# `path` lists the hierarchy columns from the root (category) down to the leaves (drug).
# Counts automatically size the leaves of the tree.
# `width`/`height` set the size of the visualization in pixels.
fig = px.treemap(df,
                 path=['usp_category', 'usp_class', 'usp_drug'],
                 width=1920,
                 height=1080
                 )

# Add a title to the treemap.
fig.update_layout(title_text="USP Drug Level Treemap Visualization", font_size=16)

# Create the output folder if it is missing; otherwise the save below fails with
# FileNotFoundError on a machine where 'Graphics' does not exist yet.
os.makedirs('Graphics', exist_ok=True)

# Save the treemap as an HTML file.
plotly.offline.plot(fig, filename='Graphics/Drug_Level_Treemap.html')

# Show the treemap in the cell output.
fig.show()

In [15]:
# Define the figure as a Plotly Express treemap.
# `path` lists the hierarchy columns from the root (category) down to the leaves (class).
# Counts automatically size the leaves of the tree.
# `width`/`height` set the size of the visualization in pixels.
fig = px.treemap(df,
                 path=['usp_category', 'usp_class'],
                 width=1920,
                 height=1080
                 )

# Add a title to the treemap.
fig.update_layout(title_text="USP Therapeutic Class Treemap Visualization", font_size=16)

# Create the output folder if it is missing; otherwise the save below fails with
# FileNotFoundError on a machine where 'Graphics' does not exist yet.
os.makedirs('Graphics', exist_ok=True)

# Save the treemap as an HTML file.
plotly.offline.plot(fig, filename='Graphics/Class_Level_Treemap.html')

# Show the treemap in the cell output.
fig.show()

In [16]:
# Define the figure as a Plotly Express treemap.
# `path` holds a single level here, so every tile is one usp_category.
# Counts automatically size the leaves of the tree.
# `width`/`height` set the size of the visualization in pixels.
fig = px.treemap(df,
                 path=['usp_category'],
                 width=1920,
                 height=1080
                 )

# Add a title to the treemap.
fig.update_layout(title_text="USP Therapeutic Category Treemap Visualization", font_size=16)

# Create the output folder if it is missing; otherwise the save below fails with
# FileNotFoundError on a machine where 'Graphics' does not exist yet.
os.makedirs('Graphics', exist_ok=True)

# Save the treemap as an HTML file.
plotly.offline.plot(fig, filename='Graphics/Category_Level_Treemap.html')

# Show the treemap in the cell output.
fig.show()