# Lab Instructions

Create 3 visualizations from a spatial and time-series dataset of your choice. Describe your dataset, including where it came from and the features it contains. Each visualization should be accompanied by at least 1–2 sentences explaining how the features do (or do not!) change over time and throughout space.

In [4]:
import pandas as pd
import plotly.express as px

# Absolute location of the provisional drug overdose death counts CSV.
# Raw string so the Windows backslashes are taken literally.
file_path = r"C:\Users\MALICIOUS\source\repos\Data_Visualization_And_Modeling\Lab\VSRR_Provisional_Drug_Overdose_Death_Counts.csv"

# Read the whole file into a DataFrame, then preview its first five rows
# (the preview is the cell's last expression, so the notebook renders it).
df = pd.read_csv(filepath_or_buffer=file_path)
df.head(n=5)


Unnamed: 0,State,Year,Month,Period,Indicator,Data Value,Percent Complete,Percent Pending Investigation,State Name,Footnote,Footnote Symbol,Predicted Value
0,AK,2015,April,12 month-ending,Heroin (T40.1),,100.0,0.0,Alaska,Numbers may differ from published reports usin...,**,
1,AK,2015,April,12 month-ending,Psychostimulants with abuse potential (T43.6),,100.0,0.0,Alaska,Numbers may differ from published reports usin...,**,
2,AK,2015,April,12 month-ending,"Opioids (T40.0-T40.4,T40.6)",,100.0,0.0,Alaska,Numbers may differ from published reports usin...,**,
3,AK,2015,April,12 month-ending,Natural & semi-synthetic opioids (T40.2),,100.0,0.0,Alaska,Numbers may differ from published reports usin...,**,
4,AK,2015,April,12 month-ending,"Natural & semi-synthetic opioids, incl. methad...",,100.0,0.0,Alaska,Numbers may differ from published reports usin...,**,


In [5]:
import pandas as pd
import numpy as np
import plotly.express as px

# -----------------------------
# 1) Load
# -----------------------------
# Re-read the CSV so this cell starts from the raw, unfiltered table.
# Raw string keeps the Windows backslashes literal.
file_path = r"C:\Users\MALICIOUS\source\repos\Data_Visualization_And_Modeling\Lab\VSRR_Provisional_Drug_Overdose_Death_Counts.csv"
df = pd.read_csv(filepath_or_buffer=file_path)

# -----------------------------
# 2) Clean + Date
# -----------------------------
# Keep only the overall death-count rows; rows for other "Indicator" values
# (e.g. the per-drug categories seen in the preview) are not used below.
df = df[df["Indicator"] == "Number of Drug Overdose Deaths"].copy()

# Convert the count columns to numbers. Values are cast to text first so a
# thousands separator ("1,234") can be stripped; regex=False makes the comma a
# literal match on every pandas version. Anything unparseable becomes NaN.
df["DataValue_num"] = pd.to_numeric(
    df["Data Value"].astype(str).str.replace(",", "", regex=False),
    errors="coerce",
)
df["Predicted_num"] = pd.to_numeric(
    df["Predicted Value"].astype(str).str.replace(",", "", regex=False),
    errors="coerce",
)

# Prefer the reported value; fall back to the predicted value when it is missing.
df["Deaths"] = df["DataValue_num"].fillna(df["Predicted_num"])

# Build a timestamp from Year + full month name (first day of that month).
# errors="coerce" turns an unparseable Year/Month into NaT instead of raising,
# so the dropna below can actually discard such rows.
df["Date"] = pd.to_datetime(
    df["Year"].astype(str) + "-" + df["Month"].astype(str),
    format="%Y-%B",
    errors="coerce",
)

# Drop rows that ended up without a usable count or date.
df = df.dropna(subset=["Deaths", "Date"])

# -----------------------------
# Visualization 1: Time-Series (US over time)
# -----------------------------
# Rows whose State code is "US", ordered chronologically so the line is
# drawn left to right.
us = df.loc[df["State"].eq("US")].sort_values(by="Date")

fig1 = px.line(
    data_frame=us,
    x="Date",
    y="Deaths",
    title="US Drug Overdose Deaths Over Time (12-month ending)",
)
fig1.show()

# -----------------------------
# Visualization 2: Spatial (Latest month map)
# -----------------------------
# Most recent date present anywhere in the cleaned table.
latest_date = df["Date"].max()

# Rows at that date, excluding the "US" rows, so only the remaining
# location codes are mapped.
at_latest = df["Date"].eq(latest_date)
not_national = df["State"].ne("US")
states_latest = df.loc[at_latest & not_national].copy()

map_title = f"Drug Overdose Deaths by State (Latest: {latest_date.strftime('%B %Y')})"

fig2 = px.choropleth(
    data_frame=states_latest,
    locations="State",           # two-letter codes matched by "USA-states"
    locationmode="USA-states",
    color="Deaths",
    scope="usa",
    hover_name="State Name",
    title=map_title,
)
fig2.show()

# -----------------------------
# Visualization 3: Top 5 states over time
# -----------------------------
# Rank the latest-month rows by death count (highest first) and keep the
# codes of the first five.
ranked_latest = states_latest.sort_values(by="Deaths", ascending=False)
top_states = ranked_latest["State"].iloc[:5].tolist()

# Full history for just those five codes, in chronological order.
in_top_five = df["State"].isin(top_states)
top_over_time = df.loc[in_top_five].sort_values(by="Date")

trend_title = f"Overdose Deaths Over Time: Top 5 States (as of {latest_date.strftime('%B %Y')})"

fig3 = px.line(
    data_frame=top_over_time,
    x="Date",
    y="Deaths",
    color="State",  # one line per state code
    title=trend_title,
)
fig3.show()
