# Analyzing the Societal and Economic Impact of Drug Use in the United States

#### Importing Required Libraries

In [None]:
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline
from google.colab import files

!pip install plotly pandas
import plotly.express as px
import plotly.graph_objects as go



#### Comparison of Drug-Induced Deaths with Alcohol-Induced Deaths

In [None]:
# Read the deaths CSV from the Colab /content directory into `df`.
df = pd.read_csv("/content/Deaths 2023.csv")
# Show the first rows so the loaded columns can be checked by eye.
df.head()

Unnamed: 0,County,County Code,UCD - Drug/Alcohol Induced,UCD - Drug/Alcohol Induced Code,UCD - Drug/Alcohol Induced Cause,UCD - Drug/Alcohol Induced Cause Code,MCD - Drug/Alcohol Induced,MCD - Drug/Alcohol Induced Code,MCD - Drug/Alcohol Induced Cause,MCD - Drug/Alcohol Induced Cause Code,Deaths,Population,Crude Rate
0,"Baldwin County, AL",1003,Drug-induced causes,D,Drug poisonings (overdose) Unintentional (X40-...,D1,Drug-induced causes,D,Drug poisonings (overdose) Unintentional (X40-...,D1,70,253507,27.6
1,"Baldwin County, AL",1003,Drug-induced causes,D,Drug poisonings (overdose) Unintentional (X40-...,D1,All other non-drug and non-alcohol causes,O,All other non-drug and non-alcohol causes,O9,70,253507,27.6
2,"Bibb County, AL",1007,Drug-induced causes,D,Drug poisonings (overdose) Unintentional (X40-...,D1,Drug-induced causes,D,Drug poisonings (overdose) Unintentional (X40-...,D1,10,21868,Unreliable
3,"Bibb County, AL",1007,Drug-induced causes,D,Drug poisonings (overdose) Unintentional (X40-...,D1,All other non-drug and non-alcohol causes,O,All other non-drug and non-alcohol causes,O9,10,21868,Unreliable
4,"Blount County, AL",1009,Drug-induced causes,D,Drug poisonings (overdose) Unintentional (X40-...,D1,Drug-induced causes,D,Drug poisonings (overdose) Unintentional (X40-...,D1,18,59816,Unreliable


In [None]:
# Interactive bar chart: total deaths per state, switchable between
# cause-category codes through a dropdown.

# State abbreviation = the text after the last ", " in the County label
# (e.g. "Baldwin County, AL" -> "AL"). The .str accessor leaves a missing
# County as NaN instead of raising.
df["State"] = df["County"].str.split(", ").str[-1]

# Coerce Deaths to numbers; unparseable entries become NaN, which sum() skips.
df["Deaths"] = pd.to_numeric(df["Deaths"], errors='coerce')

# Dropdown entries. 'All' applies no filter; the other entries are compared
# against the UCD and MCD category-code columns.
filter_codes = ['All', 'D', 'O', 'A']
pastel = px.colors.qualitative.Pastel
fig_data = []
buttons = []

# One bar trace per code; only the first trace starts visible.
for i, code in enumerate(filter_codes):
    if code == 'All':
        filtered_df = df
    else:
        # Keep a row when either category-code column equals the code.
        filtered_df = df[
            (df["UCD - Drug/Alcohol Induced Code"] == code) |
            (df["MCD - Drug/Alcohol Induced Code"] == code)
        ]

    # NOTE(review): the preview of this file shows the same county/UCD row
    # repeated with a different MCD category and an identical Deaths value, so
    # these sums may count a death more than once -- confirm against the
    # export definition.
    grouped = filtered_df.groupby("State", as_index=False)["Deaths"].sum()

    bar = go.Bar(
        x=grouped["State"],
        y=grouped["Deaths"],
        name=f"Code: {code}",
        visible=(i == 0),
        marker_color=pastel[i % len(pastel)]  # cycle through the palette
    )
    fig_data.append(bar)

    # Selecting this entry shows trace i and hides every other trace.
    vis_map = [j == i for j in range(len(filter_codes))]
    buttons.append(dict(
        label=code,
        method="update",
        args=[{"visible": vis_map},
              {"title": f"Total Deaths by State - Code: {code}"}]
    ))

# Create the interactive figure
fig = go.Figure(data=fig_data)

# Add the dropdown. The initial title names the first code because that is the
# only trace visible before the dropdown is used.
fig.update_layout(
    updatemenus=[dict(
        buttons=buttons,
        direction="down",
        showactive=True,
        x=1.02,
        xanchor="left",
        y=1,
        yanchor="top"
    )],
    title=f"Total Deaths by State - Code: {filter_codes[0]}",
    xaxis_title="State",
    yaxis_title="Number of Deaths",
    showlegend=False,
    margin=dict(t=120, r=220)  # room for the dropdown and the code legend
)

# Static explanation of the category codes, placed outside the plot area.
fig.add_annotation(
    text="Legend Codes:<br><b>D</b> - Drug-induced causes<br><b>O</b> - All other non-drug and non-alcohol causes<br><b>A</b> - Alcohol-induced causes",
    align="left",
    showarrow=False,
    xref="paper",
    yref="paper",
    x=1.2,
    y=1.25,
    bordercolor="lightgray",
    borderwidth=2,
    borderpad=5,
    bgcolor="#f9f9f9",
    font=dict(size=12),
)

# Show the chart
fig.show()
# Uncomment to export the chart as standalone HTML and download it from Colab.
#fig.write_html("interactive_chart_1.1.html")
#files.download("interactive_chart_1.1.html")


#### Analyzing Drug-Related Deaths by Year and Age Group (2018–2023)

In [None]:
# Read the 2018-2023 drug-induced deaths CSV from the Colab /content directory.
df1 = pd.read_csv("/content/Drug Induced Deaths 2018 to 2023.csv")
# Preview the frame just loaded (df1), not the earlier `df` frame.
df1.head()

Unnamed: 0,County,County Code,UCD - Drug/Alcohol Induced,UCD - Drug/Alcohol Induced Code,UCD - Drug/Alcohol Induced Cause,UCD - Drug/Alcohol Induced Cause Code,MCD - Drug/Alcohol Induced,MCD - Drug/Alcohol Induced Code,MCD - Drug/Alcohol Induced Cause,MCD - Drug/Alcohol Induced Cause Code,Deaths,Population,Crude Rate,State
0,"Baldwin County, AL",1003,Drug-induced causes,D,Drug poisonings (overdose) Unintentional (X40-...,D1,Drug-induced causes,D,Drug poisonings (overdose) Unintentional (X40-...,D1,70,253507,27.6,AL
1,"Baldwin County, AL",1003,Drug-induced causes,D,Drug poisonings (overdose) Unintentional (X40-...,D1,All other non-drug and non-alcohol causes,O,All other non-drug and non-alcohol causes,O9,70,253507,27.6,AL
2,"Bibb County, AL",1007,Drug-induced causes,D,Drug poisonings (overdose) Unintentional (X40-...,D1,Drug-induced causes,D,Drug poisonings (overdose) Unintentional (X40-...,D1,10,21868,Unreliable,AL
3,"Bibb County, AL",1007,Drug-induced causes,D,Drug poisonings (overdose) Unintentional (X40-...,D1,All other non-drug and non-alcohol causes,O,All other non-drug and non-alcohol causes,O9,10,21868,Unreliable,AL
4,"Blount County, AL",1009,Drug-induced causes,D,Drug poisonings (overdose) Unintentional (X40-...,D1,Drug-induced causes,D,Drug poisonings (overdose) Unintentional (X40-...,D1,18,59816,Unreliable,AL


In [None]:
# Grouped bar chart of deaths per year, one bar series per five-year age band
# from 10-14 up to 45-49.

# Labels '10-14 years', '15-19 years', ..., '45-49 years'.
age_groups_to_include = [f'{start}-{start + 4} years' for start in range(10, 50, 5)]

in_selected_bands = df1['Five-Year Age Groups'].isin(age_groups_to_include)
filtered_df1 = df1[in_selected_bands]

# Total deaths for every (year, age band) pair, as a flat table.
grouped = filtered_df1.groupby(
    ['Year', 'Five-Year Age Groups'], as_index=False
)['Deaths'].sum()

# Wide layout: one row per year, one column per age band; absent pairs become 0.
pivot_df1 = grouped.pivot(
    index='Year',
    columns='Five-Year Age Groups',
    values='Deaths',
).fillna(0)

bar_layout = dict(
    barmode='group',  # Side-by-side bars for each year
    title='Drug Induced Deaths by Age Group (Up to 49) Across Years',
    xaxis_title='Year',
    yaxis_title='Total Deaths',
    legend_title='Age Groups',
    template='plotly_white',
    width=1000,
    height=600,
)

# --- BAR CHART ---
fig = go.Figure()
for age_group in pivot_df1.columns:
    # One bar series per age band, plotted against the year index.
    fig.add_bar(x=pivot_df1.index, y=pivot_df1[age_group], name=age_group)
fig.update_layout(**bar_layout)

fig.show()
#fig.write_html("interactive_chart_1.2.html")
#files.download("interactive_chart_1.2.html")

In [None]:
# Line chart of the same year-by-age-band table, to show the trend over time.
trend_layout = dict(
    title='Trend of Drug-Induced Deaths by Age Group (Up to 49) Over Years',
    xaxis_title='Year',
    yaxis_title='Total Deaths',
    legend_title='Age Groups',
    template='plotly_white',
    hovermode='x unified',  # one hover box listing every age band for a year
    width=1000,
    height=600,
)

fig = go.Figure()
# Each column of the pivot table is one age band; draw it as a marked line.
for band_label, yearly_deaths in pivot_df1.items():
    fig.add_scatter(
        x=pivot_df1.index,
        y=yearly_deaths,
        mode='lines+markers',
        name=band_label,
    )
fig.update_layout(**trend_layout)

fig.show()
#fig.write_html("interactive_chart_1.3.html")
#files.download("interactive_chart_1.3.html")

#### Analyzing Unemployment Trends

In [None]:
# Read the unemployment CSV from the Colab /content directory into `df2`.
df2 = pd.read_csv("/content/Unemployment2023.csv")
# Show the first rows; the data is in long form (one Attribute/Value pair per row).
df2.head()

Unnamed: 0,FIPS_Code,State,Area_Name,Attribute,Value
0,1000,AL,Alabama,Civilian_labor_force_2000,2147173.0
1,1000,AL,Alabama,Employed_2000,2047731.0
2,1000,AL,Alabama,Unemployed_2000,99442.0
3,1000,AL,Alabama,Unemployment_rate_2000,4.6
4,1000,AL,Alabama,Civilian_labor_force_2001,2128027.0


In [None]:
# Bar chart of the 2023 unemployment rate, one bar per state.

# Rows carrying the 2023 unemployment-rate attribute.
rate_rows_2023 = df2[df2['Attribute'] == 'Unemployment_rate_2023']

# Keep exactly one row per state. Sorting by FIPS code first makes the kept
# row deterministic (the lowest code in each state) rather than dependent on
# the order rows happen to have in the file. The preview of df2 shows the
# state-level "Alabama" row at code 1000.
# NOTE(review): assumes county rows carry larger FIPS codes than their state
# row -- confirm against the source file.
df_unemp_2023 = (
    rate_rows_2023
    .sort_values('FIPS_Code', kind='stable')
    .drop_duplicates(subset=['State'])
    .sort_values('State')  # alphabetical x-axis
)

# Create interactive bar chart
fig = go.Figure()

fig.add_trace(go.Bar(
    x=df_unemp_2023['State'],
    y=df_unemp_2023['Value'],
    marker_color='lightskyblue',
    name='Unemployment Rate 2023',
    # <extra></extra> suppresses the secondary trace-name box in the hover label.
    hovertemplate='State: %{x}<br>Rate: %{y:.2f}%<extra></extra>'
))

# Layout tweaks
fig.update_layout(
    title='Unemployment Rate by State (2023)',
    xaxis_title='State',
    yaxis_title='Unemployment Rate (%)',
    xaxis_tickangle=45,
    template='plotly_white',
    height=600,
    width=1000
)

fig.show()
#fig.write_html("interactive_chart_2.1.html")
#files.download("interactive_chart_2.1.html")

In [None]:
# Line chart of the total number of unemployed people per year, 2018-2023.
# (pd and go come from the imports cell at the top of the notebook.)

# Step 1: pick the head-count attributes ("Unemployed_<year>"). The rate
# attributes ("Unemployment_rate_<year>") are percentages and cannot be summed
# into a total.
is_headcount = df2['Attribute'].astype(str).str.match(r'^Unemployed_\d{4}$')

# Use state-level rows only, so a person is not counted once in the state row
# and again in a county row.
# NOTE(review): assumes state-level rows are those whose FIPS_Code is a
# non-zero multiple of 1000 (the preview shows 1000 = Alabama) -- confirm.
fips_code = pd.to_numeric(df2['FIPS_Code'], errors='coerce')
is_state_row = (fips_code > 0) & (fips_code % 1000 == 0)

# .copy() gives an independent frame, so the column assignments below do not
# trigger SettingWithCopyWarning.
unemployment_data = df2[is_headcount & is_state_row].copy()

# Step 2: pull the year out of the attribute name (Unemployed_2018 -> 2018).
unemployment_data['Year'] = (
    unemployment_data['Attribute'].str.extract(r'(\d{4})', expand=False).astype(int)
)
unemployment_data['Value'] = pd.to_numeric(unemployment_data['Value'], errors='coerce')

# Restrict to the years named in the chart title.
unemployment_data = unemployment_data[unemployment_data['Year'].between(2018, 2023)]
if unemployment_data.empty:
    raise ValueError("No state-level 'Unemployed_<year>' rows found for 2018-2023 in df2.")

# Step 3: total unemployed per year across all states.
total_unemployed = unemployment_data.groupby('Year', as_index=False)['Value'].sum()

# Step 4: Create a line chart
fig = go.Figure()

fig.add_trace(go.Scatter(
    x=total_unemployed['Year'],
    y=total_unemployed['Value'],
    name="Total Unemployment",
    mode='lines+markers',
    line=dict(color="#d62728", width=3, dash='dot'),
    hovertemplate='Year: %{x}<br>Unemployed: %{y:,.0f}<extra></extra>',
))

# Layout settings. The axis title font is set through title.font; the older
# `titlefont` shortcut is not accepted by current plotly releases.
fig.update_layout(
    title="Total Unemployment Over the Years (2018–2023)",
    xaxis=dict(title="Year"),
    yaxis=dict(
        title=dict(text="Total Unemployed", font=dict(color="#d62728")),
        tickfont=dict(color="#d62728")
    ),
    legend=dict(x=0.5, y=1.15, orientation="h", xanchor="center"),
    template="plotly_white",
    hovermode="x unified",
    margin=dict(t=100)
)

fig.show()
#fig.write_html("interactive_chart_2.2.html")
#files.download("interactive_chart_2.2.html")



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



### Analyzing Drug-Induced Deaths alongside Unemployment Rates for the Year 2023

In [None]:
def get_deaths_by_state(code):
    """Sum the Deaths column per state for rows tagged with a category code.

    Parameters
    ----------
    code : str
        Value compared against both "UCD - Drug/Alcohol Induced Code" and
        "MCD - Drug/Alcohol Induced Code"; a row is kept when either matches.

    Returns
    -------
    pandas.DataFrame
        Columns "State" and "Deaths", one row per state. Reads the notebook's
        `df` frame, which must already have its "State" column.
    """
    # NOTE(review): the preview of `df` shows the same county/UCD row repeated
    # with a different MCD category and an identical Deaths value, so this sum
    # may count a death more than once -- confirm against the export definition.
    filtered = df[
        (df["UCD - Drug/Alcohol Induced Code"] == code) |
        (df["MCD - Drug/Alcohol Induced Code"] == code)
    ]
    return filtered.groupby("State", as_index=False)["Deaths"].sum()

# Step 1: deaths per state for code "D" (drug-induced).
df_d = get_deaths_by_state("D")

# Step 2: clean column names just in case
df_d.columns = df_d.columns.str.strip()
df_unemp_2023.columns = df_unemp_2023.columns.str.strip()

# Step 3: keep the 2023 unemployment-rate rows and reduce them to one
# (State, rate) pair per state, so the merge cannot fan out.
df_unemp_2023_filtered = df_unemp_2023[df_unemp_2023['Attribute'] == 'Unemployment_rate_2023']
df_unemp_2023_filtered = (
    df_unemp_2023_filtered[['State', 'Value']]
    .rename(columns={'Value': 'Unemployment_rate_2023'})
    .drop_duplicates(subset='State')
)

# Step 4: give the death count a distinct column name
df_d = df_d.rename(columns={"Deaths": "Deaths_D"})

# Step 5: merge on 'State'; the inner join keeps states present in both frames.
merged_df = pd.merge(df_d, df_unemp_2023_filtered, on="State", how="inner")

# Step 6: plot both series against state on separate y-axes
fig = go.Figure()

# Drug-induced deaths on the left axis
fig.add_trace(go.Scatter(
    x=merged_df["State"],
    y=merged_df["Deaths_D"],
    mode='lines+markers',
    name="Drug-Induced Deaths",
    line=dict(color='crimson')
))

# Unemployment rate on the right axis. The values are plotted as-is; the
# second axis is what makes the two magnitudes comparable.
fig.add_trace(go.Scatter(
    x=merged_df["State"],
    y=merged_df["Unemployment_rate_2023"],
    mode='lines+markers',
    name="Unemployment Rate (2023)",
    line=dict(color='royalblue'),
    yaxis="y2"  # Secondary y-axis
))

# Dual y-axes. Axis title fonts are set through title.font; the older
# `titlefont` shortcut is not accepted by current plotly releases.
fig.update_layout(
    title="Drug-Induced Deaths vs. Unemployment Rate by State (2023)",
    xaxis_title="State",
    yaxis=dict(
        title=dict(text="Drug-Induced Deaths", font=dict(color="crimson")),
        tickfont=dict(color="crimson"),
    ),
    yaxis2=dict(
        title=dict(text="Unemployment Rate (%)", font=dict(color="royalblue")),
        tickfont=dict(color="royalblue"),
        overlaying='y',
        side='right'
    ),
    legend=dict(x=0.01, y=0.99),
    template="plotly_white",
    margin=dict(t=60, r=60, l=60, b=80),
    xaxis_tickangle=45,
    height=600,
    width=1000
)

fig.show()
#fig.write_html("interactive_chart_2.3.html")
#files.download("interactive_chart_2.3.html")


In [None]:
# Dual-axis chart: drug-induced deaths vs. number of unemployed people per state (2023).

# Step 1: Clean column names
df_d.columns = df_d.columns.str.strip()
df2.columns = df2.columns.str.strip()

# Step 2: 2023 unemployed head-count per state, taken from state-level rows
# only. Summing every row of a state would add the state total to its own
# county rows.
# NOTE(review): assumes state-level rows are those whose FIPS_Code is a
# non-zero multiple of 1000 (the preview shows 1000 = Alabama) -- confirm.
fips_code = pd.to_numeric(df2['FIPS_Code'], errors='coerce')
is_state_row = (fips_code > 0) & (fips_code % 1000 == 0)
unemployed_2023 = df2[(df2['Attribute'] == 'Unemployed_2023') & is_state_row]
unemployed_by_state = (
    unemployed_2023
    .assign(Value=pd.to_numeric(unemployed_2023['Value'], errors='coerce'))
    .groupby('State', as_index=False)['Value'].sum()
    .rename(columns={'Value': 'Unemployed_2023'})
)

# Step 3: make sure the death count column is called Deaths_D (a no-op when an
# earlier cell has already renamed it).
df_d = df_d.rename(columns={"Deaths": "Deaths_D"})

# Step 4: merge on 'State' and order alphabetically for the x-axis
merged_df = (
    pd.merge(df_d, unemployed_by_state, on='State', how='inner')
    .sort_values("State")
)

# Step 5: Create the dual-axis chart
fig = go.Figure()

# Left Y-axis: Drug-Induced Deaths
fig.add_trace(go.Scatter(
    x=merged_df["State"],
    y=merged_df["Deaths_D"],
    name="Drug-Induced Deaths",
    mode='lines+markers',
    line=dict(color='crimson'),
    yaxis='y1'
))

# Right Y-axis: Unemployed (2023)
fig.add_trace(go.Scatter(
    x=merged_df["State"],
    y=merged_df["Unemployed_2023"],
    name="Unemployed (2023)",
    mode='lines+markers',
    line=dict(color='darkblue'),
    yaxis='y2'
))

# Step 6: layout with dual axes. Axis title fonts are set through title.font;
# the older `titlefont` shortcut is not accepted by current plotly releases.
fig.update_layout(
    title="Drug-Induced Deaths vs. Total Unemployed by State (2023)",
    xaxis=dict(title="State"),
    yaxis=dict(
        title=dict(text="Drug-Induced Deaths", font=dict(color="crimson")),
        tickfont=dict(color="crimson")
    ),
    yaxis2=dict(
        title=dict(text="Total Unemployed (2023)", font=dict(color="darkblue")),
        tickfont=dict(color="darkblue"),
        overlaying="y",
        side="right"
    ),
    legend=dict(x=1.02, y=1, xanchor='left'),
    template="plotly_white",
    margin=dict(t=60, r=80, l=60, b=100),
    xaxis_tickangle=45,
    height=600,
    width=1000
)

fig.show()
#fig.write_html("interactive_chart_2.4.html")
#files.download("interactive_chart_2.4.html")

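##### Note: The graph above shows that drug-related deaths and the number of unemployed people rise and fall together across states, suggesting that drug abuse could be one of the factors affecting employment. Because both counts also grow with a state's population, this pattern alone does not establish a causal link.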

#### Analyzing GDP

In [None]:
# Read the GDP CSV from the Colab /content directory into `df3`.
df3 = pd.read_csv('/content/GDP_All Years.csv')
# Show the first rows; the preview has one column per year and one row per
# GeoName/Description pair.
df3.head()

Unnamed: 0,GeoFips,GeoName,LineCode,Description,1998,1999,2000,2001,2002,2003,...,2015,2016,2017,2018,2019,2020,2021,2022,2023,2024
0,0,United States,,Real dollar statistics,,,,,,,...,,,,,,,,,,
1,0,United States,1.0,Real GDP (millions of chained 2017 dollars) 1,12924876,13543774,14096033,14230726,14472712,14877312,...,18799622.0,19141672.0,19612102.0,20193896.0,20715671.0,20267585.0,21494798.0,22034828.0,22671096.0,23305023
2,0,United States,2.0,Real personal income (millions of constant (...,(NA),(NA),(NA),(NA),(NA),(NA),...,15896447.7,16162029.4,16658962.0,17163074.0,17727562.0,18731605.3,19641720.3,19013960.0,19404131.7,(NA)
3,0,United States,3.0,Real PCE (millions of constant (2017) dollar...,(NA),(NA),(NA),(NA),(NA),(NA),...,12638789.0,12949012.2,13290625.5,13654925.4,13948133.0,13594721.8,14787232.3,15236191.5,15621697.3,(NA)
4,0,United States,,Current dollar statistics (millions of dollars),,,,,,,...,,,,,,,,,,


In [None]:
# Bar chart of 2023 GDP for every GeoName other than "United States".

# Normalise header and label whitespace so the string filters below match.
df3.columns = df3.columns.str.strip()
df3['GeoName'] = df3['GeoName'].astype(str).str.strip()
df3['Description'] = df3['Description'].astype(str).str.strip()

# Rows whose Description mentions "gross domestic product", excluding the
# national row. .copy() gives an independent frame, so the column assignment
# below does not trigger SettingWithCopyWarning.
is_gdp_row = df3['Description'].str.lower().str.contains('gross domestic product', na=False)
df_gdp_states = df3[(df3['GeoName'] != 'United States') & is_gdp_row].copy()

# Convert the 2023 column to numbers; placeholders such as "(NA)" become NaN.
df_gdp_states['2023'] = pd.to_numeric(df_gdp_states['2023'], errors='coerce')

# Drop rows with missing GDP data
df_gdp_states = df_gdp_states.dropna(subset=['2023'])

# Plot bar chart
fig = px.bar(
    df_gdp_states,
    x='GeoName',
    y='2023',
    title='Gross Domestic Product (GDP) by State - 2023',
    labels={'GeoName': 'State', '2023': 'GDP (in millions USD)'},
    template='plotly_white'
)

# Customize chart appearance
fig.update_layout(
    xaxis_tickangle=45,
    height=600,
    width=1000
)

fig.show()
#fig.write_html("interactive_chart_3.1.html")
#files.download("interactive_chart_3.1.html")



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [None]:
# Line chart of U.S. GDP for 2018-2023.

# United States rows whose Description mentions either GDP wording.
df_us_gdp = df3[
    (df3['GeoName'] == 'United States') &
    (df3['Description'].str.contains('Gross domestic product|Real GDP', case=False, na=False))
]
if df_us_gdp.empty:
    raise ValueError("No United States GDP row found in df3.")

# Use the first matching row. In the preview of df3 that row is
# "Real GDP (millions of chained 2017 dollars)", which is why the axis label
# below names that unit.
years = [str(year) for year in range(2018, 2024)]

# errors='coerce' already turns "(NA)" placeholders into NaN, so no separate
# replace() step is needed (its value=None behaviour differs between pandas
# versions).
gdp_values = pd.to_numeric(df_us_gdp[years].iloc[0], errors='coerce')

# Create DataFrame
us_gdp_df = pd.DataFrame({
    'Year': years,
    'GDP': gdp_values
})

fig_line = px.line(
    us_gdp_df,
    x='Year',
    y='GDP',
    title='U.S. Gross Domestic Product (2018–2023)',
    markers=True,
    labels={'GDP': 'Real GDP (millions of chained 2017 dollars)'},
    template='plotly_white'
)
fig_line.update_traces(mode='lines+markers', line=dict(color='green'))
fig_line.show()
# Uncomment to export; note the figure in this cell is `fig_line`.
#fig_line.write_html("interactive_chart_3.2.html")
#files.download("interactive_chart_3.2.html")

In [None]:
# Line chart of the year-over-year increase in U.S. GDP, 2018-2023.
# (pd and px come from the imports cell at the top of the notebook.)

# Normalise header and label whitespace so the string filters below match.
df3.columns = df3.columns.str.strip()
df3['GeoName'] = df3['GeoName'].astype(str).str.strip()
df3['Description'] = df3['Description'].astype(str).str.strip()

# United States rows whose Description mentions "GDP" (case-insensitive).
df_us_gdp = df3[
    (df3['GeoName'] == 'United States') &
    (df3['Description'].str.contains('GDP', case=False, na=False))
]

# Years shown on the chart, plus the preceding year as the baseline. Without
# the baseline the first plotted year has nothing to be compared with and
# would always show an increase of 0.
years = [str(y) for y in range(2018, 2024)]
years_with_baseline = [str(int(years[0]) - 1)] + years

if not df_us_gdp.empty:
    # First matching row only.
    # NOTE(review): more than one Description can contain "GDP"; confirm the
    # first match is the intended series.
    # errors='coerce' turns "(NA)" placeholders into NaN.
    gdp_values = pd.to_numeric(df_us_gdp[years_with_baseline].iloc[0], errors='coerce')

    # Difference against the previous year, then drop the baseline entry.
    # Remaining gaps (missing data) are shown as 0.
    gdp_diff = gdp_values.diff().loc[years].fillna(0)

    # Declines are shown as 0: this chart reports increases only, so a year
    # in which GDP fell appears as "no increase".
    gdp_diff = gdp_diff.clip(lower=0)

    gdp_diff_df = pd.DataFrame({
        'Year': gdp_diff.index,
        'GDP Increase': gdp_diff.values
    })

    fig = px.line(
        gdp_diff_df,
        x='Year',
        y='GDP Increase',
        title='Year-over-Year GDP Increase (2018–2023) - United States',
        markers=True,
        labels={'GDP Increase': 'Increase in GDP (in millions USD)'},
        template='plotly_white'
    )

    fig.update_traces(mode='lines+markers', hovertemplate='Year: %{x}<br>GDP Increase: %{y:,.2f}')
    fig.update_layout(
        xaxis_title='Year',
        yaxis_title='GDP Increase (millions USD)',
        height=500,
        width=800
    )

    fig.show()

else:
    print("No data available for GDP filtering.")
#fig.write_html("interactive_chart_3.3.html")
#files.download("interactive_chart_3.3.html")

### Analyzing Unemployment Data vs. GDP Increase for the Years 2018 to 2023

In [None]:
# Dual-axis chart: year-over-year U.S. GDP increase vs. total number of
# unemployed people, 2018-2023.
# (pd and go come from the imports cell at the top of the notebook.)

# --- GDP INCREASE DATA ---
# Normalise header and label whitespace so the string filters below match.
df3.columns = df3.columns.str.strip()
df3['GeoName'] = df3['GeoName'].astype(str).str.strip()
df3['Description'] = df3['Description'].astype(str).str.strip()

# United States rows whose Description mentions "Gross domestic product".
# NOTE(review): the earlier GDP-increase chart filters on 'GDP' instead, which
# can select a different row -- confirm which series is intended.
df_us_gdp = df3[
    (df3['GeoName'] == 'United States') &
    (df3['Description'].str.contains('Gross domestic product', case=False, na=False))
]

years = [str(y) for y in range(2017, 2024)]  # Include 2017 for difference calc

# Typed empty frame so the merge below still works when no GDP row is found.
gdp_diff_df = pd.DataFrame({
    'Year': pd.Series(dtype='int64'),
    'GDP Increase': pd.Series(dtype='float64')
})

if not df_us_gdp.empty:
    # errors='coerce' turns "(NA)" placeholders into NaN.
    gdp_values = pd.to_numeric(df_us_gdp[years].iloc[0], errors='coerce')
    # Year-over-year change; gaps become 0 and declines are clamped to 0
    # because the chart reports increases only.
    gdp_diff = gdp_values.diff().fillna(0).clip(lower=0)
    gdp_diff_df = pd.DataFrame({
        'Year': gdp_diff.index.astype(int),
        'GDP Increase': gdp_diff.values
    })
    gdp_diff_df = gdp_diff_df[gdp_diff_df['Year'] >= 2018]  # Filter for 2018–2023

# --- UNEMPLOYMENT DATA ---
# Use the head-count attributes ("Unemployed_<year>"). The rate attributes are
# percentages and cannot be summed into a total. df2 itself is left unmodified.
is_headcount = df2['Attribute'].astype(str).str.match(r'^Unemployed_\d{4}$')

# State-level rows only, so a person is not counted in both a state row and a
# county row.
# NOTE(review): assumes state-level rows are those whose FIPS_Code is a
# non-zero multiple of 1000 (the preview shows 1000 = Alabama) -- confirm.
fips_code = pd.to_numeric(df2['FIPS_Code'], errors='coerce')
is_state_row = (fips_code > 0) & (fips_code % 1000 == 0)

unemployment_data = df2[is_headcount & is_state_row].copy()
unemployment_data['Year'] = (
    unemployment_data['Attribute'].str.extract(r'(\d{4})', expand=False).astype(int)
)
unemployment_data = unemployment_data[unemployment_data['Year'].between(2018, 2023)]
unemployment_data['Value'] = pd.to_numeric(unemployment_data['Value'], errors='coerce')
if unemployment_data.empty:
    raise ValueError("No state-level 'Unemployed_<year>' rows found for 2018-2023 in df2.")

total_unemployed = (
    unemployment_data.groupby('Year', as_index=False)['Value'].sum()
    .rename(columns={'Value': 'Total Unemployed'})
)

# --- MERGE BOTH ---
merged = pd.merge(gdp_diff_df, total_unemployed, on='Year', how='inner')

# --- PLOT DUAL LINE CHART ---
fig = go.Figure()

# GDP Increase (left axis)
fig.add_trace(go.Scatter(
    x=merged['Year'],
    y=merged['GDP Increase'],
    name='GDP Increase',
    mode='lines+markers',
    line=dict(color='green', width=3),
    yaxis='y1',
    hovertemplate='Year: %{x}<br>GDP Increase: %{y:,.0f}<extra></extra>'
))

# Total Unemployed (right axis)
fig.add_trace(go.Scatter(
    x=merged['Year'],
    y=merged['Total Unemployed'],
    name='Total Unemployed',
    mode='lines+markers',
    line=dict(color='firebrick', width=3, dash='dot'),
    yaxis='y2',
    hovertemplate='Year: %{x}<br>Unemployed: %{y:,.0f}<extra></extra>'
))

# Layout. Axis title fonts are set through title.font; the older `titlefont`
# shortcut is not accepted by current plotly releases.
fig.update_layout(
    title='GDP Increase vs. Total Unemployment (2018–2023)',
    xaxis=dict(title='Year'),
    yaxis=dict(
        title=dict(text='GDP Increase (millions USD)', font=dict(color='green')),
        tickfont=dict(color='green')
    ),
    yaxis2=dict(
        title=dict(text='Total Unemployed', font=dict(color='firebrick')),
        tickfont=dict(color='firebrick'),
        overlaying='y',
        side='right'
    ),
    legend=dict(
        x=1.02,
        y=1,
        xanchor='left',
        yanchor='top',
        orientation='v'
    ),
    template='plotly_white',
    hovermode='x unified',
    margin=dict(t=100, r=120),  # extra margin on the right for legend space
    height=600
)


fig.show()
#fig.write_html("interactive_chart_3.4.html")
#files.download("interactive_chart_3.4.html")



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



##### Note: The chart above suggests that the GDP increase moves inversely to unemployment: in years when unemployment is higher, the GDP increase is smaller. This is consistent with increased unemployment weighing on GDP growth.