In [3]:
# Step 1: Import Libraries
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
import folium
from statsmodels.formula.api import ols

In [4]:
# Step 2: Load Datasets
# Both inputs are CSV files read with pandas defaults.
# NOTE(review): these are absolute, machine-specific Windows paths; the notebook
# will not run elsewhere until they are pointed at a shared/relative data folder.
mobility_csv_path = r"C:\Users\bunny\Downloads\archive (1)\community_dataset_with_infection_count.csv"
gdp_csv_path = r"C:\Users\bunny\Downloads\archive (2)\Per Capita GDP of All Countries 1970 to 2022.csv"

covid_mobility = pd.read_csv(mobility_csv_path)
gdp = pd.read_csv(gdp_csv_path)

In [5]:
# Step 3: Data Cleaning

# Parse DATE_VAL as day-month-(two-digit)year. Values that do not match the
# format become NaT (errors='coerce') instead of raising.
parsed_dates = pd.to_datetime(covid_mobility['DATE_VAL'], format='%d-%m-%y', errors='coerce')

# Keep only the columns used by the rest of the notebook.
keep_columns = [
    'COUNTRY_REGION',
    'DATE_VAL',
    'RETAIL_AND_RECREATION_PCT',
    'WORKPLACES_PCT',
    'RESIDENTIAL_PCT',
    'COVID_CONFIRMED',
    'COVID_DEATHS',
]
covid_mobility = covid_mobility.assign(DATE_VAL=parsed_dates)[keep_columns]

# Per-country mean of each mobility column over rows dated in 2020.
# Rows whose date was coerced to NaT never match the year test and drop out here.
in_2020 = covid_mobility['DATE_VAL'].dt.year.eq(2020)
mobility_2020 = covid_mobility.loc[in_2020]
mobility_metrics = ['RETAIL_AND_RECREATION_PCT', 'WORKPLACES_PCT', 'RESIDENTIAL_PCT']
mobility_summary = (
    mobility_2020
    .groupby('COUNTRY_REGION')
    .agg(dict.fromkeys(mobility_metrics, 'mean'))
    .reset_index()
)

# avg_mobility_decline is the row-wise mean of the retail and workplace averages
# (the residential average is kept in the table but not included in this figure).
mobility_summary['avg_mobility_decline'] = (
    mobility_summary.loc[:, ['RETAIL_AND_RECREATION_PCT', 'WORKPLACES_PCT']].mean(axis=1)
)

# Percentage change in the GDP table's value from the 2019 column to the 2020
# column; countries missing either year (or the name) are dropped first.
gdp_latest = (
    gdp.loc[:, ['Country', '2019', '2020']]
    .dropna()
    .assign(GDP_change_pct=lambda g: g['2020'].sub(g['2019']).div(g['2019']).mul(100))
)


In [6]:
# Step 4: Merge Mobility & GDP Data
# Inner join on country name: only countries whose name appears identically in
# both tables survive; the result keeps both key columns (COUNTRY_REGION, Country).
merged = mobility_summary.merge(
    gdp_latest,
    how='inner',
    left_on='COUNTRY_REGION',
    right_on='Country',
)

In [8]:
# Step 5: Correlation & Regression
# Pairwise correlation matrix (DataFrame.corr with its default method) between
# the mobility figure and the GDP percentage change.
analysis_columns = ['avg_mobility_decline', 'GDP_change_pct']
correlation_matrix = merged[analysis_columns].corr()
print("Correlation between mobility decline and GDP contraction:")
print(correlation_matrix)

# Single-predictor ordinary least squares fit: GDP change explained by mobility.
regression_formula = 'GDP_change_pct ~ avg_mobility_decline'
model = ols(regression_formula, data=merged).fit()
print(model.summary())

Correlation between mobility decline and GDP contraction:
                      avg_mobility_decline  GDP_change_pct
avg_mobility_decline              1.000000        0.090442
GDP_change_pct                    0.090442        1.000000
                            OLS Regression Results                            
Dep. Variable:         GDP_change_pct   R-squared:                       0.008
Model:                            OLS   Adj. R-squared:                 -0.002
Method:                 Least Squares   F-statistic:                    0.8000
Date:                Mon, 28 Jul 2025   Prob (F-statistic):              0.373
Time:                        05:37:00   Log-Likelihood:                -357.36
No. Observations:                  99   AIC:                             718.7
Df Residuals:                      97   BIC:                             723.9
Df Model:                           1                                         
Covariance Type:            nonrobust                 

In [None]:
# Step 6: Visualizations
# Scatter plot with regression line: one labelled point per merged country,
# plus the OLS trendline that Plotly Express fits to the same two columns.
fig = px.scatter(merged, x='avg_mobility_decline', y='GDP_change_pct', text='Country', trendline='ols',
                 title='GDP Contraction vs Mobility Decline (2020)')
fig.update_traces(textposition='top center')
fig.show()

# Time-series example for one country.
# The title promises 2020, so restrict the rows to that year (this also drops
# rows whose date was coerced to NaT during cleaning), and sort by date so the
# line is drawn chronologically rather than in file order.
# NOTE(review): if the dataset holds several rows per date for the United States
# (e.g. sub-regions), aggregate per DATE_VAL before plotting — confirm against the data.
usa_trend = (
    covid_mobility[
        (covid_mobility['COUNTRY_REGION'] == 'United States')
        & (covid_mobility['DATE_VAL'].dt.year == 2020)
    ]
    .sort_values('DATE_VAL')
)
fig2 = px.line(usa_trend, x='DATE_VAL', y='RETAIL_AND_RECREATION_PCT',
               title='US Retail Mobility Trend (2020)')
fig2.show()