In [1]:
import pandas as pd

# Load the COVID-19 dataset
covid_df = pd.read_csv('synthetic_covid_data.csv')

# Load the GDP dataset
gdp_df = pd.read_csv('synthetic_gdp_data.csv')


print("COVID-19 Data Head:")
print(covid_df.head(5))
print("\nGDP Growth Data Head:")
print(gdp_df.head(5))


covid_df['month'] = pd.to_datetime(covid_df['month'])


covid_df.drop_duplicates(inplace=True)
gdp_df.drop_duplicates(inplace=True)


covid_df['active_cases'] = covid_df['confirmed_cases'] - covid_df['recovered']


covid_df['quarter'] = covid_df['month'].dt.to_period('Q')


aggregated_covid_data = covid_df.groupby(['country', 'quarter']).agg(
    total_confirmed_cases=('confirmed_cases', 'sum'),
    total_deaths=('deaths', 'sum'),
    total_active_cases=('active_cases', 'sum')
).reset_index()


print("\nAggregated COVID-19 Data:")
print(aggregated_covid_data.head())


gdp_df['quarter'] = pd.to_datetime(gdp_df['quarter']).dt.to_period('Q')


merged_data = pd.merge(aggregated_covid_data, gdp_df, on=['country', 'quarter'])


print("\nMerged Data:")
print(merged_data.head())


gdp_growth_by_country = merged_data.groupby('country')['gdp_growth'].sum().idxmax()
print(f"\nThe country with the highest overall GDP growth during the pandemic is: {gdp_growth_by_country}")


total_cases_country = covid_df.groupby('country')['confirmed_cases'].sum().idxmax()
total_deaths_country = covid_df.groupby('country')['deaths'].sum().idxmax()

print(f"\nThe country with the highest total confirmed cases is: {total_cases_country}")
print(f"The country with the highest total deaths is: {total_deaths_country}")


COVID-19 Data Head:
        month country  confirmed_cases  deaths  recovered
0  2020-01-31     USA            89247     669      73297
1  2020-02-29     USA            77841     298      10479
2  2020-03-31     USA            31452     101      73205
3  2020-04-30     USA            95109      43      64972
4  2020-05-31     USA             7069      61      38987

GDP Growth Data Head:
  country     quarter  gdp_growth
0     USA  2020-03-31    4.020164
1     USA  2020-06-30   -4.113467
2     USA  2020-09-30    3.121854
3     USA  2020-12-31   -7.161340
4     USA  2021-03-31    1.860803

Aggregated COVID-19 Data:
  country quarter  total_confirmed_cases  total_deaths  total_active_cases
0  Brazil  2020Q1                 131220          1582              -31407
1  Brazil  2020Q2                 136530          1041               62119
2  Brazil  2020Q3                 173792           488              -30307
3  Brazil  2020Q4                 167172          1654              -20095
4  