In [1]:
import pandas as pd
import plotly_express as px

# Path (relative to the working directory) of the ECDC COVID-19 CSV export.
file_path = "Data/ECDC_Global_Data_Covid.csv"

# Parse the whole CSV into one DataFrame; the cells below derive from it.
df = pd.read_csv(filepath_or_buffer=file_path)

# Bare last expression -> rich, truncated table preview of the loaded data.
df

Unnamed: 0,country,country_code,continent,population,indicator,weekly_count,year_week,rate_14_day,cumulative_count,source,note
0,Afghanistan,AFG,Asia,38928341,cases,0.0,2020-01,,0.0,Epidemic intelligence national data,
1,Afghanistan,AFG,Asia,38928341,cases,0.0,2020-02,0.000000,0.0,Epidemic intelligence national data,
2,Afghanistan,AFG,Asia,38928341,cases,0.0,2020-03,0.000000,0.0,Epidemic intelligence national data,
3,Afghanistan,AFG,Asia,38928341,cases,0.0,2020-04,0.000000,0.0,Epidemic intelligence national data,
4,Afghanistan,AFG,Asia,38928341,cases,0.0,2020-05,0.000000,0.0,Epidemic intelligence national data,
...,...,...,...,...,...,...,...,...,...,...,...
53069,Cyprus,CYP,Europe,896007,deaths,20.0,2022-20,44.642508,1209.0,TESSy COVID-19,
53070,Cyprus,CYP,Europe,896007,deaths,9.0,2022-21,32.365819,1218.0,TESSy COVID-19,
53071,Cyprus,CYP,Europe,896007,deaths,,2022-22,,,TESSy COVID-19,
53072,Cyprus,CYP,Europe,896007,deaths,0.0,2022-23,,,TESSy COVID-19,


In [2]:
# Report how many distinct values each key column holds.
# The printed text spells out the evaluated expression, e.g.
# "len(df['country'].unique()) = <count> countries represented".
for column, description in (
    ("year_week", "weeks represented"),
    ("country", "countries represented"),
    ("indicator", "indicators (cases / deaths) represented"),
):
    distinct_count = len(df[column].unique())
    print(f"len(df['{column}'].unique()) = {distinct_count} {description}")

len(df['year_week'].unique()) = 129 weeks represented
len(df['country'].unique()) = 228 countries represented
len(df['indicator'].unique()) = 2 indicators (cases / deaths) represented


---
Finding the most recent week with information from all countries

In [3]:
# Keep only the rows reported for week 2022-24, the most recent week.
df_most_recent = df.loc[df["year_week"].eq("2022-24")]

# Preview: only a handful of rows come back, so this week is not yet
# updated for all countries.
df_most_recent

Unnamed: 0,country,country_code,continent,population,indicator,weekly_count,year_week,rate_14_day,cumulative_count,source,note
52944,Cyprus,CYP,Europe,896007,cases,8847.0,2022-24,1753.446123,,TESSy COVID-19,
53073,Cyprus,CYP,Europe,896007,deaths,5.0,2022-24,5.580314,,TESSy COVID-19,


In [4]:
# Step back one week: select the rows reported for 2022-23 instead.
df_most_recent = df.loc[df["year_week"].eq("2022-23")]

# Expected size when every country reports both indicators:
# 228 countries * 2 indicators (cases, deaths) = 456 rows.
df_most_recent

Unnamed: 0,country,country_code,continent,population,indicator,weekly_count,year_week,rate_14_day,cumulative_count,source,note
126,Afghanistan,AFG,Asia,38928341,cases,432.0,2022-23,2.424968,181120.0,Epidemic intelligence national data,
253,Afghanistan,AFG,Asia,38928341,deaths,1.0,2022-23,0.231194,7710.0,Epidemic intelligence national data,
380,Africa (total),,Africa,1339423921,cases,33185.0,2022-23,5.425691,11937718.0,Epidemic intelligence national data,
507,Africa (total),,Africa,1339423921,deaths,251.0,2022-23,0.413611,254467.0,Epidemic intelligence national data,
625,Albania,ALB,Europe,2829741,cases,330.0,2022-23,22.263522,276731.0,Epidemic intelligence national data,
...,...,...,...,...,...,...,...,...,...,...,...
52583,Zambia,ZMB,Africa,18383956,deaths,1.0,2022-23,0.163186,3989.0,Epidemic intelligence national data,
52699,Zimbabwe,ZWE,Africa,14862927,cases,758.0,2022-23,13.880173,254155.0,Epidemic intelligence national data,
52815,Zimbabwe,ZWE,Africa,14862927,deaths,9.0,2022-23,1.345630,5521.0,Epidemic intelligence national data,
52943,Cyprus,CYP,Europe,896007,cases,6864.0,2022-23,,,TESSy COVID-19,


# Cumulative Counts Relative to Population Size

In [5]:
# Derive the per-capita cumulative count as a new column.
# `assign` returns a new DataFrame instead of writing into the filtered slice
# of `df`, so pandas emits no SettingWithCopyWarning and the global
# `chained_assignment` option can stay at its default. Re-running the cell
# yields the same frame. Rows with a missing cumulative_count stay NaN.
# (A per-capita weekly count can be derived the same way from "weekly_count".)
df_most_recent = df_most_recent.assign(
    relative_cumulative_count=lambda week: week["cumulative_count"] / week["population"]
)
df_most_recent

Unnamed: 0,country,country_code,continent,population,indicator,weekly_count,year_week,rate_14_day,cumulative_count,source,note,relative_cumulative_count
126,Afghanistan,AFG,Asia,38928341,cases,432.0,2022-23,2.424968,181120.0,Epidemic intelligence national data,,0.004653
253,Afghanistan,AFG,Asia,38928341,deaths,1.0,2022-23,0.231194,7710.0,Epidemic intelligence national data,,0.000198
380,Africa (total),,Africa,1339423921,cases,33185.0,2022-23,5.425691,11937718.0,Epidemic intelligence national data,,0.008913
507,Africa (total),,Africa,1339423921,deaths,251.0,2022-23,0.413611,254467.0,Epidemic intelligence national data,,0.000190
625,Albania,ALB,Europe,2829741,cases,330.0,2022-23,22.263522,276731.0,Epidemic intelligence national data,,0.097794
...,...,...,...,...,...,...,...,...,...,...,...,...
52583,Zambia,ZMB,Africa,18383956,deaths,1.0,2022-23,0.163186,3989.0,Epidemic intelligence national data,,0.000217
52699,Zimbabwe,ZWE,Africa,14862927,cases,758.0,2022-23,13.880173,254155.0,Epidemic intelligence national data,,0.017100
52815,Zimbabwe,ZWE,Africa,14862927,deaths,9.0,2022-23,1.345630,5521.0,Epidemic intelligence national data,,0.000371
52943,Cyprus,CYP,Europe,896007,cases,6864.0,2022-23,,,TESSy COVID-19,,


### Calculating and plotting cases

In [6]:
# Rank every entry by cumulative cases relative to population, highest first.
df_most_cases_rel = (
    df_most_recent
    .loc[lambda frame: frame["indicator"] == "cases"]  # case rows only
    .sort_values("relative_cumulative_count", ascending=False)
    .reset_index(drop=True)  # fresh 0-based index doubles as the rank
)
df_most_cases_rel.head()

Unnamed: 0,country,country_code,continent,population,indicator,weekly_count,year_week,rate_14_day,cumulative_count,source,note,relative_cumulative_count
0,Faroe Islands,FRO,Europe,48865,cases,0.0,2022-23,0.0,34658.0,Epidemic intelligence national data,,0.70926
1,Andorra,AND,Europe,76177,cases,157.0,2022-23,433.201623,43224.0,Epidemic intelligence national data,,0.567415
2,Gibraltar,GIB,Europe,33691,cases,246.0,2022-23,1148.67472,18806.0,Epidemic intelligence national data,,0.558191
3,Iceland,ISL,Europe,368792,cases,1412.0,2022-23,697.140936,192039.0,TESSy COVID-19,,0.520724
4,Lithuania,LTU,Europe,2795680,cases,936.0,2022-23,60.557718,1402060.0,TESSy COVID-19,,0.501509


In [7]:
# Look up Sweden in the cases ranking: row index 60 (0-based),
# roughly 0.24 cumulative cases relative to population.
df_most_cases_rel.loc[df_most_cases_rel["country"].eq("Sweden")]

Unnamed: 0,country,country_code,continent,population,indicator,weekly_count,year_week,rate_14_day,cumulative_count,source,note,relative_cumulative_count
60,Sweden,SWE,Europe,10379295,cases,1899.0,2022-23,30.435593,2511512.0,TESSy COVID-19,,0.241973


In [8]:
# Bar chart of the 50 highest-ranked entries by cumulative cases relative to
# population. The title and readable axis labels let the figure stand on its
# own when the notebook is skimmed.
px.bar(
    df_most_cases_rel.head(50),
    x="country",
    y="relative_cumulative_count",
    title="Top 50 by cumulative COVID-19 cases relative to population",
    labels={
        "country": "Country",
        "relative_cumulative_count": "Cumulative cases / population",
    },
)

### Calculating and plotting deaths

In [9]:
# Rank every entry by cumulative deaths relative to population, highest first.
df_most_deaths_rel = (
    df_most_recent
    .loc[lambda frame: frame["indicator"] == "deaths"]  # death rows only
    .sort_values("relative_cumulative_count", ascending=False)
    .reset_index(drop=True)  # fresh 0-based index doubles as the rank
)
df_most_deaths_rel.head()

Unnamed: 0,country,country_code,continent,population,indicator,weekly_count,year_week,rate_14_day,cumulative_count,source,note,relative_cumulative_count
0,Peru,PER,America,32971846,deaths,69.0,2022-23,4.609994,213338.0,Epidemic intelligence national data,,0.00647
1,Bulgaria,BGR,Europe,6916548,deaths,33.0,2022-23,9.10859,37196.0,TESSy COVID-19,,0.005378
2,Bosnia And Herzegovina,BIH,Europe,3280815,deaths,4.0,2022-23,2.438419,15795.0,Epidemic intelligence national data,,0.004814
3,Hungary,HUN,Europe,9730772,deaths,9.0,2022-23,3.083003,45379.0,TESSy COVID-19,,0.004663
4,North Macedonia,MKD,Europe,2068808,deaths,7.0,2022-23,6.283812,9316.0,Epidemic intelligence national data,,0.004503


In [10]:
# Look up Sweden in the deaths ranking: row index 58 (0-based),
# roughly 0.0018 cumulative deaths relative to population.
df_most_deaths_rel.loc[df_most_deaths_rel["country"].eq("Sweden")]

Unnamed: 0,country,country_code,continent,population,indicator,weekly_count,year_week,rate_14_day,cumulative_count,source,note,relative_cumulative_count
58,Sweden,SWE,Europe,10379295,deaths,1.0,2022-23,1.926913,19049.0,TESSy COVID-19,,0.001835


In [11]:
# Bar chart of the 50 highest-ranked entries by cumulative deaths relative to
# population. The title and readable axis labels let the figure stand on its
# own when the notebook is skimmed.
px.bar(
    df_most_deaths_rel.head(50),
    x="country",
    y="relative_cumulative_count",
    title="Top 50 by cumulative COVID-19 deaths relative to population",
    labels={
        "country": "Country",
        "relative_cumulative_count": "Cumulative deaths / population",
    },
)