In [2]:
import pandas as pd
import numpy as np

## Covid 19

In [114]:
# Load the raw state-level hospital timeseries export.
raw_covid_data = pd.read_csv("/Pandemic-Database/Covid 19/COVID-19_Reported_Patient_Impact_and_Hospital_Capacity_by_State_Timeseries__RAW_.csv")

# Source columns to keep, grouped by the role they play in the tidy table.
geo_column = ['state']
date_column = ['date']
case_column = [
    'total_adult_patients_hospitalized_confirmed_covid',
    'total_adult_patients_hospitalized_confirmed_and_suspected_covid',
]

# Reshape to one row per (Sub-region, Date, Type) in a single pipeline.
clean_covid_data = (
    raw_covid_data[geo_column + date_column + case_column]
    .rename(columns={
        "state": "Sub-region",
        "date": "Date",
        "total_adult_patients_hospitalized_confirmed_covid": "Confirmed",
        "total_adult_patients_hospitalized_confirmed_and_suspected_covid": "Suspected",
    })
    .assign(
        # Going by its source column name, "Suspected" arrives as
        # confirmed + suspected; subtract the confirmed count so the two
        # Type values do not overlap.
        Suspected=lambda wide: wide['Suspected'] - wide['Confirmed'],
        Virus='SARS-CoV-2',
        Region='United States',
    )
    .melt(
        id_vars=['Virus', 'Region', 'Sub-region', 'Date'],
        value_vars=['Confirmed', 'Suspected'],
        var_name="Type",
        value_name="Case Number",
    )
    # Placeholder column: every row gets an empty comment.
    .assign(Comments=np.nan)
    .sort_values(by=["Region", "Sub-region", "Date"])
    # Rows without a reported count carry no information in the long format.
    .dropna(subset=['Case Number'])
)

clean_covid_data

Unnamed: 0,Virus,Region,Sub-region,Date,Type,Case Number,Comments
18118,SARS-CoV-2,United States,AK,2020/07/15,Confirmed,11.0,
82064,SARS-CoV-2,United States,AK,2020/07/15,Suspected,17.0,
14471,SARS-CoV-2,United States,AK,2020/07/16,Confirmed,14.0,
78417,SARS-CoV-2,United States,AK,2020/07/16,Suspected,17.0,
15523,SARS-CoV-2,United States,AK,2020/07/17,Confirmed,13.0,
...,...,...,...,...,...,...,...
77086,SARS-CoV-2,United States,WY,2023/06/01,Suspected,0.0,
21417,SARS-CoV-2,United States,WY,2023/06/02,Confirmed,3.0,
85363,SARS-CoV-2,United States,WY,2023/06/02,Suspected,2.0,
16425,SARS-CoV-2,United States,WY,2023/06/03,Confirmed,3.0,


In [115]:
# Write the tidy Covid table to disk; the row index is not part of the output.
clean_covid_data.to_csv(
    path_or_buf='/Pandemic-Database/Covid 19/Covid-19_Clean_Data.csv',
    index=False,
)

## Dengue Fever

## Ebola

In [77]:
# Preview the CDC counts workbook as-is (nothing is assigned; the frame is only displayed).
pd.read_excel(io='/Pandemic-Database/Ebola/CDC-counts.xlsx')

Unnamed: 0,WHO report date,"Total Cases, Guinea","Total Deaths, Guinea","Total Cases, Liberia","Total Deaths, Liberia","Total Cases, Sierra Leone","Total Deaths, Sierra Leone",Total Cases,Total Deaths
0,2016-04-13,3814,2544,10678,4810,14124,3956,28616,11310
1,2016-03-30,3811,2543,10675,4809,14124,3956,28610,11308
2,2016-03-23,3809,2540,10675,4809,14124,3956,28608,11305
3,2016-03-03,3804,2536,10675,4809,14124,3956,28603,11301
4,2016-02-17,3804,2536,10675,4809,14124,3956,28603,11301
...,...,...,...,...,...,...,...,...,...
260,2014-04-01,122,80,8,2,0,0,130,82
261,2014-03-31,112,70,8,6,0,0,120,76
262,2014-03-27,103,66,8,6,6,5,117,77
263,2014-03-26,86,60,0,0,0,0,86,60


In [116]:
# Load the CDC counts workbook: one row per WHO report date, with a
# cases/deaths column pair per country.
early_cdc_data = pd.read_excel('/Pandemic-Database/Ebola/CDC-counts.xlsx')

# Relabel the per-country columns as "<Type>, <Country>" so both parts can be
# recovered from the column name after melting.
# NOTE(review): the source columns are named "Total Cases, ..." but are
# labelled "Confirmed" here -- confirm that matches the source's case definition.
early_cdc_data = early_cdc_data.rename(columns={"Total Cases, Guinea": "Confirmed, Guinea",
                                                'Total Deaths, Guinea': "Deaths, Guinea",
                                                'Total Cases, Liberia': "Confirmed, Liberia",
                                                'Total Deaths, Liberia': "Deaths, Liberia",
                                                'Total Cases, Sierra Leone': 'Confirmed, Sierra Leone',
                                                'Total Deaths, Sierra Leone': 'Deaths, Sierra Leone'
                                               })

# Wide -> long: one row per (report date, "<Type>, <Country>") pair.
early_cdc_data = pd.melt(early_cdc_data,
                         id_vars=['WHO report date', 'Total Cases', 'Total Deaths'],
                         value_vars=['Confirmed, Guinea', 'Deaths, Guinea',
                                     'Confirmed, Liberia', 'Deaths, Liberia',
                                     'Confirmed, Sierra Leone', 'Deaths, Sierra Leone'])

# Split "Confirmed, Guinea" into Type / Sub-region. The separator is
# comma + space: splitting on "," alone leaves a leading space in the country
# name (" Guinea"), which then ends up in the CSV and will not match the
# same country spelled without the space elsewhere.
early_cdc_data[['Type', 'Sub-region']] = early_cdc_data['variable'].str.split(", ", n=1, expand=True)

# Constant descriptor columns plus an empty Comments placeholder.
early_cdc_data = early_cdc_data.assign(Region="Africa",
                                       Virus="Ebolavirus",
                                       Comments=np.nan)

early_cdc_data['WHO report date'] = pd.to_datetime(early_cdc_data['WHO report date'])

# Keep only the output columns (in output order), rename to the shared
# schema, sort, and drop rows that have no count.
early_cdc_data = (
    early_cdc_data[['Virus', 'Region', 'Sub-region', 'WHO report date', 'Type', 'value', 'Comments']]
    .rename(columns={"WHO report date": "Date",
                     "value": "Case Number"})
    .sort_values(by=["Region", "Sub-region", "Date"])
    .dropna(subset=['Case Number'])
)

early_cdc_data

Unnamed: 0,Virus,Region,Sub-region,Date,Type,Case Number,Comments
264,Ebolavirus,Africa,Guinea,2014-03-25,Confirmed,86,
529,Ebolavirus,Africa,Guinea,2014-03-25,Deaths,59,
263,Ebolavirus,Africa,Guinea,2014-03-26,Confirmed,86,
528,Ebolavirus,Africa,Guinea,2014-03-26,Deaths,60,
262,Ebolavirus,Africa,Guinea,2014-03-27,Confirmed,103,
...,...,...,...,...,...,...,...
1327,Ebolavirus,Africa,Sierra Leone,2016-03-23,Deaths,3956,
1061,Ebolavirus,Africa,Sierra Leone,2016-03-30,Confirmed,14124,
1326,Ebolavirus,Africa,Sierra Leone,2016-03-30,Deaths,3956,
1060,Ebolavirus,Africa,Sierra Leone,2016-04-13,Confirmed,14124,


In [120]:
# Write the tidy CDC confirmed/death table to disk without the row index.
early_cdc_data.to_csv(
    path_or_buf='/Pandemic-Database/Ebola/Ebola_CDC_Confirmed_and_Death_Clean_Data.csv',
    index=False,
)

In [126]:
# Load the three per-country processed files; the saved "Unnamed: 0" column is
# used as the row index.
late_guinea_data = pd.read_csv('/Pandemic-Database/Ebola/Processed_Data/guinea_full_data.csv',
                              index_col="Unnamed: 0")

late_liberia_data = pd.read_csv('/Pandemic-Database/Ebola/Processed_Data/liberia_full_data.csv',
                               index_col="Unnamed: 0")

late_sierraleone_data = pd.read_csv('/Pandemic-Database/Ebola/Processed_Data/sierraleone_full_data.csv',
                                   index_col="Unnamed: 0")

# Stack the three countries in order. DataFrame.append was removed in
# pandas 2.0; pd.concat with its defaults stacks the rows the same way.
late_data = pd.concat([late_guinea_data, late_liberia_data, late_sierraleone_data])

late_suspected_data = (
    # The raw labels carry a leading space, hence ' Suspected'.
    late_data[late_data['Case definition'] == ' Suspected']
    .reset_index(drop=True)
    .rename(columns={'Country': 'Sub-region',
                     'Data as of': 'Date',
                     'Case definition': 'Type',
                     'Number of cases Cumulative': 'Case Number'})
    # Drop rows whose count is the textual placeholder rather than a number.
    # NOTE(review): because of that placeholder the column may still be stored
    # as text here -- confirm before using it numerically.
    .loc[lambda rows: rows['Case Number'] != 'Not reported.']
    .assign(
        # Remove the leading space from the label (' Suspected' -> 'Suspected').
        Type=lambda rows: rows['Type'].str.strip(),
        Region="Africa",
        Virus="Ebolavirus",
        Comments=np.nan,
    )
    # Keep only the output columns, in output order.
    [['Virus', 'Region', 'Sub-region', 'Date', 'Type', 'Case Number', 'Comments']]
    .dropna(subset=['Case Number'])
    .drop_duplicates()
)

late_suspected_data

Unnamed: 0,Virus,Region,Sub-region,Date,Type,Case Number,Comments
0,Ebolavirus,Africa,Guinea,2014-11-25,Suspected,21,
1,Ebolavirus,Africa,Guinea,2014-11-28,Suspected,24,
2,Ebolavirus,Africa,Guinea,2014-11-30,Suspected,25,
4,Ebolavirus,Africa,Guinea,2014-12-02,Suspected,27,
5,Ebolavirus,Africa,Guinea,2014-12-03,Suspected,21,
...,...,...,...,...,...,...,...
981,Ebolavirus,Africa,Sierra Leone,2015-11-07,Suspected,5131,
1038,Ebolavirus,Africa,Sierra Leone,2016-01-17,Suspected,0,
1039,Ebolavirus,Africa,Sierra Leone,2016-01-24,Suspected,0,
1040,Ebolavirus,Africa,Sierra Leone,2016-01-31,Suspected,0,


In [127]:
# Write the tidy suspected-case table to disk without the row index.
late_suspected_data.to_csv(
    path_or_buf='/Pandemic-Database/Ebola/Ebola_Suspected_Clean_Data.csv',
    index=False,
)

## Influenza

## Monkeypox

## Norovirus

## Zika Virus