# CoV-19 Preprocessing & Prediction 

## Mehrdad Heshmat

### Section 1: Importing Required Libraries

In [1]:
import pandas as pd
from scipy.stats import ttest_rel
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from matplotlib.ticker import FormatStrFormatter

# Render matplotlib figures inline, directly below the cell that produces them.
%matplotlib inline
# Passing None for both options removes pandas' display limits, so no rows or
# columns are elided when a frame is shown.
pd.set_option("display.max_rows",None,"display.max_columns",None)


### Initializing output-paths:

In [2]:
import os

# Create every output directory this notebook writes to.
# Each directory is created on its own with exist_ok=True, so:
#   * re-running the cell is a no-op instead of raising FileExistsError, and
#   * a partially created tree (e.g. "charts" present but
#     "preprocessed_Dataframe" missing) is completed rather than skipped.
for output_subdir in ("charts", "preprocessed_Dataframe"):
    os.makedirs(os.path.join(".", "output", output_subdir), exist_ok=True)

### Section 2: Loading the Data & Exploring It

In [3]:
# Load the raw dataset from the notebook's working directory.
# The backslash is escaped so the literal no longer contains the invalid escape
# sequence "\S" (which triggers a warning on recent Python versions); the
# resulting path string is unchanged.
df  = pd.read_csv(".\\SARS-CoV-2_Dataset_main.csv")

In [4]:
# Quick size check: (number of rows, number of columns) of the raw frame.
df.shape

(121953, 9)

In [5]:
# Summarize column dtypes and non-null counts to see where values are missing.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 121953 entries, 0 to 121952
Data columns (total 9 columns):
 #   Column                   Non-Null Count   Dtype  
---  ------                   --------------   -----  
 0   continent                116402 non-null  object 
 1   location                 121953 non-null  object 
 2   date                     121953 non-null  object 
 3   total_cases              115720 non-null  float64
 4   total_deaths             104903 non-null  float64
 5   total_vaccinations       28228 non-null   float64
 6   people_vaccinated        26813 non-null   float64
 7   people_fully_vaccinated  23715 non-null   float64
 8   population               121088 non-null  float64
dtypes: float64(6), object(3)
memory usage: 8.4+ MB


# Preprocessing 

### Task 1: Fill missing vaccination counts and drop unused locations


In [6]:
# Replace missing values with 0 in each of the three vaccination columns.
for vaccination_col in ('total_vaccinations', 'people_vaccinated', 'people_fully_vaccinated'):
    df[vaccination_col] = df[vaccination_col].fillna(0)


In [7]:
# Drop the rows that belong to the two excluded locations in a single pass.
excluded_locations = ['International', 'Northern Cyprus']
df = df[~df.location.isin(excluded_locations)]

In [8]:
# Within each location, carry the last non-zero count forward over zero entries
# in the two vaccination columns below. Zeros that come before a location's
# first non-zero value have nothing to inherit and stay 0.
#
# This replaces a per-location loop built on Series.replace(0, method='ffill'):
# the `method` argument of replace() is deprecated in recent pandas releases,
# and the loop re-scanned the whole frame once per location. A single grouped
# forward-fill produces the same values.
#
# Assumes these columns contain no NaN at this point (they are zero-filled
# earlier in the notebook) and that `location` has no missing values.
countries_set = set(df.location)  # name kept: defined by the original cell
cumulative_cols = ['people_fully_vaccinated', 'total_vaccinations']
df[cumulative_cols] = (
    df[cumulative_cols]
    .mask(df[cumulative_cols].eq(0))  # hide zeros so ffill treats them as gaps
    .groupby(df['location'])          # never fill across location boundaries
    .ffill()
    .fillna(0)                        # leading zeros have no predecessor: keep 0
)


In [9]:
# Write the intermediate (stage 1) frame to the preprocessed output folder.
df.to_csv(
    path_or_buf='output\\preprocessed_Dataframe\\' + 'preprocessed_df-Stage 1.csv'
)

### Task 2: Express vaccinations, deaths and cases as a percentage of population

In [10]:
# For each absolute count, add a "<column>_per_population" column holding the
# count as a percentage of the location's population (count * 100 / population).
for count_col in ('people_fully_vaccinated', 'total_deaths', 'total_cases'):
    df[f'{count_col}_per_population'] = df[count_col].mul(100).div(df['population'])



In [11]:
# Replace the remaining missing values with 0 in the three percentage columns
# and in the two absolute total columns; all other columns are left untouched.
df = df.fillna({
    'people_fully_vaccinated_per_population': 0,
    'total_deaths_per_population': 0,
    'total_cases_per_population': 0,
    'total_deaths': 0,
    'total_cases': 0,
})

### Task3:

In [14]:
# Convert the date strings (month/day/4-digit-year) into datetime values.
df['date'] = pd.to_datetime(df['date'], format='%m/%d/%Y')

In [15]:
# Show the distinct location names that remain in the frame.
print(set(df['location']))

{'Cayman Islands', 'Bolivia', 'Mauritania', 'Bosnia and Herzegovina', 'Suriname', 'Finland', 'Isle of Man', 'Maldives', 'Portugal', 'Ecuador', 'South Korea', 'Spain', 'Uganda', 'Ukraine', 'Jordan', 'Aruba', 'Georgia', 'Anguilla', 'El Salvador', 'Indonesia', 'Marshall Islands', 'Africa', 'Eritrea', 'Egypt', 'Bonaire Sint Eustatius and Saba', 'Taiwan', 'Azerbaijan', 'Mongolia', 'Zambia', 'Madagascar', 'Belarus', 'Pakistan', 'Qatar', 'Philippines', 'Sweden', 'Ethiopia', 'Mexico', 'Yemen', 'Iran', 'Tanzania', 'Brunei', 'Europe', 'Cambodia', 'Fiji', 'Uzbekistan', 'Somalia', 'Greenland', 'Iraq', 'Mali', 'Kazakhstan', 'Guinea', 'Moldova', 'Timor', 'Papua New Guinea', 'New Caledonia', 'Slovakia', "Cote d'Ivoire", 'Bhutan', 'Nigeria', 'Italy', 'Andorra', 'Burundi', 'Denmark', 'Tuvalu', 'Norway', 'Laos', 'World', 'Comoros', 'Netherlands', 'Uruguay', 'Gabon', 'Rwanda', 'United Kingdom', 'Australia', 'Micronesia (country)', 'Sri Lanka', 'Kyrgyzstan', 'Turkey', 'United States', 'Venezuela', 'Domini

### Saving Preprocessed DataFrame

In [33]:
# Persist the fully preprocessed frame to the preprocessed output folder.
df.to_csv(
    path_or_buf='output\\preprocessed_Dataframe\\' + 'preprocessed_SARS-CoV-2_Dataset_main.csv'
)

# Prediction

### Importing Required Libraries

In [None]:
# Reload the preprocessed frame written by the "Saving Preprocessed DataFrame"
# step. The file name now matches the one that step writes
# ("preprocessed_SARS-CoV-2_Dataset_main.csv"); the previous literal pointed at
# a file this notebook never creates and also contained the invalid escape
# sequence "\S".
# NOTE(review): that file was written with its index, so it comes back as an
# extra unnamed first column, and `date` is read back as plain strings —
# confirm whether later cells expect that.
df  = pd.read_csv(".\\output\\preprocessed_Dataframe\\preprocessed_SARS-CoV-2_Dataset_main.csv")