# Nigeria COVID-19 Data Prep Tool

### 1. Import Python modules

In [7]:
# import modules
import datetime
from datetime import date

import numpy as np
import pandas as pd

import geopandas as gpd
from geopandas import GeoDataFrame as gdf

import fiona
import json

### 2. Prep States SHP data

In [8]:
# Load the per-state shapefile (one row per state, with its COVID-19 counters).
states_shp_df = gpd.read_file("../data/shp/ncdc-covid19-states.shp")

# ACTIVE = CASES minus the cases that are already closed (RECOVERED + DEATHS).
closed_cases = states_shp_df['RECOVERED'] + states_shp_df['DEATHS']
states_shp_df['ACTIVE'] = states_shp_df['CASES'] - closed_cases

# Put the columns in a fixed order, keeping geometry last.
states_shp_df = states_shp_df[[
    "OBJECTID", "CODE", "STATE", "ADMIN_NAME", "GEO_ZONE",
    "AREA_SQKM", "POP_2016", "CENTER_Y", "CENTER_X",
    "CASES", "DEATHS", "RECOVERED", "SCREENED", "ACTIVE",
    "geometry",
]]

# Shapefile export: this overwrites the file that was read above, now with ACTIVE.
states_shp_df.to_file("../data/shp/ncdc-covid19-states.shp")

# GeoJSON export of the same table.
states_shp_df.to_file("../data/geojson/ncdc-covid19-states.geojson", driver='GeoJSON')

# CSV export: CENTER_Y / CENTER_X are published as LAT / LONG and the
# geometry column is left out.
states_shp_df = states_shp_df.rename(columns={'CENTER_Y': 'LAT', 'CENTER_X': 'LONG'})
states_shp_df.drop(columns='geometry').to_csv("../data/csv/ncdc-covid19-states.csv")

# JSON export: built from the CSV just written, so from here on
# states_shp_df is a plain pandas DataFrame without geometry.
states_shp_df = pd.read_csv("../data/csv/ncdc-covid19-states.csv", index_col=0)
states_shp_df.to_json("../data/json/ncdc-covid19-states.json", orient='records')

# Preview the first rows.
states_shp_df.head()

Unnamed: 0,OBJECTID,CODE,STATE,ADMIN_NAME,GEO_ZONE,AREA_SQKM,POP_2016,LAT,LONG,CASES,DEATHS,RECOVERED,SCREENED,ACTIVE
0,1,NG001,Abia,Abia State,South East,4858.882335,3644714,5.453302,7.52319,2,0,1,0,1
1,2,NG002,Adamawa,Adamawa State,North East,37924.98786,4145684,9.323227,12.400241,12,0,0,2,12
2,3,NG003,Akwa Ibom,Akwa Ibom State,South South,6723.202769,5353609,4.907245,7.846395,16,2,10,1,4
3,4,NG004,Anambra,Anambra State,South East,4807.933352,5425334,6.222776,6.932186,1,0,1,0,0
4,5,NG005,Bauchi,Bauchi State,North East,48496.40051,6386388,10.796647,9.990588,80,0,6,27,74


### 3. Prep DAILYUPDATES csv data

In [9]:
# Read the national daily-updates table.
df = pd.read_csv("../data/csv/ncdc-covid19-dailyupdates.csv")

# The DATE -> datetime conversion below is currently disabled, so DATE is
# exported exactly as pandas read it from the CSV.
# df = df.reset_index()
# df['DATE']= pd.to_datetime(df['DATE'])

# JSON export: one object per row.
df.to_json("../data/json/ncdc-covid19-dailyupdates.json", date_format='iso', orient='records')

# Shapefile export: every row gets a point whose x and y are both the
# RECOVERED value.
# NOTE(review): this looks like a placeholder geometry used only so the table
# can be written as a shapefile - confirm that is the intent.
recovered = df["RECOVERED"]
df_shp = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(recovered, recovered))
df_shp.to_file("./tmp/ncdc-covid19-dailyupdates.shp")

# Preview the last rows.
df_shp.tail()

Unnamed: 0,DATE,TOTAL CONFIRMED,NEW CASES,ACTIVE CASES,DEATHS,RECOVERED,geometry
63,4/30/2020,1932,204,1555,58,319,POINT (319.000 319.000)
64,5/1/2020,2168,236,1749,68,351,POINT (351.000 351.000)
65,5/2/2020,2388,220,1918,85,385,POINT (385.000 385.000)
66,5/3/2020,2558,170,2071,87,400,POINT (400.000 400.000)
67,5/4/2020,2558,245,2048,93,417,POINT (417.000 417.000)


### 4. Prep States Daily CASES csv data

In [10]:
# Append today's per-state CASES totals (taken from the states shapefile)
# to the states daily-cases table and write the table back to CSV.
df = pd.read_csv("../data/csv/ncdc-covid19-states-daily-cases.csv", index_col=0)
states_shp_df = gpd.read_file("../data/shp/ncdc-covid19-states.shp")

# today's date, as text and as a Timestamp
today_date = date.today().strftime("%m/%d/%Y")
today_ts = pd.to_datetime(today_date, format="%m/%d/%Y")

# Parse the stored date labels before looking for today's row: this cell
# writes the dates back through pd.to_datetime, so the text in the file does
# not have to match the "%m/%d/%Y" text of today_date.
df.index = pd.to_datetime(df.index)

# Remove an existing row for today so a re-run replaces it. DataFrame.drop
# returns a new frame, so the result has to be re-bound.
df = df.drop(index=today_ts, errors="ignore")

# Per-state totals in shapefile row order. They are assigned positionally, so
# the CSV state columns are assumed to be in the same order as the shapefile
# rows - TODO confirm.
values = states_shp_df['CASES'].tolist()

# add new row to df
df.loc[today_ts] = values

# Move the (already datetime) index back into a 'Date' column.
df = df.rename_axis('Date').reset_index()

# export to csv; index=False keeps 'Date' as the first column of the file,
# which is what index_col=0 above expects on the next run.
df.to_csv('../data/csv/ncdc-covid19-states-daily-cases.csv', index=False)

# view data
df.tail()

Unnamed: 0,Date,Abia,Adamawa,Akwa Ibom,Anambra,Bauchi,Bayelsa,Benue,Borno,Cross River,...,Ogun,Ondo,Osun,Oyo,Plateau,Rivers,Sokoto,Taraba,Yobe,Zamfara
63,2020-04-30,2,2,16,1,38,5,1,66,0,...,56,9,34,23,1,13,36,8,1,4
64,2020-05-01,2,4,16,1,48,5,1,69,0,...,56,13,34,29,1,14,41,8,4,9
65,2020-05-02,2,4,16,1,53,5,1,75,0,...,56,13,36,33,3,14,54,8,13,12
66,2020-05-03,2,6,16,1,71,5,1,82,0,...,80,13,36,34,3,14,66,8,13,12
67,2020-05-04,2,12,16,1,80,5,2,100,0,...,85,13,37,39,3,14,66,8,13,13


### 5. Prep States Daily RECOVERED csv data

In [11]:
# Append today's per-state RECOVERED totals (taken from the states shapefile)
# to the states daily-recovered table and write the table back to CSV.
df = pd.read_csv("../data/csv/ncdc-covid19-states-daily-recovered.csv", index_col=0)
states_shp_df = gpd.read_file("../data/shp/ncdc-covid19-states.shp")

# today's date, used as the row label
today_date = date.today().strftime("%m/%d/%Y")

# Remove an existing row for today so a re-run replaces it. DataFrame.drop
# returns a new frame, so the result has to be re-bound.
# NOTE: the match is on the exact label text, so a row stored without zero
# padding (e.g. "5/4/2020") is not recognised as today's row.
df = df.drop(index=today_date, errors="ignore")

# Per-state recovered totals in shapefile row order. They are assigned
# positionally, so the CSV state columns are assumed to be in the same order
# as the shapefile rows - TODO confirm.
values = states_shp_df['RECOVERED'].tolist()

# add new row to df
df.loc[today_date] = values

# export to csv
df.to_csv('../data/csv/ncdc-covid19-states-daily-recovered.csv')

# view data
df.tail()

Unnamed: 0_level_0,Abia,Adamawa,Akwa Ibom,Anambra,Bauchi,Bayelsa,Benue,Borno,Cross River,Delta,...,Ogun,Ondo,Osun,Oyo,Plateau,Rivers,Sokoto,Taraba,Yobe,Zamfara
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
4/30/2020,0,0,10,1,6,0,0,0,0,4,...,8,3,18,9,0,2,1,0,0,0
5/1/2020,0,0,10,1,6,0,0,0,0,4,...,8,3,18,9,0,2,1,0,0,0
5/2/2020,0,0,10,1,6,0,0,0,0,4,...,9,3,22,9,0,2,1,0,0,0
5/3/2020,1,0,10,1,6,0,0,0,0,4,...,10,3,22,9,0,2,1,0,0,0
05/04/2020,1,0,10,1,6,0,0,0,0,2,...,12,3,30,9,0,2,1,0,0,0


### 6. Prep States Daily DEATHS csv data

In [12]:
# Append today's per-state DEATHS totals (taken from the states shapefile)
# to the states daily-deaths table and write the table back to CSV.
df = pd.read_csv("../data/csv/ncdc-covid19-states-daily-deaths.csv", index_col=0)
states_shp_df = gpd.read_file("../data/shp/ncdc-covid19-states.shp")

# today's date, used as the row label
today_date = date.today().strftime("%m/%d/%Y")

# Remove an existing row for today so a re-run replaces it. DataFrame.drop
# returns a new frame, so the result has to be re-bound.
# NOTE: the match is on the exact label text, so a row stored without zero
# padding (e.g. "5/4/2020") is not recognised as today's row.
df = df.drop(index=today_date, errors="ignore")

# Per-state death totals in shapefile row order. They are assigned
# positionally, so the CSV state columns are assumed to be in the same order
# as the shapefile rows - TODO confirm.
values = states_shp_df['DEATHS'].tolist()

# add new row to df
df.loc[today_date] = values

# export to csv
df.to_csv('../data/csv/ncdc-covid19-states-daily-deaths.csv')

# view data
df.tail()

Unnamed: 0_level_0,Abia,Adamawa,Akwa Ibom,Anambra,Bauchi,Bayelsa,Benue,Borno,Cross River,Delta,...,Ogun,Ondo,Osun,Oyo,Plateau,Rivers,Sokoto,Taraba,Yobe,Zamfara
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
4/30/2020,0,0,2,0,0,0,0,6,0,2,...,1,0,3,2,0,2,4,0,0,1
5/1/2020,0,0,2,0,0,0,0,6,0,2,...,1,0,3,2,0,2,7,0,0,1
5/2/2020,0,0,2,0,0,0,0,11,0,2,...,2,0,3,2,0,2,8,0,1,1
5/3/2020,0,0,2,0,0,0,0,12,0,2,...,2,0,3,2,0,2,8,0,1,1
05/04/2020,0,0,2,0,0,0,0,14,0,2,...,2,0,4,2,0,2,8,0,1,1
