# COMP41680 - Assignment 1
##### Adam Judge - 16343971

## Task 1: Identify one or more suitable web APIs
###### Api Chosen:
A single API chosen for this assignment was the COVID-19 API - https://covid19api.com/


In [28]:
import os
import urllib.request
import csv
import json
import pandas as pd 
import requests
import time
from datetime import datetime, timedelta, date

# Task 1: Collect data from chosen API
###### Collect Data:
The following functions are used in the creation of files, Collection of data

In [55]:
def Json2file(Folder, FileName, Data):
    print(f"Writing JSON data to {Folder}/{FileName}")
    with open(Folder+FileName, "w") as File:
        json.dump(Data, File)
    File.close()

def makeDir(Folder):
    if not os.path.exists(Folder):
        os.mkdir(Folder)
    else:
        print(f"Folder {Folder} already exists!")


#### Raw COVID-19 data will be extracted for all countries in the European Union and written to .json files

In [56]:
countries = ["austria", "belgium", "bulgaria", "croatia", "cyprus", "czechia", "denmark", "estonia", "finland", "france", "germany", "greece", "hungary", "ireland", "italy", "latvia", "lithuania", "luxembourg", "malta", "netherlands",
             "poland", "portugal", "romania", "slovakia", "slovenia", "spain", "sweden"]
FileNames=[]
ErrCnt=0
makeDir("Raw_Data")
RawFolder="./Raw_Data"
for i, c in enumerate(countries):
    URL=f"http://api.covid19api.com/total/dayone/country/{c}"
    response=requests.get(URL)
    if response.status_code == 200:
        print(f"Successfully obtained data for {c}")
        Json2file("./Raw_Data",f"{c}_rawdata.json", response.json())
        FileNames.append(f"{c}_rawdata.json")
    else:
        print(f"Error for country {c}, {response.status_code}")
        ErrCnt+=1
    # Delay so as to not request too much in short period of time
    time.sleep(1)
print(f"Number of errors: {ErrCnt}")

Folder Raw_Data already exists!
Successfully obtained data for austria
Writing JSON data to ./Raw_Data/austria_rawdata.json
Successfully obtained data for belgium
Writing JSON data to ./Raw_Data/belgium_rawdata.json
Successfully obtained data for bulgaria
Writing JSON data to ./Raw_Data/bulgaria_rawdata.json
Successfully obtained data for croatia
Writing JSON data to ./Raw_Data/croatia_rawdata.json
Successfully obtained data for cyprus
Writing JSON data to ./Raw_Data/cyprus_rawdata.json
Successfully obtained data for czechia
Writing JSON data to ./Raw_Data/czechia_rawdata.json
Successfully obtained data for denmark
Writing JSON data to ./Raw_Data/denmark_rawdata.json
Successfully obtained data for estonia
Writing JSON data to ./Raw_Data/estonia_rawdata.json
Successfully obtained data for finland
Writing JSON data to ./Raw_Data/finland_rawdata.json
Successfully obtained data for france
Writing JSON data to ./Raw_Data/france_rawdata.json
Successfully obtained data for germany
Writing JSO

## Data Preparation and Modification Functions

In [59]:
# This function extracts the daily new cases from the total cumulative cases
def getDailyCases(df):
    TotalCases=df["Confirmed"].tolist()
    DailyCases=[]
    negCaseOcc=0
    # Iterate through the total confirmed cases
    for i, TC in enumerate(TotalCases):
        #First is the first case(s)
        if i == 0:
            DailyCases.append(f"{TC}")
            continue
            
        # Difference between current day and previous days cases    
        newCases=TC-TotalCases[i-1]
        if newCases<0:  
            negCaseOcc+=1
            
        DailyCases.append(f"{newCases}")
        
    return DailyCases, negCaseOcc


# Function to pad data for missing dates from 2020-01-01 to present
def padDates(df):
    # First date will be 2020-01-01
    nextDate=date.fromisoformat("2020-01-01")
    missingCnt=0
    addedCnt=0
    i=0
    # Check all rows based on Date
    while nextDate != datetime.today().date():
        # If the date is present do nothing
        if(nextDate in df["Date"].values):
            pass
        # If date is missing
        else:
            # If first row, cases deaths and new cases all should be zero
            if i==0:
                row2add=[nextDate, df["Country"][i], 0, 0, 0]
            # If not first row, new cases should be 0, confirmed cases and deaths should be same as prior row
            else:    
                row2add=[nextDate, df["Country"][i], df["Confirmed"][i-1], 0, df["Deaths"][i-1]]
                # Missing date found in middle of data and not at beginning
                if df["Confirmed"][i-1] != 0:
                    missingCnt+=1
                    
            # Make a dataframe with new row
            new_df=pd.DataFrame([row2add], columns=df.columns.values)
            print(f"Adding \"{row2add}\"")
            # Concatenate new row to correct position in dataframe
            df=pd.concat([df[:i], new_df, df[i:]], ignore_index=True)
            addedCnt+=1
            
        i+=1
        nextDate+=timedelta(days=1)
    
    return df, missingCnt, addedCnt

# Want Date First
def reorderCols(df):
    dftest=df[["Date", "Country", "Confirmed", "New Cases", "Deaths"]]
    return dftest

# Function to remove hr/min/sec/TimeZone from Date entries
def removeTime(df):
    dates=[]
    for d in df["Date"]:
        try:
            dates.append(d.date())
        except:
            print("Already Removed Hours")
            return df["Date"]
        
    return dates

Processing Data

In [66]:
Columns2Drop=["CountryCode", "Province", "City", "CityCode", "Lat", "Lon", "Recovered", "Active"]
Cols=["Date", "Country", "Confirmed", "New Cases", "Deaths"]
negCaseOcc=0
missingMid=0
missingTot=0
ProcessedDataFolder="./Processed"
FileNamePro=[]
makeDir(ProcessedDataFolder)
# Process data, remove unnecessary columns and append any missing data as appropriate
for country, filename in zip(countries, FileNames):
    df=pd.read_json(RawFolder+filename, orient="records")
    df=df.drop(Columns2Drop, axis=1)
    df["New Cases"], n = getDailyCases(df)
    df["Date"] = removeTime(df)
    df=reorderCols(df)
    df, missingCnt, addedCnt=padDates(df)
    missingMid+=missingCnt
    missingTot+=addedCnt
    negCaseOcc+=n
    fileNamePro=f"{country}_processed.json"
    Json2file(ProcessedDataFolder, filename, df.to_json(orient="records"))

print(f"Number of missing date entries found after first case: {missingMid}")
print(f"Number of missing date entries found in total: \t{missingTot}")
print(f"Number of days found in total with a drop in cumulative case numbers: {negCaseOcc}")

Adding "[datetime.date(2020, 1, 1), 'Austria', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 2), 'Austria', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 3), 'Austria', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 4), 'Austria', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 5), 'Austria', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 6), 'Austria', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 7), 'Austria', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 8), 'Austria', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 9), 'Austria', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 10), 'Austria', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 11), 'Austria', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 12), 'Austria', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 13), 'Austria', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 14), 'Austria', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 15), 'Austria', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 16), 'Austria', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 17), 'Austria', 0, 0, 0]"
Adding "[datetime.date(

Adding "[datetime.date(2020, 1, 12), 'Croatia', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 13), 'Croatia', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 14), 'Croatia', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 15), 'Croatia', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 16), 'Croatia', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 17), 'Croatia', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 18), 'Croatia', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 19), 'Croatia', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 20), 'Croatia', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 21), 'Croatia', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 22), 'Croatia', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 23), 'Croatia', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 24), 'Croatia', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 25), 'Croatia', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 26), 'Croatia', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 27), 'Croatia', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 28), 'Croatia', 0, 0, 0]"
Adding "[datet

Adding "[datetime.date(2020, 1, 2), 'Denmark', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 3), 'Denmark', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 4), 'Denmark', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 5), 'Denmark', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 6), 'Denmark', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 7), 'Denmark', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 8), 'Denmark', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 9), 'Denmark', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 10), 'Denmark', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 11), 'Denmark', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 12), 'Denmark', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 13), 'Denmark', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 14), 'Denmark', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 15), 'Denmark', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 16), 'Denmark', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 17), 'Denmark', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 18), 'Denmark', 0, 0, 0]"
Adding "[datetime.date

Adding "[datetime.date(2020, 1, 1), 'Germany', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 2), 'Germany', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 3), 'Germany', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 4), 'Germany', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 5), 'Germany', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 6), 'Germany', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 7), 'Germany', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 8), 'Germany', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 9), 'Germany', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 10), 'Germany', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 11), 'Germany', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 12), 'Germany', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 13), 'Germany', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 14), 'Germany', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 15), 'Germany', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 16), 'Germany', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 17), 'Germany', 0, 0, 0]"
Adding "[datetime.date(

Adding "[datetime.date(2020, 1, 20), 'Ireland', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 21), 'Ireland', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 22), 'Ireland', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 23), 'Ireland', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 24), 'Ireland', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 25), 'Ireland', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 26), 'Ireland', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 27), 'Ireland', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 28), 'Ireland', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 29), 'Ireland', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 30), 'Ireland', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 31), 'Ireland', 0, 0, 0]"
Adding "[datetime.date(2020, 2, 1), 'Ireland', 0, 0, 0]"
Adding "[datetime.date(2020, 2, 2), 'Ireland', 0, 0, 0]"
Adding "[datetime.date(2020, 2, 3), 'Ireland', 0, 0, 0]"
Adding "[datetime.date(2020, 2, 4), 'Ireland', 0, 0, 0]"
Adding "[datetime.date(2020, 2, 5), 'Ireland', 0, 0, 0]"
Adding "[datetime.d

Adding "[datetime.date(2020, 1, 28), 'Lithuania', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 29), 'Lithuania', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 30), 'Lithuania', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 31), 'Lithuania', 0, 0, 0]"
Adding "[datetime.date(2020, 2, 1), 'Lithuania', 0, 0, 0]"
Adding "[datetime.date(2020, 2, 2), 'Lithuania', 0, 0, 0]"
Adding "[datetime.date(2020, 2, 3), 'Lithuania', 0, 0, 0]"
Adding "[datetime.date(2020, 2, 4), 'Lithuania', 0, 0, 0]"
Adding "[datetime.date(2020, 2, 5), 'Lithuania', 0, 0, 0]"
Adding "[datetime.date(2020, 2, 6), 'Lithuania', 0, 0, 0]"
Adding "[datetime.date(2020, 2, 7), 'Lithuania', 0, 0, 0]"
Adding "[datetime.date(2020, 2, 8), 'Lithuania', 0, 0, 0]"
Adding "[datetime.date(2020, 2, 9), 'Lithuania', 0, 0, 0]"
Adding "[datetime.date(2020, 2, 10), 'Lithuania', 0, 0, 0]"
Adding "[datetime.date(2020, 2, 11), 'Lithuania', 0, 0, 0]"
Adding "[datetime.date(2020, 2, 12), 'Lithuania', 0, 0, 0]"
Adding "[datetime.date(2020, 2, 13), 'Lithuania',

Adding "[datetime.date(2020, 1, 7), 'Netherlands', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 8), 'Netherlands', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 9), 'Netherlands', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 10), 'Netherlands', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 11), 'Netherlands', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 12), 'Netherlands', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 13), 'Netherlands', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 14), 'Netherlands', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 15), 'Netherlands', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 16), 'Netherlands', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 17), 'Netherlands', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 18), 'Netherlands', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 19), 'Netherlands', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 20), 'Netherlands', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 21), 'Netherlands', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 22), 'Netherlands', 0, 0, 0]"
Adding "[da

Adding "[datetime.date(2020, 1, 3), 'Romania', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 4), 'Romania', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 5), 'Romania', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 6), 'Romania', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 7), 'Romania', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 8), 'Romania', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 9), 'Romania', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 10), 'Romania', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 11), 'Romania', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 12), 'Romania', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 13), 'Romania', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 14), 'Romania', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 15), 'Romania', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 16), 'Romania', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 17), 'Romania', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 18), 'Romania', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 19), 'Romania', 0, 0, 0]"
Adding "[datetime.dat

Adding "[datetime.date(2020, 2, 23), 'Slovenia', 0, 0, 0]"
Adding "[datetime.date(2020, 2, 24), 'Slovenia', 0, 0, 0]"
Adding "[datetime.date(2020, 2, 25), 'Slovenia', 0, 0, 0]"
Adding "[datetime.date(2020, 2, 26), 'Slovenia', 0, 0, 0]"
Adding "[datetime.date(2020, 2, 27), 'Slovenia', 0, 0, 0]"
Adding "[datetime.date(2020, 2, 28), 'Slovenia', 0, 0, 0]"
Adding "[datetime.date(2020, 2, 29), 'Slovenia', 0, 0, 0]"
Adding "[datetime.date(2020, 3, 1), 'Slovenia', 0, 0, 0]"
Adding "[datetime.date(2020, 3, 2), 'Slovenia', 0, 0, 0]"
Adding "[datetime.date(2020, 3, 3), 'Slovenia', 0, 0, 0]"
Adding "[datetime.date(2020, 3, 4), 'Slovenia', 0, 0, 0]"
Writing JSON data to ./Processed/slovenia_rawdata.json
Adding "[datetime.date(2020, 1, 1), 'Spain', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 2), 'Spain', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 3), 'Spain', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 4), 'Spain', 0, 0, 0]"
Adding "[datetime.date(2020, 1, 5), 'Spain', 0, 0, 0]"
Adding "[datetime.date(20

In [65]:
RawFolder="./Raw_Data"
filename

'austria_rawdata.json'