# COMP41680 - Assignment 1
##### Adam Judge - 16343971

## Task 1: Identify one or more suitable web APIs
###### API Chosen:
A single API was chosen for this assignment: the COVID-19 API - https://covid19api.com/


In [174]:
import os
import urllib.request
import csv
import json
import pandas as pd 
import requests
import time
from datetime import datetime, timedelta

# Task 1: Collect data from chosen API
###### Collect Data:
The following helper functions are used to create the output folder and to write the collected data to files.

In [2]:
def Json2file(FileName, Data):
    """Serialise ``Data`` as JSON and write it to ``FileName``.

    Parameters:
        FileName: Path of the file to write. It is opened in ``"w"`` mode,
            so an existing file is overwritten.
        Data: Any object that ``json.dump`` can serialise.

    Prints a progress message naming the target file before writing.
    """
    print(f"Writing JSON data to {FileName}")
    # The with-statement closes the file on exit (also on error), so no
    # explicit close() call is needed afterwards.
    with open(FileName, "w", encoding="utf-8") as File:
        json.dump(Data, File)

def makeDir(Folder):
    """Create the directory ``Folder`` unless something already exists there.

    When the path already exists nothing is created and a notice is printed
    instead.
    """
    if os.path.exists(Folder):
        print(f"Folder {Folder} already exists!")
        return
    os.mkdir(Folder)


#### Raw COVID-19 data will be extracted for all countries in the European Union and written to .json files

In [55]:
# Lower-case country names (27 entries) used as the last path segment of the API URL
countries = ["austria", "belgium", "bulgaria", "croatia", "cyprus", "czechia", "denmark", "estonia", "finland", "france", "germany", "greece", "hungary", "ireland", "italy", "latvia", "lithuania", "luxembourg", "malta", "netherlands",
             "poland", "portugal", "romania", "slovakia", "slovenia", "spain", "sweden"]
# Paths of the files that were written successfully, in download order
FileNames=[]
# Number of countries whose download failed
ErrCnt=0
# One spelling for the output folder: creating "Raw_Data" but writing to
# "./Raw_data/" only works on case-insensitive file systems.
RawDir="Raw_Data"
makeDir(RawDir)
for c in countries:
    URL=f"http://api.covid19api.com/total/dayone/country/{c}"
    try:
        # A timeout stops one stalled connection from hanging the whole run
        response=requests.get(URL, timeout=30)
    except requests.RequestException as err:
        # Network-level failure: count it and carry on with the next country
        print(f"Error for country {c}, {err}")
        ErrCnt+=1
    else:
        if response.status_code == 200:
            print(f"Successfully obtained data for {c}")
            FileName=f"./{RawDir}/{c}_rawdata.json"
            Json2file(FileName, response.json())
            FileNames.append(FileName)
        else:
            print(f"Error for country {c}, {response.status_code}")
            ErrCnt+=1
    # Delay so as to not request too much in short period of time
    time.sleep(1)
print(f"Number of errors: {ErrCnt}")

Folder Raw_Data already exists!
Successfully obtained data for austria
Writing JSON data to ./Raw_data/austria_rawdata.json
Successfully obtained data for belgium
Writing JSON data to ./Raw_data/belgium_rawdata.json
Successfully obtained data for bulgaria
Writing JSON data to ./Raw_data/bulgaria_rawdata.json
Successfully obtained data for croatia
Writing JSON data to ./Raw_data/croatia_rawdata.json
Successfully obtained data for cyprus
Writing JSON data to ./Raw_data/cyprus_rawdata.json
Successfully obtained data for czechia
Writing JSON data to ./Raw_data/czechia_rawdata.json
Successfully obtained data for denmark
Writing JSON data to ./Raw_data/denmark_rawdata.json
Successfully obtained data for estonia
Writing JSON data to ./Raw_data/estonia_rawdata.json
Successfully obtained data for finland
Writing JSON data to ./Raw_data/finland_rawdata.json
Successfully obtained data for france
Writing JSON data to ./Raw_data/france_rawdata.json
Successfully obtained data for germany
Writing JSO

## Data Preparation and Modification Functions

In [247]:
# This function extracts the daily new cases from the total cumulative cases
def getDailyCases(df):
    """Derive per-day new cases from the cumulative ``Confirmed`` column.

    Parameters:
        df: DataFrame with a ``Confirmed`` column of cumulative totals, plus
            ``Country`` and ``Date`` columns that are only read when a drop
            is reported. ``Date`` entries must then provide ``.date()``.

    Returns:
        A tuple ``(DailyCases, negCaseOcc)``:
        - ``DailyCases``: one entry per row, each formatted as a string. The
          first entry is the first cumulative total; every later entry is the
          difference to the previous row.
        - ``negCaseOcc``: number of rows whose cumulative total is lower than
          the previous row's. Each such row is also printed.

    NOTE(review): the values are returned as strings, as before; convert them
    with ``int`` / ``pd.to_numeric`` before doing arithmetic on the column.
    """
    TotalCases=df["Confirmed"].tolist()
    DailyCases=[]
    negCaseOcc=0
    # Iterate through the total confirmed cases
    for i, TC in enumerate(TotalCases):
        # The first row has no predecessor, so its total is the day's count
        if i == 0:
            DailyCases.append(f"{TC}")
            continue

        # Difference between current day and previous days cases
        newCases=TC-TotalCases[i-1]
        if newCases<0:
            # Positional access (.iloc) so the report also works when the
            # frame's index labels are not 0..n-1.
            print(f"""Denotified Cases: 
            Country: \t{df['Country'].iloc[0]}
            Date:\t{df['Date'].iloc[i].date()}
            Previous: \t{TotalCases[i-1]}
            Current: \t{TC}""")  
            negCaseOcc+=1

        DailyCases.append(f"{newCases}")

    return DailyCases, negCaseOcc


# Function to pad data for missing dates from startDate to present
# TODO make work lol
# see https://www.reddit.com/r/learnpython/comments/cwndc8/help_with_filling_in_missing_date_rows_in_a/
def padDates(df, startDate):
    """Placeholder for date padding; currently hands ``df`` back untouched.

    The intended behaviour is to add rows for every date between
    ``startDate`` and today that is missing from ``df``. That is not
    implemented yet: earlier experiments with ``pd.date_range`` /
    ``pd.period_range`` and ``DataFrame.reindex`` were left disabled.
    ``startDate`` is accepted but not used.
    """
    return df

# Want Date First
def reorderCols(df):
    """Return a new frame with only the five report columns, ``Date`` first.

    Any other column of ``df`` is left out of the result; a missing one
    raises ``KeyError``.
    """
    wanted = ["Date", "Country", "Confirmed", "New Cases", "Deaths"]
    return df[wanted]

# Function to remove hr/min/sec/TimeZone from Date entries
def removeTime(df):
    """Reduce every entry of ``df["Date"]`` to its calendar date.

    Parameters:
        df: Mapping-like object (e.g. a DataFrame) with a ``"Date"`` entry
            whose items provide a ``.date()`` method.

    Returns:
        A list with ``d.date()`` for each item. If an item cannot be
        converted, a notice is printed and the original ``df["Date"]`` is
        returned unchanged, so the return type differs between the two
        cases.
    """
    column = df["Date"]
    try:
        return [d.date() for d in column]
    # Only conversion failures are handled here. The previous bare `except`
    # also swallowed KeyboardInterrupt and SystemExit.
    # AttributeError: the item has no .date() (e.g. it is already a date).
    # ValueError: presumably raised for missing timestamps (NaT) - TODO confirm.
    except (AttributeError, ValueError):
        print("Already Removed Hours")
        return column

In [248]:
# Columns of the raw API records that are dropped before further processing
Columns2Drop = [
    "CountryCode", "Province", "City", "CityCode",
    "Lat", "Lon", "Recovered", "Active",
]
# Running total of days on which a cumulative case count dropped
negCaseOcc = 0

# NOTE(review): df is rebound on every pass, so only the frame of the last
# file is still available once the loop has finished.
for country, filename in zip(countries, FileNames):
    # Load one raw file and discard the unused columns
    df = pd.read_json(filename, orient="records").drop(columns=Columns2Drop)
    # Add the daily figures; n is the number of drops found in this file
    df["New Cases"], n = getDailyCases(df)
    df["Date"] = removeTime(df)
    df = reorderCols(df)
    negCaseOcc += n

print(f"Number of days found in total with a drop in cumulative case numbers: {negCaseOcc}")

Denotified Cases: 
            Country: 	Cyprus
            Date:	2020-08-27
            Previous: 	1484
            Current: 	1467
Denotified Cases: 
            Country: 	Finland
            Date:	2020-07-15
            Previous: 	7301
            Current: 	7296
Denotified Cases: 
            Country: 	Finland
            Date:	2020-07-16
            Previous: 	7296
            Current: 	7293
Denotified Cases: 
            Country: 	France
            Date:	2020-04-04
            Previous: 	64452
            Current: 	47378
Denotified Cases: 
            Country: 	France
            Date:	2020-04-07
            Previous: 	50887
            Current: 	47396
Denotified Cases: 
            Country: 	France
            Date:	2020-04-23
            Previous: 	158868
            Current: 	157158
Denotified Cases: 
            Country: 	France
            Date:	2020-04-29
            Previous: 	169098
            Current: 	167643
Denotified Cases: 
            Country: 	France
            Da

In [212]:
# Look up the first and last date held in the frame (neither value is kept)
df["Date"].iloc[0]
df["Date"].iloc[-1]

# Pad from the calendar date 435 days before today
startDate = datetime.today().date() - timedelta(days=435)
df = padDates(df, startDate)
# Last expression of the cell: show the resulting frame
df

Unnamed: 0,Country,Confirmed,Deaths,Date,New Cases
0,Sweden,1,0,2020-02-01,1
1,Sweden,1,0,2020-02-02,0
2,Sweden,1,0,2020-02-03,0
3,Sweden,1,0,2020-02-04,0
4,Sweden,1,0,2020-02-05,0
...,...,...,...,...,...
399,Sweden,684961,13003,2021-03-06,0
400,Sweden,684961,13003,2021-03-07,0
401,Sweden,684961,13003,2021-03-08,0
402,Sweden,695975,13042,2021-03-09,11014


In [264]:
# Walk day by day from the day after startDate up to (not including) today
# and report for each day whether it occurs in the frame's Date column.
# Today's date is read once so the end of the scan cannot shift mid-loop.
today=datetime.today().date()
startDate=today-timedelta(days = 435)
dftest=df
ishere=dftest["Date"]
# Pull the column values out once instead of on every iteration
knownDates=ishere.values
# NOTE(review): the scan begins one day after startDate, so startDate itself
# is never checked - confirm that this is intended.
nextDate=startDate+timedelta(days=1)
# Number of days checked so far
i=0
# "<" rather than "!=": the loop also ends when nextDate starts on or after
# today, where "!=" would never become false.
while nextDate < today:
    if nextDate in knownDates:
        # Val present, continue on
        print("True")
    else:
        # Add row with date of nextDate and zeros
        print(f"false {nextDate} missing")
    i+=1
    nextDate+=timedelta(days=1)
    print(nextDate)

false 2020-01-02 missing
2020-01-03
false 2020-01-03 missing
2020-01-04
false 2020-01-04 missing
2020-01-05
false 2020-01-05 missing
2020-01-06
false 2020-01-06 missing
2020-01-07
false 2020-01-07 missing
2020-01-08
false 2020-01-08 missing
2020-01-09
false 2020-01-09 missing
2020-01-10
false 2020-01-10 missing
2020-01-11
false 2020-01-11 missing
2020-01-12
false 2020-01-12 missing
2020-01-13
false 2020-01-13 missing
2020-01-14
false 2020-01-14 missing
2020-01-15
false 2020-01-15 missing
2020-01-16
false 2020-01-16 missing
2020-01-17
false 2020-01-17 missing
2020-01-18
false 2020-01-18 missing
2020-01-19
false 2020-01-19 missing
2020-01-20
false 2020-01-20 missing
2020-01-21
false 2020-01-21 missing
2020-01-22
false 2020-01-22 missing
2020-01-23
false 2020-01-23 missing
2020-01-24
false 2020-01-24 missing
2020-01-25
false 2020-01-25 missing
2020-01-26
false 2020-01-26 missing
2020-01-27
false 2020-01-27 missing
2020-01-28
false 2020-01-28 missing
2020-01-29
false 2020-01-29 missing
202

In [263]:
# Disabled experiment: DataFrame.add call using nextDate with a fill value of 0
#df.add(nextDate, axis="rows", fill_value=0)
# Last expression of the cell: show the current value of nextDate
nextDate


datetime.date(2021, 3, 11)