In [1]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import json
import os
import time
import datetime
from pprint import pprint 
from scipy.stats import linregress

In [2]:
#Take the API links and turn the JSON data into individual JSON Files 

#Load the API links to get the JSON data 
def get_api_data(api_link):
    response = requests.get(api_link)
    if response.status_code == 200:
        data = response.json()
        return data
    else:
        print("Error accessing API. Status code:", response.status_code)
        return None

# Take the loaded JSON data and create individual JSON files for each country 
def save_json_to_file(data, folder_path, file_name):
    file_path = os.path.join(folder_path, file_name)
    with open(file_path, 'w') as file:
        json.dump(data, file, indent=4)

def save_multiple_api_data(api_links, folder_path):
    for country, link in api_links.items():
        country_data = get_api_data(link)
        if country_data:
            file_name = f"{country}_data.json"
            save_json_to_file(country_data, folder_path, file_name)
            print(f"JSON data saved to {file_name} in the specified folder.")

# Dictionary of API links for different countries
api_links = {
    "Ireland": "https://disease.sh/v3/covid-19/variants/countries/Ireland",
    "France": "https://disease.sh/v3/covid-19/variants/countries/France",
    "Sweden": "https://disease.sh/v3/covid-19/variants/countries/Sweden",
    "Italy": "https://disease.sh/v3/covid-19/variants/countries/Italy",
    "Norway": "https://disease.sh/v3/covid-19/variants/countries/Norway", 
    "Germany": "https://disease.sh/v3/covid-19/variants/countries/Germany"
}

# Save each of the JSON files into a specific folder 
# Define the folder path where you want to save the files
folder_path = 'JSON_Data/'

# Save data from multiple API links to individual JSON files
save_multiple_api_data(api_links, folder_path)

JSON data saved to Ireland_data.json in the specified folder.
JSON data saved to France_data.json in the specified folder.
JSON data saved to Sweden_data.json in the specified folder.
JSON data saved to Italy_data.json in the specified folder.
JSON data saved to Norway_data.json in the specified folder.
JSON data saved to Germany_data.json in the specified folder.


In [3]:
#Take the JSON files and Convert them to CSV files to be saved in a separate folder
#This is done 

#Read each of the JSON Files
def read_json_file(json_file):
    with open(json_file, 'r') as file:
        data = json.load(file)
    return data

def save_csv_from_json(json_file, csv_file):
    data = read_json_file(json_file)
    df = pd.DataFrame(data)
    df.to_csv(csv_file, index=False)

#Convert each of the JSON files to CSV files 
def convert_multiple_json_to_csv(json_files, folder_path):
    for json_file in json_files:
        file_name = os.path.basename(json_file)
        csv_file = os.path.join(folder_path, os.path.splitext(file_name)[0] + '.csv')
        save_csv_from_json(json_file, csv_file)
        print(f"JSON data from {json_file} converted to CSV and saved to {csv_file}.")

# List of JSON files to convert
json_files = ['JSON_Data/Ireland_data.json','JSON_Data/France_data.json','JSON_Data/Sweden_data.json', 'JSON_Data/Italy_data.json', 'JSON_Data/Norway_data.json', 'JSON_Data/Germany_data.json' ]


# Save each of the CSV files into a specific folder 
# Define the folder path where you want to save the CSV files
folder_path = 'CSV_Data/'

# Convert multiple JSON files to CSV
convert_multiple_json_to_csv(json_files, folder_path)

JSON data from JSON_Data/Ireland_data.json converted to CSV and saved to CSV_Data/Ireland_data.csv.
JSON data from JSON_Data/France_data.json converted to CSV and saved to CSV_Data/France_data.csv.
JSON data from JSON_Data/Sweden_data.json converted to CSV and saved to CSV_Data/Sweden_data.csv.
JSON data from JSON_Data/Italy_data.json converted to CSV and saved to CSV_Data/Italy_data.csv.
JSON data from JSON_Data/Norway_data.json converted to CSV and saved to CSV_Data/Norway_data.csv.
JSON data from JSON_Data/Germany_data.json converted to CSV and saved to CSV_Data/Germany_data.csv.


In [4]:
#Turn the CSV files into DataFrames

# Define the path to the CSV file
ireland_csv = 'CSV_Data/ireland_data.csv'
france_csv = 'CSV_Data/france_data.csv'
sweden_csv = 'CSV_Data/sweden_data.csv'
italy_csv = 'CSV_Data/italy_data.csv'
norway_csv = 'CSV_Data/norway_data.csv'
germany_csv = 'CSV_Data/germany_data.csv'

# Read the CSV file into an individual DataFrame for each country
ireland_df = pd.read_csv(ireland_csv)
france_df = pd.read_csv(france_csv)
sweden_df = pd.read_csv(sweden_csv)
italy_df = pd.read_csv(italy_csv)
norway_df = pd.read_csv(norway_csv)
germany_df = pd.read_csv(germany_csv)

# Merge all the DataFrames together 
# The DataFrames had to be concatenated vertically to avoid memory issues as there is a large amount of data 
countries_merged = pd.concat([ireland_df, france_df, sweden_df, italy_df, norway_df, germany_df])

# Display the concatenated DataFrame
countries_merged.head()

Unnamed: 0,updated,country,yearWeek,source,newCases,numberSequenced,percentSequenced,validDenominator,variant,numberDetectionsVariant,numberSequencedKnownVariant,percentVariant
0,1712458590363,Ireland,2020-01,GISAID,0,0,0,True,Other,0,0,0.0
1,1712458590363,Ireland,2020-01,GISAID,0,0,0,True,XBB.1.5-like,0,0,0.0
2,1712458590363,Ireland,2020-01,GISAID,0,0,0,True,XBB.1.5-like+F456L,0,0,0.0
3,1712458590363,Ireland,2020-01,GISAID,0,0,0,True,B.1.351,0,0,0.0
4,1712458590363,Ireland,2020-01,GISAID,0,0,0,True,B.1.617.2,0,0,0.0


In [5]:
#See all of the columns in the merged data set 
countries_merged.columns

Index(['updated', 'country', 'yearWeek', 'source', 'newCases',
       'numberSequenced', 'percentSequenced', 'validDenominator', 'variant',
       'numberDetectionsVariant', 'numberSequencedKnownVariant',
       'percentVariant'],
      dtype='object')

In [6]:
#Rename the columns so that they are easier to read
countries_merged = countries_merged.rename(columns = {"updated":"Updated",
                                                     "country": "Country",
                                                     "yearWeek": "Year-Week",
                                                     "source": "Source",
                                                     "newCases":"New Cases",
                                                     "numberSequenced":"Number Sequenced",
                                                     "percentSequenced":"Percent Sequenced",
                                                     "vaildDenominator": "Valid Denominator",
                                                     "variant":"Variant",
                                                     "numberDetectionsVariant": "Detections of Variant",
                                                     "numberSequencedKnownVariant": "Number Sequenced of Known Variant",
                                                     "percentVariant": "Percent Variant"})
#Show the merged DataFrame with the new column names 
countries_merged.head()

Unnamed: 0,Updated,Country,Year-Week,Source,New Cases,Number Sequenced,Percent Sequenced,validDenominator,Variant,Detections of Variant,Number Sequenced of Known Variant,Percent Variant
0,1712458590363,Ireland,2020-01,GISAID,0,0,0,True,Other,0,0,0.0
1,1712458590363,Ireland,2020-01,GISAID,0,0,0,True,XBB.1.5-like,0,0,0.0
2,1712458590363,Ireland,2020-01,GISAID,0,0,0,True,XBB.1.5-like+F456L,0,0,0.0
3,1712458590363,Ireland,2020-01,GISAID,0,0,0,True,B.1.351,0,0,0.0
4,1712458590363,Ireland,2020-01,GISAID,0,0,0,True,B.1.617.2,0,0,0.0


In [7]:
#Need to convert Year-Week from int to date Format 

# Split Year-Week into Year and Week columns
countries_merged[['Year', 'Week']] = countries_merged['Year-Week'].str.split('-', expand=True)

# Convert Year and Week to Date
countries_merged['Date'] = pd.to_datetime(countries_merged['Year'] + countries_merged['Week'] + '1', format='%Y%W%w')

countries_merged.head()

Unnamed: 0,Updated,Country,Year-Week,Source,New Cases,Number Sequenced,Percent Sequenced,validDenominator,Variant,Detections of Variant,Number Sequenced of Known Variant,Percent Variant,Year,Week,Date
0,1712458590363,Ireland,2020-01,GISAID,0,0,0,True,Other,0,0,0.0,2020,1,2020-01-06
1,1712458590363,Ireland,2020-01,GISAID,0,0,0,True,XBB.1.5-like,0,0,0.0,2020,1,2020-01-06
2,1712458590363,Ireland,2020-01,GISAID,0,0,0,True,XBB.1.5-like+F456L,0,0,0.0,2020,1,2020-01-06
3,1712458590363,Ireland,2020-01,GISAID,0,0,0,True,B.1.351,0,0,0.0,2020,1,2020-01-06
4,1712458590363,Ireland,2020-01,GISAID,0,0,0,True,B.1.617.2,0,0,0.0,2020,1,2020-01-06


In [8]:
# Export the merged DataFrame as a CSV file and save it in the CSV_Data folder
# Will use this and the other CSV files for the visualizations 

countries_merged.to_csv('CSV_Data/all_countries_data.csv', index=False)

