In [31]:
import numpy as np
import pandas as pd
import calendar
import re

### COVID19

In [4]:
#Import Covid Dataset

ds_covid = pd.read_csv("Data/COVID/owid-covid-data.csv")
#If missing values: Interpolate: ds_covid = ds_covid.interpolate(method='nearest')
ds_covid = ds_covid.fillna(0)
ds_covid.date = pd.to_datetime(ds_covid.date)
ds_covid['month'] = pd.DatetimeIndex(ds_covid['date']).month
ds_covid['month'] = ds_covid['month'].apply(lambda x: calendar.month_name[x])
ds_covid['year'] = pd.DatetimeIndex(ds_covid['date']).year
ds_covid.set_index(['date'],inplace=True)

In [24]:
countries = ['USA', 'IND', 'BRA', 'RUS', 'GBR', 'FRA', 'ESP', 'ITA' , 'TUR',  'DEU']

counts = []
mean = []
std = []
min_v = []
max_v = []

for iso in countries:
    country = ds_covid[ds_covid['iso_code']==iso][['new_cases']].reset_index().drop(columns=['date'])
    counts.append(country.count()[0])
    mean.append(country.mean()[0])
    std.append(country.std()[0])
    min_v.append(country.min()[0])
    max_v.append(country.max()[0])

print("Counts: ", np.mean(counts))
print("Mean: ", np.mean(mean))
print("STD: ", np.mean(std))
print("MIN: ", min(min_v))
print("MAX: ", max(max_v))

Counts:  351.4
Mean:  17556.742687014485
STD:  19553.469775960824
MIN:  -46076.0
MAX:  298031.0


### DOTS

In [29]:
#Import DOTS Dataset
ds_dots = pd.read_csv("Data/DOTS/Exports.csv")
countries = ['United States', 'India', 'Brazil', 'USSR', 'United Kingdom', 'France', 'Spain', 'Italy' , 'Turkey', 'Germany']

In [32]:
counts = []
mean = []
std = []
min_v = []
max_v = []

for c in countries:
    
    country = ds_dots.loc[ds_dots['Location'] == c]
    time = country.columns.tolist()[1:]
    for row in country.iterrows():
        values = row[1]
    values = [str(x) for x in values]
    values = [float(re.sub(',', '', x)) for x in values[1:]]
    time = [re.sub('M', '-', x) for x in time]
    country = pd.DataFrame(list(zip(time, values)), columns = ['Date', 'Exports'])
    
    country = country.fillna(0)
    country.Date = pd.to_datetime(country.Date)
    country['month'] = pd.DatetimeIndex(country['Date']).month
    country['month'] = country['month'].apply(lambda x: calendar.month_name[x])
    country['year'] = pd.DatetimeIndex(country['Date']).year
    country.set_index(['Date'],inplace=True)
    
    country = country[['Exports']].reset_index().drop(columns=['Date'])
    counts.append(country.count()[0])
    mean.append(country.mean()[0])
    std.append(country.std()[0])
    min_v.append(country.min()[0])
    max_v.append(country.max()[0])

print("Counts: ", np.mean(counts))
print("Mean: ", np.mean(mean))
print("STD: ", np.mean(std))
print("MIN: ", min(min_v))
print("MAX: ", max(max_v))

Counts:  254.0
Mean:  13570.985212598427
STD:  6275.553445917292
MIN:  0.0
MAX:  109011.45


### US Air Pollution

In [33]:
#Import US Pollution Dataset
ds_poll = pd.read_csv("Data/USPollution/USPollution.csv")
ds_poll = ds_poll.dropna()
ds_poll['Date Local'] = pd.to_datetime(ds_poll['Date Local'])
ds_poll['month'] = pd.DatetimeIndex(ds_poll['Date Local']).month
ds_poll['month'] = ds_poll['month'].apply(lambda x: calendar.month_name[x])
ds_poll['year'] = pd.DatetimeIndex(ds_poll['Date Local']).year
ds_poll.set_index(['Date Local'],inplace=True)

In [34]:
counts = []
mean = []
std = []
min_v = []
max_v = []


for i in [1,2,5,8,9,10,11,12,13,15]:

    country = ds_poll[ds_poll['State Code']==i][['CO Mean']].reset_index().drop(columns=['Date Local'])
    counts.append(country.count()[0])
    mean.append(country.mean()[0])
    std.append(country.std()[0])
    min_v.append(country.min()[0])
    max_v.append(country.max()[0])

print("Counts: ", np.mean(counts))
print("Mean: ", np.mean(mean))
print("STD: ", np.mean(std))
print("MIN: ", min(min_v))
print("MAX: ", max(max_v))

Counts:  4722.1
Mean:  0.3926498905978665
STD:  0.22031763634420098
MIN:  -0.304348
MAX:  3.477778


### Population Dataset

In [35]:
#Import Population Dataset
ds_pop = pd.read_csv("Data/Population/Pop.csv")
ds_pop = ds_pop.dropna()

In [36]:
countries = ['USA', 'IND', 'BRA', 'RUS', 'GBR', 'FRA', 'ESP', 'ITA' , 'TUR',  'DEU']

In [38]:
counts = []
mean = []
std = []
min_v = []
max_v = []

for iso in countries:
    country = ds_pop[ds_pop['Code']==iso][['Population by Country (Clio Infra (2016))']].reset_index().drop(columns=['index'])
    counts.append(country.count()[0])
    mean.append(country.mean()[0])
    std.append(country.std()[0])
    min_v.append(country.min()[0])
    max_v.append(country.max()[0])

print("Counts: ", np.mean(counts))
print("Mean: ", np.mean(mean))
print("STD: ", np.mean(std))
print("MIN: ", min(min_v))
print("MAX: ", max(max_v))

Counts:  21.5
Mean:  80244323.88008772
STD:  48291312.403779395
MIN:  800000.0
MAX:  1004124000.0


### Global Temperature

In [39]:
#Import Land Temp Dataset
ds_gtemp = pd.read_csv("Data/GlobalTemperature/GlobalLandTemperaturesByCountry.csv")
ds_gtemp = ds_gtemp.dropna()
ds_gtemp['dt'] = pd.to_datetime(ds_gtemp['dt'])
ds_gtemp['month'] = pd.DatetimeIndex(ds_gtemp['dt']).month
ds_gtemp['month'] = ds_gtemp['month'].apply(lambda x: calendar.month_name[x])
ds_gtemp['year'] = pd.DatetimeIndex(ds_gtemp['dt']).year
ds_gtemp.set_index(['dt'],inplace=True)

countries = ['United States', 'India', 'Brazil', 'Russia', 'United Kingdom', 'France', 'Spain', 'Italy' , 'Turkey', 'Germany']

In [40]:
counts = []
mean = []
std = []
min_v = []
max_v = []

for iso in countries:
    country =  ds_gtemp[ds_gtemp['Country']==c][['AverageTemperature']].reset_index().drop(columns=['dt'])
    counts.append(country.count()[0])
    mean.append(country.mean()[0])
    std.append(country.std()[0])
    min_v.append(country.min()[0])
    max_v.append(country.max()[0])

print("Counts: ", np.mean(counts))
print("Mean: ", np.mean(mean))
print("STD: ", np.mean(std))
print("MIN: ", min(min_v))
print("MAX: ", max(max_v))

Counts:  3166.0
Mean:  8.152496209728366
STD:  6.9134269691182535
MIN:  -9.298
MAX:  22.343000000000004
