In [1]:
base_dir = '/content/drive/My Drive/Projects/Epidemic Control Portal/Meteorological Analysis/'

#### Importing Libraries

In [2]:
import pandas as pd
import numpy as np

from scipy.stats import pearsonr, spearmanr

import plotly
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots

plotly.io.renderers='colab'

#### Importing data

In [11]:
data_0 = pd.read_csv(f'{base_dir}/data/USA/combined_processed.csv')
data_7 = pd.read_csv(f'{base_dir}/data/USA/combined_7_processed.csv')
data_14 = pd.read_csv(f'{base_dir}/data/USA/combined_14_processed.csv')

#### Average Graphs

In [13]:
fig = go.Figure()

data = data_0.groupby(by='name')['cases'].sum().reset_index()
fig.add_bar(x = data['name'], y = data['cases'], name = 'cases')


data = (data_0.groupby(by='name')['TEMP'].sum()/data_0.groupby(by='name')['TEMP'].count()).reset_index()
fig.add_trace(go.Scatter(x = data['name'], y = data['TEMP'], name = 'TEMP', yaxis='y2'))


data = (data_0.groupby(by='name')['DTR'].sum()/data_0.groupby(by='name')['DTR'].count()).reset_index()
fig.add_trace(go.Scatter(x = data['name'], y = data['DTR'], name = 'DTR', yaxis='y2'))


data = (data_0.groupby(by='name')['DEWP'].sum()/data_0.groupby(by='name')['DEWP'].count()).reset_index()
fig.add_trace(go.Scatter(x = data['name'], y = data['DEWP'], name = 'DEWP', yaxis='y2'))


data = (data_0.groupby(by='name')['WDSP'].sum()/data_0.groupby(by='name')['WDSP'].count()).reset_index()
fig.add_trace(go.Scatter(x = data['name'], y = data['WDSP'], name = 'WDSP', yaxis='y3'))

fig.update_layout(
     xaxis=dict(
        domain=[0.07, 0.9]
    ),

    yaxis=dict(
        title="Cases",
    ),

    yaxis2=dict(
        title="Temprature (°F)",

        anchor="x",
        overlaying="y",
        side="right"
    ),

    yaxis3=dict(
        title="Wind Speed (km/h)",

        anchor="free",
        overlaying="y",
        side="left",
        position=0.0
    ), 
    legend=dict(
    orientation="h",
    yanchor="bottom",
    y=1.02,
    xanchor="right",
    x=0.8
  )
)

# Update layout properties
fig.update_layout(
    title_text="Total COVID-19 case counts, average values of meteorological factors in 19\
    cities of USAduring the period of March 1st to November 1st",
    width=1300,
    height=650
)

fig.show()

#### Utils

In [14]:
def plot(data, name):

    data = data.loc[data['name'] == name]
    fig = go.Figure()
    fig.add_bar(x = data['date'], y = data['cases'], name = 'cases')

    fig.add_trace(go.Scatter(x = data['date'], y = data['TEMP'], name = 'TEMP', yaxis='y2'))
    fig.add_trace(go.Scatter(x = data['date'], y = data['DTR'], name = 'DTR', yaxis='y2'))
    fig.add_trace(go.Scatter(x = data['date'], y = data['DEWP'], name = 'DEWP', yaxis='y2'))
    fig.add_trace(go.Scatter(x = data['date'], y = data['WDSP'], name = 'WDSP', yaxis='y3'))

    fig.update_layout(
        xaxis=dict(
        domain=[0.07, 0.9]
      ),

      yaxis=dict(
        title="Cases",
      ),

      yaxis2=dict(
        title="Temprature (°F)",

        anchor="x",
        overlaying="y",
        side="right"
      ),

      yaxis3=dict(
        title="Wind Speed (km/h)",

        anchor="free",
        overlaying="y",
        side="left",
        position=0.0
      ), 
      legend=dict(
        orientation="h",
        yanchor="bottom",
        y=1.02,
        xanchor="right",
        x=0.8
    )
    )

    # Update layout properties
    fig.update_layout(
      title_text=f"Covid-19 cases and meteorological factors in city {name} over time",
      width=1300,
      height=650
    )

    fig.show()

In [15]:
def correlation_coeff(data, name):
    data = data.loc[data['name'] == name]
    relation_df = pd.DataFrame()

    X = data['cases'].values
    pearson = []
    spearman = []

    for factor in ['TEMP', 'DEWP', 'DTR', 'WDSP']:
        Y = data[factor].values
        pearson.append(pearsonr(X, Y)[0])
        spearman.append(spearmanr(X, Y)[0])

    relation_df['Y'] = ['TEMP', 'DEWP', 'DTR', 'WDSP']
    relation_df['Pearson'] = pearson
    relation_df['Spearman'] = spearman
    relation_df.sort_values(by=['Spearman'], inplace=True)

    print(relation_df)

#### Citywise analysis

In [16]:
for name in data_7['name'].unique():
    plot(data_7, name)
    correlation_coeff(data_7, name)

      Y   Pearson  Spearman
2   DTR -0.166377 -0.180774
3  WDSP -0.113464 -0.136787
1  DEWP  0.357197  0.421746
0  TEMP  0.378350  0.431501


      Y   Pearson  Spearman
3  WDSP  0.086874  0.078322
2   DTR  0.255006  0.323847
1  DEWP  0.325407  0.423726
0  TEMP  0.444652  0.539217


      Y   Pearson  Spearman
1  DEWP -0.303770 -0.222907
0  TEMP -0.300481 -0.170995
3  WDSP -0.044504 -0.072292
2   DTR -0.006006  0.021330


      Y   Pearson  Spearman
0  TEMP -0.427541 -0.421838
1  DEWP -0.323971 -0.350915
2   DTR -0.219118 -0.159422
3  WDSP  0.165663  0.203625


      Y   Pearson  Spearman
3  WDSP -0.095890 -0.186318
2   DTR -0.046593 -0.026106
0  TEMP  0.558791  0.731522
1  DEWP       NaN       NaN



An input array is constant; the correlation coefficent is not defined.


invalid value encountered in true_divide


invalid value encountered in true_divide


invalid value encountered in greater


invalid value encountered in less


invalid value encountered in less_equal



      Y   Pearson  Spearman
1  DEWP -0.054064  0.010381
0  TEMP -0.062895  0.056342
2   DTR  0.090420  0.111541
3  WDSP  0.045813  0.130791


      Y   Pearson  Spearman
3  WDSP -0.049779 -0.119214
1  DEWP -0.146700  0.024653
0  TEMP -0.141897  0.034822
2   DTR  0.070196  0.158817


      Y   Pearson  Spearman
0  TEMP -0.415599 -0.276477
1  DEWP -0.413292 -0.271338
3  WDSP  0.178257 -0.000996
2   DTR  0.310180  0.092891


      Y   Pearson  Spearman
2   DTR -0.154975 -0.133218
3  WDSP -0.122349 -0.109411
1  DEWP  0.311436  0.410892
0  TEMP  0.295290  0.423484



An input array is constant; the correlation coefficent is not defined.


invalid value encountered in true_divide


invalid value encountered in true_divide


invalid value encountered in greater


invalid value encountered in less


invalid value encountered in less_equal



      Y   Pearson  Spearman
2   DTR -0.193193 -0.165417
0  TEMP  0.150038  0.151257
1  DEWP  0.225849  0.234103
3  WDSP       NaN       NaN


      Y   Pearson  Spearman
3  WDSP -0.166003 -0.242457
2   DTR -0.095219 -0.136431
0  TEMP  0.230211  0.348430
1  DEWP  0.252838  0.411463


      Y   Pearson  Spearman
3  WDSP -0.042710 -0.103334
2   DTR  0.102014  0.005958
0  TEMP  0.117954  0.333173
1  DEWP  0.112776  0.338927


      Y   Pearson  Spearman
0  TEMP -0.214171 -0.049427
1  DEWP -0.224379 -0.036388
3  WDSP -0.061818 -0.026281
2   DTR  0.086894  0.073968


      Y   Pearson  Spearman
3  WDSP -0.189267 -0.240820
1  DEWP -0.189269  0.019614
0  TEMP  0.064394  0.207682
2   DTR  0.263285  0.282138


      Y   Pearson  Spearman
2   DTR -0.119612 -0.156790
3  WDSP -0.118575 -0.126934
0  TEMP  0.348067  0.466973
1  DEWP  0.365446  0.472881


      Y   Pearson  Spearman
0  TEMP -0.357750 -0.311695
1  DEWP  0.061389 -0.251961
2   DTR -0.148392 -0.140533
3  WDSP  0.145403  0.142858


      Y   Pearson  Spearman
0  TEMP -0.147711 -0.326039
2   DTR  0.008882  0.011043
3  WDSP  0.004199  0.187983
1  DEWP       NaN       NaN



An input array is constant; the correlation coefficent is not defined.


invalid value encountered in true_divide


invalid value encountered in true_divide


invalid value encountered in greater


invalid value encountered in less


invalid value encountered in less_equal



      Y   Pearson  Spearman
2   DTR -0.013730 -0.045188
3  WDSP -0.029868  0.049116
1  DEWP  0.286463  0.398173
0  TEMP  0.360019  0.476107


      Y   Pearson  Spearman
3  WDSP -0.191945 -0.224580
2   DTR  0.142801  0.200729
0  TEMP  0.232915  0.376935
1  DEWP  0.246118  0.388411
