# US Coronavirus Analysis

#### Analysis of Coronavirus (COVID-19) Evolution and Impact

## 1 Import, clean and format confirmed Cases

### 1.0 Data Sources

#### Source of data: https://raw.githubusercontent.com/CSSEGISandData, which is updated by Johns Hopkins University from a variety of government and NGO sources
#### For full list of sources see: https://systems.jhu.edu/research/public-health/ncov/
#### originally had been pulling directly from google sheets as per

#### csv_url='http://spreadsheets.google.com/ccc?key=1UF2pSkFTURko2OvfHWWlFpDFAr1UxCBA4JLwlSP6KFo&output=csv'



In [11]:


import pandas as pd
import numpy as np
import requests as rs
import pandas as pd

#csv_url = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv'

#read cases

# Download the two US time-series CSVs and save them in the working directory.
# The HTTP status is checked before writing so that an error page is never
# stored as if it were data, and each file is written through a context
# manager so the handle is closed (and the bytes flushed) before later cells
# read the file back.
csv_url='https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_US.csv'

res=rs.get(url=csv_url)
res.raise_for_status()
with open('coronavirus_stats_confUS.csv', 'wb') as out_file:
    out_file.write(res.content)

#read deaths
csv_url='https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_US.csv'

res=rs.get(url=csv_url)
res.raise_for_status()
with open('coronavirus_stats_deathUS.csv', 'wb') as out_file:
    out_file.write(res.content)



1697471

### 1.1 Reading in cases

In [12]:


# Load the confirmed-cases CSV saved by the download cell.
# The file is read from the working directory, i.e. the same relative name
# the download cell writes to; the previous absolute, user-specific path only
# worked on one machine and could silently pick up a stale copy.
read_conf = pd.read_csv('coronavirus_stats_confUS.csv', encoding = "ISO-8859-1")

#replace blank province/state with nil
# Assign the result back instead of calling fillna(inplace=True) on the column
# selection: the in-place form acts on an intermediate object and is not
# guaranteed to update the parent frame.
read_conf['Province_State'] = read_conf['Province_State'].fillna('')

#replace all other NaNs with 0
read_conf = read_conf.fillna(0)

#create new name key column (string form of Province_State, placed second)
read_conf.insert(1, 'name', read_conf["Province_State"].map(str))




### Reading in deaths

In [13]:
# Load the deaths CSV saved by the download cell.
# The file is read from the working directory, i.e. the same relative name
# the download cell writes to; the previous absolute, user-specific path only
# worked on one machine and could silently pick up a stale copy.
read_deaths = pd.read_csv('coronavirus_stats_deathUS.csv', encoding = "ISO-8859-1")

#replace blank province/state with nil
# Assign the result back instead of calling fillna(inplace=True) on the column
# selection: the in-place form acts on an intermediate object and is not
# guaranteed to update the parent frame.
read_deaths['Province_State'] = read_deaths['Province_State'].fillna('')

#replace all other NaNs with 0
read_deaths = read_deaths.fillna(0)

#create new name key column (string form of Province_State, placed second)
read_deaths.insert(1, 'name', read_deaths["Province_State"].map(str))



### 1.2 Formatting and Cleaning Data (Cases)

#### The data in the CSV file needs to be re-worked and reshaped to facilitate convenient plotting.
#### 1.  I first sliced the csv into two dataframes and then transposed the date columns (the columns from position 12 onwards, counting the inserted `name` column)
#### 2. Next step was to rework the cases figures into a "Cases" column according to the Date column (which is created from the index date)

In [14]:


# Split the wide confirmed-cases table into descriptive columns and per-date
# columns, then build one small (Cases, Date) frame per source row.

read_conf_copy = read_conf.copy()

# first 12 columns: the descriptive part of every row (repeated per date later)
new_city = read_conf_copy.iloc[:, :12]
read_conf_copy = read_conf.copy()

# remaining columns: the per-date values
new_df2 = read_conf_copy.iloc[:, 12:]

# after transposing, the date labels form the index and every source row is a column
new_df2_T = new_df2.transpose()

# number of source rows
i_points = read_conf.shape[0]

new_df2_T['Date'] = new_df2_T.index

# d maps the source row position to its two-column frame
d = {}
for i in range(i_points):
    per_row = new_df2_T.iloc[:, i].to_frame()
    per_row.columns = ["Cases"]
    per_row['Date'] = per_row.index
    d[i] = per_row
    
   


#### Dataframes are subsequently re-combined and duplicates are eliminated

In [15]:

#recombine dataframes

# For every source row, attach its descriptive columns to each of its dated
# 'Cases' rows. dfs maps the row position to the combined frame.
dfs={}
for i in range (0,i_points):

    # descriptive columns of source row i as a one-row frame
    city_row=new_city.loc[i,:].to_frame()
    city_row_T=city_row.T

    # Cross join through a constant 'tmp' key: one descriptive row against
    # all dated rows gives exactly one output row per date. The descriptive
    # row is deliberately NOT replicated once per date first -- doing so
    # produced len(dates)**2 merged rows per source row that were then
    # de-duplicated back down to len(dates), with the same final result.
    d[i]['tmp'] = 1
    city_row_T['tmp'] = 1

    dfs[i] = pd.merge(city_row_T,d[i], on=['tmp'])
    dfs[i] = dfs[i].drop('tmp', axis=1)
    # kept so that a repeated date label still yields a single row
    dfs[i]= dfs[i].drop_duplicates(subset='Date')







In [16]:


#append dataframes into master format
from datetime import datetime


# Stack the per-row frames (in dfs key order) into one long table.
final_df = pd.concat(dfs, ignore_index=True)
#in order to model number of days from onset, convert to datetime
final_df['Date_proper'] = pd.to_datetime(final_df['Date'])

#Create new days column based on number of days from first row (first row is day 1)
final_df['Day']=((final_df['Date_proper']-final_df['Date_proper'].iloc[0]).dt.total_seconds())/(24*60*60)+1

# Day-on-day change in Cases.
# The difference must be taken within each source row's own series. Grouping
# by 'name' (a copy of Province_State) is not enough: rows that share a
# Province_State would be chained together, so the first date of one row
# would be differenced against the last date of the previous row.
# series_id labels every stacked row with the dfs key it came from.
series_id = pd.Series(
    np.repeat(list(dfs.keys()), [len(frame) for frame in dfs.values()]),
    index=final_df.index,
)
final_df['Delta_Cases']=final_df['Cases'].groupby(series_id).diff()


final_df.to_csv(r'Coronavirus_cases_cleanedUS.csv')

### 1.3 Formatting and Cleaning Data (Deaths)

In [17]:
# Split the wide deaths table into descriptive columns and per-date columns,
# then build one small (deaths, Date) frame per source row.

read_deaths_copy = read_deaths.copy()

# first 13 columns: the descriptive part of every row (repeated per date later)
new_citydeaths = read_deaths_copy.iloc[:, :13]
read_deaths_copy = read_deaths.copy()

# remaining columns: the per-date values
new_df2deaths = read_deaths_copy.iloc[:, 13:]

# after transposing, the date labels form the index and every source row is a column
new_df2_Tdeaths = new_df2deaths.transpose()

# number of source rows
i_pointsdeaths = read_deaths.shape[0]

new_df2_Tdeaths['Date'] = new_df2_Tdeaths.index

# d maps the source row position to its two-column frame
d = {}
for i in range(i_pointsdeaths):
    per_row = new_df2_Tdeaths.iloc[:, i].to_frame()
    per_row.columns = ["deaths"]
    per_row['Date'] = per_row.index
    d[i] = per_row

In [18]:

#recombine dataframes

# For every source row, attach its descriptive columns to each of its dated
# 'deaths' rows. dfsdeaths maps the row position to the combined frame.
dfsdeaths={}
for i in range (0,i_pointsdeaths):

    # descriptive columns of source row i as a one-row frame
    city_rowdeaths=new_citydeaths.loc[i,:].to_frame()
    city_row_Tdeaths=city_rowdeaths.T

    # Cross join through a constant 'tmp' key: one descriptive row against
    # all dated rows gives exactly one output row per date. The descriptive
    # row is deliberately NOT replicated once per date first -- doing so
    # produced len(dates)**2 merged rows per source row that were then
    # de-duplicated back down to len(dates), with the same final result.
    d[i]['tmp'] = 1
    city_row_Tdeaths['tmp'] = 1

    dfsdeaths[i] = pd.merge(city_row_Tdeaths,d[i], on=['tmp'])
    dfsdeaths[i] = dfsdeaths[i].drop('tmp', axis=1)
    # kept so that a repeated date label still yields a single row
    dfsdeaths[i]= dfsdeaths[i].drop_duplicates(subset='Date')


In [19]:
#append dataframes into master format
from datetime import datetime


# Stack the per-row frames (in dfsdeaths key order) into one long table.
final_dfdeaths = pd.concat(dfsdeaths, ignore_index=True)
#in order to model number of days from onset, convert to datetime
final_dfdeaths['Date_proper'] = pd.to_datetime(final_dfdeaths['Date'])

#Create new days column based on number of days from first row (first row is day 1)
final_dfdeaths['Day']=((final_dfdeaths['Date_proper']-final_dfdeaths['Date_proper'].iloc[0]).dt.total_seconds())/(24*60*60)+1

# Day-on-day change in deaths.
# The difference must be taken within each source row's own series. Grouping
# by 'name' (a copy of Province_State) is not enough: rows that share a
# Province_State would be chained together, so the first date of one row
# would be differenced against the last date of the previous row.
# series_id labels every stacked row with the dfsdeaths key it came from.
series_id = pd.Series(
    np.repeat(list(dfsdeaths.keys()), [len(frame) for frame in dfsdeaths.values()]),
    index=final_dfdeaths.index,
)
final_dfdeaths['Delta_Deaths']=final_dfdeaths['deaths'].groupby(series_id).diff()


final_dfdeaths.to_csv(r'Coronavirus_deaths_cleanedUS.csv')

### Defining Dataframes

## Plotting Cases by Geography

In [20]:
import plotly.express as px
#https://plot.ly/python-api-reference/generated/plotly.express.scatter_geo.html
scale = 0.1

# Animated bubble map: one frame per Date, bubbles sized and coloured by Cases.
# NOTE(review): final_df_MChina is not assigned in any cell visible here, and
# the recorded output of this cell is a NameError -- confirm whether this cell
# should be dropped or pointed at final_df.
fig = px.scatter_geo(
    final_df_MChina,
    lat="Lat",
    lon="Long",
    color="Cases",
    size='Cases',
    size_max=70,
    hover_name="name",
    animation_frame="Date",
    color_continuous_scale=px.colors.sequential.RdBu[::-1],
    title="Coronavirus Cases by Day (Mainland China))",
)
fig.show()

NameError: name 'final_df_MChina' is not defined

In [None]:

# Line chart of Cases over Date, one line per 'name'.

import plotly.express as px

fig = px.line(
    final_df_international,
    x="Date",
    y="Cases",
    color="name",
    line_group="name",
    hover_name="name",
    title="International Cases",
    render_mode="svg",
)
# drop the "name=" prefix from the legend entries
fig.for_each_trace(lambda trace: trace.update(name=trace.name.replace("name=", "")))
fig.show()

In [None]:

# Rows whose 'Country/Region' equals 'Italy'; the first rows are shown as the cell output.
is_italy = final_df_international['Country/Region'] == 'Italy'
df_italy = final_df_international[is_italy]

df_italy.head()

In [None]:
# Display the first rows of final_df_international as the cell output.
final_df_international.head()


In [None]:
# Display the first rows of final_df_MChina as the cell output.
# NOTE(review): final_df_MChina is not assigned in any cell visible here -- confirm it still exists.
final_df_MChina.head()

In [None]:
# Animated bubble map of the rows whose 'Country/Region' is 'US'.

import plotly.express as px
#https://plot.ly/python-api-reference/generated/plotly.express.scatter_geo.html

# NOTE(review): the cells visible here only show underscore-style columns
# (e.g. 'Province_State'); confirm 'Country/Region' and 'Long' exist in final_df.
final_df_US_specific = final_df[final_df['Country/Region'] == 'US']

fig = px.scatter_geo(
    final_df_US_specific,
    lat="Lat",
    lon="Long",
    color="Cases",
    size='Cases',
    size_max=40,
    hover_name="name",
    animation_frame="Date",
    title="Coronavirus Cases by Day (U.S))",
)
fig.show()

In [None]:
# Keep only the rows whose 'Country/Region' is one of the selected labels.
top_regions = [
    'Republic of Korea',
    'Italy',
    'Iran (Islamic Republic of)',
    'Japan',
    'France',
    'Germany',
    'Spain',
    'Singapore',
    'Hong Kong',
    'Kuwait',
    'Switzerland',
    'UK',
]
df_top = final_df_international[final_df_international['Country/Region'].isin(top_regions)]
df_top.head()

      
      

#### International Cases 

In [None]:
# All rows whose 'Country/Region' is not 'China'.
# An explicit copy is taken because other cells add columns to this frame;
# assigning into a bare filtered slice raises SettingWithCopyWarning and is
# not guaranteed to behave consistently.
final_df_international=final_df[final_df['Country/Region']!='China'].copy()




In [None]:
# Add 'total_cases': for every row, the sum of Cases over all rows sharing its Date.

#exlcude Mainland China cases for international cases
#final_df_international=final_df[final_df['Country/Region']!='Mainland China']

cases_by_date = final_df_international.groupby('Date')['Cases']
final_df_international['total_cases'] = cases_by_date.transform('sum')




#df_international=final_df_international[final_df_international['Country/Region'].isin(['South Korea','Italy','Iran','Japan','France','Germany','Spain'

           
#df_international=df[-df["column"].isin(["value"])]
                                                                                    

In [None]:

import plotly.express as px

# Line chart of total_cases over Date, one line per 'name'.
fig = px.line(
    final_df_international,
    x="Date",
    y="total_cases",
    color="name",
    line_group="name",
    title="International Cases outside Mainland China",
    render_mode="svg",
)
# drop the "name=" prefix from the legend entries
fig.for_each_trace(lambda trace: trace.update(name=trace.name.replace("name=", "")))
fig.show()

### Euro

In [None]:
# Recompute 'total_cases' (sum of Cases per Date) and display the frame.
cases_by_date = final_df_international.groupby('Date')['Cases']
final_df_international['total_cases'] = cases_by_date.transform('sum')
final_df_international

In [None]:
  

# Build the European subset and its per-date total.
# NOTE(review): every label starts with a space, while 'name' is built from
# Province_State alone in the cells visible here -- confirm these can match.
Euro_list=[' Austria', ' Belgium', ' Bosnia and Herzegovina', ' Cyprus', ' Czech Republic', ' Denmark', ' Finland', ' France', ' Germany', ' Gibraltar', ' Hungary', ' Iceland', ' Ireland', ' Italy', ' Malta', ' Netherlands', ' Norway', ' Poland', ' Portugal', ' Spain', ' Sweden', ' Switzerland', ' United Kingdom', ' Albania', ' Bulgaria', ' Channel Islands', ' Faroe Islands', ' Greece', ' Liechtenstein', ' North Macedonia', ' Romania'

]

# .copy() makes the subset an independent frame, so adding a column below
# neither raises SettingWithCopyWarning nor depends on view/copy semantics.
final_df_Euro=final_df_international[final_df_international['name'].isin(Euro_list)].copy()
# sum of Cases over all selected rows sharing the same Date
final_df_Euro['total_cases']=final_df_Euro['Cases'].groupby(final_df_Euro['Date']).transform('sum')




In [None]:
import plotly.express as px

# Line chart of total_cases over Date for the European subset.
fig = px.line(
    final_df_Euro,
    x="Date",
    y="total_cases",
    color="name",
    line_group="name",
    title="Euro Cases",
    render_mode="svg",
)
# drop the "name=" prefix from the legend entries
fig.for_each_trace(lambda trace: trace.update(name=trace.name.replace("name=", "")))
fig.show()

In [None]:
# Line chart of Cases over Date for the European subset, one line per 'name'.

import plotly.express as px

fig = px.line(
    final_df_Euro,
    x="Date",
    y="Cases",
    color="name",
    line_group="name",
    hover_name="name",
    title="Euro Cases",
    render_mode="svg",
)
# drop the "name=" prefix from the legend entries
fig.for_each_trace(lambda trace: trace.update(name=trace.name.replace("name=", "")))
fig.show()

In [None]:
# Line chart of Delta_Cases over Date for the European subset, one line per 'name'.

import plotly.express as px

fig = px.line(
    final_df_Euro,
    x="Date",
    y="Delta_Cases",
    color="name",
    line_group="name",
    hover_name="name",
    title="Day on Day Delta Euro Cases",
    render_mode="svg",
)
# drop the "name=" prefix from the legend entries
fig.for_each_trace(lambda trace: trace.update(name=trace.name.replace("name=", "")))
fig.show()

In [None]:
# define UK dataframe

UK_list=[ 'United Kingdom United Kingdom']

# .copy() makes the subset an independent frame, so adding a column below
# neither raises SettingWithCopyWarning nor depends on view/copy semantics.
final_df_UK=final_df_international[final_df_international['name'].isin(UK_list)].copy()

# sum of Cases over all selected rows sharing the same Date
final_df_UK['total_cases']=final_df_UK['Cases'].groupby(final_df_UK['Date']).transform('sum')

In [None]:
# Select the rows named ' US' and plot their Delta_Cases over Date.

US_list = [' US']

us_mask = final_df_international['name'].isin(US_list)
final_df_US = final_df_international[us_mask]

import plotly.express as px

fig = px.line(
    final_df_US,
    x="Date",
    y="Delta_Cases",
    color="name",
    line_group="name",
    hover_name="name",
    title="New Covid-19 Cases (US)",
    render_mode="svg",
)
# drop the "name=" prefix from the legend entries
fig.for_each_trace(lambda trace: trace.update(name=trace.name.replace("name=", "")))
fig.show()

In [None]:




# Selected European labels, their per-date total, and a Delta_Cases chart.

y_list=[' Germany', ' Italy',' Spain',' France',' United Kingdom', ' Netherlands', ' Belgium', ' Sweden']

# .copy() makes the subset an independent frame, so adding a column below
# neither raises SettingWithCopyWarning nor depends on view/copy semantics.
final_df_EuroX=final_df_international[final_df_international['name'].isin(y_list)].copy()

# sum of Cases over all selected rows sharing the same Date
final_df_EuroX['total_cases']=final_df_EuroX['Cases'].groupby(final_df_EuroX['Date']).transform('sum')

import plotly.express as px


fig = px.line(final_df_EuroX,title="New Covid-19 Cases (Euro)", x="Date", y="Delta_Cases", color="name", line_group="name", hover_name="name",render_mode="svg")
# drop the "name=" prefix from the legend entries
fig.for_each_trace(lambda t: t.update(name=t.name.replace("name=", "")))
fig.show()

In [None]:
# Add 'total_Delta_Cases' (sum of Delta_Cases per Date) and chart it.
delta_by_date = final_df_EuroX.groupby('Date')['Delta_Cases']
final_df_EuroX['total_Delta_Cases'] = delta_by_date.transform('sum')

import plotly.express as px

fig = px.line(
    final_df_EuroX,
    x="Date",
    y="total_Delta_Cases",
    color="name",
    line_group="name",
    title="New Covid-19 Cases (Euro)",
    render_mode="svg",
)
# drop the "name=" prefix from the legend entries
fig.for_each_trace(lambda trace: trace.update(name=trace.name.replace("name=", "")))
fig.show()


In [None]:

# Latin American labels, their per-date total, and a Delta_Cases chart.
Latam_list=[' Argentina', ' Brazil',' Chile',' Colombia',' Mexico',' Venezuela']

# .copy() makes the subset an independent frame, so adding a column below
# neither raises SettingWithCopyWarning nor depends on view/copy semantics.
final_df_Latam=final_df_international[final_df_international['name'].isin(Latam_list)].copy()

# sum of Cases over all selected rows sharing the same Date
final_df_Latam['total_cases']=final_df_Latam['Cases'].groupby(final_df_Latam['Date']).transform('sum')

import plotly.express as px


fig = px.line(final_df_Latam,title="New Cases (Latam)", x="Date", y="Delta_Cases", color="name", line_group="name", hover_name="name",render_mode="svg")
# drop the "name=" prefix from the legend entries
fig.for_each_trace(lambda t: t.update(name=t.name.replace("name=", "")))
fig.show()

In [None]:

# Selected Asian / Middle-Eastern labels, their per-date total, and a Delta_Cases chart.
East_list=[' United Arab Emirates', ' Saudi Arabia',' India',' Iran',' Japan',' Korea, South',' Taiwan*',' Thailand',' Vietnam']

# .copy() makes the subset an independent frame, so adding a column below
# neither raises SettingWithCopyWarning nor depends on view/copy semantics.
final_df_Asia=final_df_international[final_df_international['name'].isin(East_list)].copy()

# sum of Cases over all selected rows sharing the same Date
final_df_Asia['total_cases']=final_df_Asia['Cases'].groupby(final_df_Asia['Date']).transform('sum')

import plotly.express as px


fig = px.line(final_df_Asia,title="New Cases (East ex China)", x="Date", y="Delta_Cases", color="name", line_group="name", hover_name="name",render_mode="svg")
# drop the "name=" prefix from the legend entries
fig.for_each_trace(lambda t: t.update(name=t.name.replace("name=", "")))
fig.show()

###  U.S

In [None]:
### Cases U.S (aggregate)

# Keep rows whose 'Country/Region' is 'US' and whose 'Province/State' contains
# neither the text 'County' nor a comma.
# NOTE(review): the cells visible here only show underscore-style columns
# (e.g. 'Province_State'); confirm the slash-style columns exist in final_df.
# .copy() makes the subset an independent frame, so adding a column below
# neither raises SettingWithCopyWarning nor depends on view/copy semantics.
final_df_US=final_df[(final_df['Country/Region']=='US') & (~final_df['Province/State'].str.contains('County'))& (~final_df['Province/State'].str.contains(','))].copy()

# sum of Cases over all selected rows sharing the same Date
final_df_US['total_cases']=final_df_US['Cases'].groupby(final_df_US['Date']).transform('sum')



In [None]:





import plotly.express as px

# Line chart of Delta_Cases over Date for final_df_US, one line per 'name'.
fig = px.line(
    final_df_US,
    x="Date",
    y="Delta_Cases",
    color="name",
    line_group="name",
    hover_name="name",
    title="New Covid-19 Cases (US)",
    render_mode="svg",
)
# drop the "name=" prefix from the legend entries
fig.for_each_trace(lambda trace: trace.update(name=trace.name.replace("name=", "")))
fig.show()

### China

In [None]:
# Display final_df_MChina as the cell output.
# NOTE(review): final_df_MChina is not assigned in any cell visible here -- confirm it still exists.
final_df_MChina

In [None]:
# Chart Delta_Cases over Date for a handful of province labels.

import plotly.express as px

Chinay_list = ['Guangdong China', 'Henan China','Hunan China','Shandong China','Zhejiang China']

selected_rows = final_df_MChina['name'].isin(Chinay_list)
final_df_MChinaX = final_df_MChina[selected_rows]

fig = px.line(
    final_df_MChinaX,
    x="Date",
    y="Delta_Cases",
    color="name",
    line_group="name",
    title="New Cases Selected Provinces (China)",
    render_mode="svg",
)
# drop the "name=" prefix from the legend entries
fig.for_each_trace(lambda trace: trace.update(name=trace.name.replace("name=", "")))
fig.show()


In [None]:
# Add 'total_Delta_Cases': the sum of Delta_Cases over all rows sharing a Date.
delta_by_date = final_df_MChina.groupby('Date')['Delta_Cases']
final_df_MChina['total_Delta_Cases'] = delta_by_date.transform('sum')




In [None]:
import plotly.express as px

# Line chart of total_Delta_Cases over Date for final_df_MChina.
fig = px.line(
    final_df_MChina,
    x="Date",
    y="total_Delta_Cases",
    color="name",
    line_group="name",
    title="New Cases (China)",
    render_mode="svg",
)
# drop the "name=" prefix from the legend entries
fig.for_each_trace(lambda trace: trace.update(name=trace.name.replace("name=", "")))
fig.show()

In [None]:
# Line chart of total_cases over Date for final_df_EuroX.
fig = px.line(
    final_df_EuroX,
    x="Date",
    y="total_cases",
    color="name",
    line_group="name",
    title="Euro Cases",
    render_mode="svg",
)
# drop the "name=" prefix from the legend entries
fig.for_each_trace(lambda trace: trace.update(name=trace.name.replace("name=", "")))

fig.show()




In [None]:
# Keep only the rows whose 'Country/Region' is one of the selected labels.
top_regions = [
    'Korea, South',
    'Italy',
    'Iran',
    'Japan',
    'France',
    'Germany',
    'Spain',
    'Singapore',
    'Hong Kong',
    'Kuwait',
    'Switzerland',
    'UK',
]
df_top = final_df_international[final_df_international['Country/Region'].isin(top_regions)]
df_top.head()


In [None]:




# Line chart of Cases over Date for df_top, one line per 'name'.

import plotly.express as px

fig = px.line(
    df_top,
    x="Date",
    y="Cases",
    color="name",
    line_group="name",
    hover_name="name",
    title="International Cases (top 12)",
    render_mode="svg",
)
# drop the "name=" prefix from the legend entries
fig.for_each_trace(lambda trace: trace.update(name=trace.name.replace("name=", "")))
fig.show()

## Distribution of Cases

In [None]:
#define exponential function

import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
def func(x, a, b):
    """Evaluate the exponential model ``a * exp(b * x)``.

    x may be a scalar or anything numpy.exp accepts; a is the scale factor
    and b the rate in the exponent.
    """
    growth = np.exp(b * x)
    return a * growth



In [None]:
# Fit the exponential model to the Hubei series and plot model vs. reported values.
#conf range https://lmfit.github.io/lmfit-py/model.html
# NOTE(review): final_df_MChina_Hubei is not assigned in any cell visible here.

Cases = final_df_MChina_Hubei['Cases']
Days = final_df_MChina_Hubei['Day']

#empirical data
xdata = Days
y = Cases
ydata = y

# popt holds the fitted (a, b); pcov is their estimated covariance
popt, pcov = curve_fit(func, xdata, ydata)

fig, ax = plt.subplots(1, 1, figsize=(10, 6))
model_label = 'Exp Model: a=%5.3f, b=%5.3f' % tuple(popt)
ax.plot(xdata, func(xdata, *popt), 'r-', label=model_label)
ax.plot(xdata, ydata, 'b-', label='Reported Cases')
plt.xlabel('Days')
plt.ylabel('Cases')
ax.set_title('Hubei Coronavirus Cases by Day')
ax.legend()

In [None]:
# Display the first rows of final_df_MChina_Hubei as the cell output.
# NOTE(review): final_df_MChina_Hubei is not assigned in any cell visible here -- confirm it still exists.
final_df_MChina_Hubei.head()

## Forecasting Future Cases

In [None]:
 

# Give every province frame the 'y' (copy of Cases) and 'ds' (copy of Date)
# columns that the forecasting cells fit on.
# Order: Hubei, Guangdong, Zhejiang, Henan, Hunan, Anhui, Jiangxi.
province_frames = (
    final_df_MChina_Hubei,
    final_df_MChina_Guangdong,
    final_df_MChina_Zhejiang,
    final_df_MChina_Henan,
    final_df_MChina_Hunan,
    final_df_MChina_Anhui,
    final_df_MChina_Jiangxi,
)
for province_frame in province_frames:
    province_frame['y'] = province_frame['Cases']
    province_frame['ds'] = province_frame['Date']




In [None]:

from fbprophet import Prophet
from fbprophet.plot import add_changepoints_to_plot


#NOTE, DEFAULT changepoint_prior_scale IS 0.05, DECREASING makes less flexible, INCREASIGN makes more flexible
changepoint_prior_scale_new = 0.05
periods_new = 30


def _fit_province_prophet(history):
    # Fit a Prophet model on history and return it together with a frame
    # extended by periods_new future periods.
    model = Prophet(interval_width=0.95, changepoint_prior_scale=changepoint_prior_scale_new)
    model.fit(history)
    return model, model.make_future_dataframe(periods=periods_new)


# Hubei
df_Hubei = final_df_MChina_Hubei
gm_prophet, future = _fit_province_prophet(df_Hubei)

# Guangdong
df_Guangdong = final_df_MChina_Guangdong
gm_prophet_Guangdong, future_Guangdong = _fit_province_prophet(df_Guangdong)

# Zhejiang
df_Zhejiang = final_df_MChina_Zhejiang
gm_prophet_Zhejiang, future_Zhejiang = _fit_province_prophet(df_Zhejiang)

# Henan
df_Henan = final_df_MChina_Henan
gm_prophet_Henan, future_Henan = _fit_province_prophet(df_Henan)

# Hunan
df_Hunan = final_df_MChina_Hunan
gm_prophet_Hunan, future_Hunan = _fit_province_prophet(df_Hunan)

# Anhui
df_Anhui = final_df_MChina_Anhui
gm_prophet_Anhui, future_Anhui = _fit_province_prophet(df_Anhui)

# Jiangxi
df_Jiangxi = final_df_MChina_Jiangxi
gm_prophet_Jiangxi, future_Jiangxi = _fit_province_prophet(df_Jiangxi)





In [None]:
# Run each fitted model over its extended date frame to obtain the forecast.

# Hubei
forecast = gm_prophet.predict(future)

# Guangdong
forecast_Guangdong = gm_prophet_Guangdong.predict(future_Guangdong)

# Zhejiang
forecast_Zhejiang = gm_prophet_Zhejiang.predict(future_Zhejiang)

# Henan
forecast_Henan = gm_prophet_Henan.predict(future_Henan)

# Hunan
forecast_Hunan = gm_prophet_Hunan.predict(future_Hunan)

# Anhui
forecast_Anhui = gm_prophet_Anhui.predict(future_Anhui)

# Jiangxi
forecast_Jiangxi = gm_prophet_Jiangxi.predict(future_Jiangxi)




In [None]:






# One forecast figure per province, each with its changepoints overlaid.

# Guangdong
fig_Guangdong = gm_prophet_Guangdong.plot(forecast_Guangdong)
changepoints_Guangdong = add_changepoints_to_plot(
    fig_Guangdong.gca(), gm_prophet_Guangdong, forecast_Guangdong
)

# Zhejiang
fig_Zhejiang = gm_prophet_Zhejiang.plot(forecast_Zhejiang)
changepoints_Zhejiang = add_changepoints_to_plot(
    fig_Zhejiang.gca(), gm_prophet_Zhejiang, forecast_Zhejiang
)

# Henan
fig_Henan = gm_prophet_Henan.plot(forecast_Henan)
changepoints_Henan = add_changepoints_to_plot(
    fig_Henan.gca(), gm_prophet_Henan, forecast_Henan
)

# Hunan
fig_Hunan = gm_prophet_Hunan.plot(forecast_Hunan)
changepoints_Hunan = add_changepoints_to_plot(
    fig_Hunan.gca(), gm_prophet_Hunan, forecast_Hunan
)

# Anhui
fig_Anhui = gm_prophet_Anhui.plot(forecast_Anhui)
changepoints_Anhui = add_changepoints_to_plot(
    fig_Anhui.gca(), gm_prophet_Anhui, forecast_Anhui
)

# Jiangxi
fig_Jiangxi = gm_prophet_Jiangxi.plot(forecast_Jiangxi)
changepoints_Jiangxi = add_changepoints_to_plot(
    fig_Jiangxi.gca(), gm_prophet_Jiangxi, forecast_Jiangxi
)

#### Comparing trajectory of new cases for various provinces (Mainland China)

In [None]:

# Four stacked panels (Anhui, Guangdong, Henan, Hunan) of forecast plus
# changepoints, saved to temp.png.
#https://www.taiwannews.com.tw/en/news/3874490

fig = plt.figure(figsize=(10, 10))

# panel 1: Anhui
ax1 = fig.add_subplot(4, 1, 1)
gm_prophet_Anhui.plot(forecast_Anhui, ax=ax1)
ax1.set_title('Anhui, Confirmed Coronavirus Cases')
changepoints = add_changepoints_to_plot(fig.gca(), gm_prophet_Anhui, forecast_Anhui)

# panel 2: Guangdong
ax2 = fig.add_subplot(4, 1, 2)
gm_prophet_Guangdong.plot(forecast_Guangdong, ax=ax2)
changepoints_Guangdong = add_changepoints_to_plot(
    fig.gca(), gm_prophet_Guangdong, forecast_Guangdong
)
ax2.set_title('Guangdong, Confirmed Coronavirus Cases')

# panel 3: Henan (changepoints are drawn before the forecast here)
ax3 = fig.add_subplot(4, 1, 3)
changepoints_Henan = add_changepoints_to_plot(fig.gca(), gm_prophet_Henan, forecast_Henan)
gm_prophet_Henan.plot(forecast_Henan, ax=ax3)
ax3.set_title('Henan, Confirmed Coronavirus Cases')

# panel 4: Hunan (changepoints are drawn before the forecast here)
ax4 = fig.add_subplot(4, 1, 4)
changepoints_Hunan = add_changepoints_to_plot(fig.gca(), gm_prophet_Hunan, forecast_Hunan)
gm_prophet_Hunan.plot(forecast_Hunan, ax=ax4)
ax4.set_title('Hunan, Confirmed Coronavirus Cases')

# only the bottom panel keeps an x label; every panel is labelled 'Cases'
axes = fig.get_axes()
for upper_panel in axes[:3]:
    upper_panel.set_xlabel('')
    upper_panel.set_ylabel('Cases')
axes[3].set_xlabel('Date')
axes[3].set_ylabel('Cases')

fig.savefig('temp.png', dpi=100)


In [None]:
# Forecast figure for the model held in gm_prophet, with axis labels.
fig1 = gm_prophet.plot(forecast)
plt.title('Hubei Coronavirus Cases by Day, actual and forecasted');
axes = fig1.get_axes()
main_axis = axes[0]
main_axis.set_xlabel('Date')
main_axis.set_ylabel('Cases')


In [None]:
# Forecast figure for the model held in gm_prophet, with changepoints overlaid.
from fbprophet.plot import add_changepoints_to_plot
fig2 = gm_prophet.plot(forecast)
a = add_changepoints_to_plot(fig2.gca(), gm_prophet, forecast)
plt.title('Hubei Coronavirus Cases by Day, actual and forecasted with changepoints');
axes = fig2.get_axes()
main_axis = axes[0]
main_axis.set_xlabel('Date')
main_axis.set_ylabel('Cases')





### Forecast International Cases (ex Mainland China)

In [None]:
# Display the 'Date' column of final_df_international from position 30 onwards.
final_df_international['Date'][30:,]

In [None]:
# Rows dated after 23 Feb 2020.
# The filter uses 'Date_proper' (the datetime column derived from 'Date')
# rather than 'Date' itself: 'Date' holds text labels, and comparing text with
# '2/23/20' is lexicographic, not chronological.
final_df_international_adjust = final_df_international[(final_df_international['Date_proper'] > pd.Timestamp('2020-02-23')) ]
final_df_international_adjust

In [None]:

# Rows dated after 20 Feb 2020, with the 'y' and 'ds' columns used for fitting.
# The filter uses 'Date_proper' (the datetime column derived from 'Date')
# rather than 'Date' itself: 'Date' holds text labels, and comparing text with
# '2020-02-20' is lexicographic, not chronological.
# .copy() makes the subset an independent frame so the column assignments
# below do not raise SettingWithCopyWarning.
final_df_international_adjust = final_df_international[(final_df_international['Date_proper'] > pd.Timestamp('2020-02-20')) ].copy()
final_df_international_adjust['y']=final_df_international_adjust['total_cases']
final_df_international_adjust['ds']=final_df_international_adjust['Date']


In [None]:
from fbprophet import Prophet
from fbprophet.plot import add_changepoints_to_plot

# Settings for the international forecast.
#NOTE, DEFAULT changepoint_prior_scale IS 0.05, DECREASING makes less flexible, INCREASIGN makes more flexible
changepoint_prior_scale_new_int = 0.05
# number of future periods to extend the forecast by
periods_new_int = 30


In [None]:
# Fit on final_df_international_adjust, extend by periods_new_int periods, and forecast.
gm_prophet_int = Prophet(
    interval_width=0.95,
    changepoint_prior_scale=changepoint_prior_scale_new_int,
)
gm_prophet_int.fit(final_df_international_adjust)

future_int = gm_prophet_int.make_future_dataframe(periods=periods_new_int)
forecast_int = gm_prophet_int.predict(future_int)


In [None]:
# International forecast figure with changepoints overlaid.
from fbprophet.plot import add_changepoints_to_plot
fig2 = gm_prophet_int.plot(forecast_int)
a = add_changepoints_to_plot(fig2.gca(), gm_prophet_int, forecast_int)
plt.title('International Coronavirus Cases by Day, actual and forecasted with changepoints');
axes = fig2.get_axes()
main_axis = axes[0]
main_axis.set_xlabel('Date')
main_axis.set_ylabel('Cases')


### Predictions of Euro Countries (changepoints)

In [None]:
# Display final_df_Euro as the cell output.
final_df_Euro

In [None]:
# Rows of final_df_Euro whose 'name' is exactly 'France France', shown as the cell output.
is_france = final_df_Euro['name'] == 'France France'
df_France = final_df_Euro[is_france]
df_France

In [None]:


#define country DFs

# Rebuild the European subset, then one frame per country carrying the
# 'y' (copy of Cases) and 'ds' (copy of Date) columns used for fitting.
Euro_list=[' Austria', ' Belgium', ' Bosnia and Herzegovina', ' Cyprus', ' Czech Republic', ' Denmark', ' Finland', ' France', ' Germany', ' Gibraltar', ' Hungary', ' Iceland', ' Ireland', ' Italy', ' Malta', ' Netherlands', ' Norway', ' Poland', ' Portugal', ' Spain', ' Sweden', ' Switzerland', ' United Kingdom', ' Albania', ' Bulgaria', ' Channel Islands', ' Faroe Islands', ' Greece', ' Liechtenstein', ' North Macedonia', ' Romania'

]

final_df_Euro=final_df_international[final_df_international['name'].isin(Euro_list)]

# Every per-country frame is an explicit copy: the 'y'/'ds' assignments would
# otherwise target a filtered slice and raise SettingWithCopyWarning.

# France
df_France = final_df_Euro[final_df_Euro['name']== ' France'].copy()
df_France['y']= df_France['Cases']
df_France['ds']= df_France['Date']

#UK
df_UK= final_df_Euro[final_df_Euro['name']== ' United Kingdom'].copy()
df_UK['y']= df_UK['Cases']
df_UK['ds']= df_UK['Date']

# Italy
df_Italy = final_df_Euro [final_df_Euro['name']== ' Italy'].copy()
df_Italy['y']= df_Italy['Cases']
df_Italy['ds']= df_Italy['Date']

#Spain
df_Spain = final_df_Euro [final_df_Euro['name']== ' Spain'].copy()
df_Spain['y']= df_Spain['Cases']
df_Spain['ds']= df_Spain['Date']

#Germany
df_Germany = final_df_Euro [final_df_Euro['name']== ' Germany'].copy()
df_Germany['y']= df_Germany['Cases']
df_Germany['ds']= df_Germany['Date']




In [None]:
# Fit one Prophet model per country and build its extended date frame.
from fbprophet import Prophet
from fbprophet.plot import add_changepoints_to_plot


#NOTE, DEFAULT changepoint_prior_scale IS 0.05, DECREASING makes less flexible, INCREASIGN makes more flexible
changepoint_prior_scale_new2 = 0.2
periods_new = 30


def _fit_country_prophet(history):
    # Fit a Prophet model on history and return it together with a frame
    # extended by periods_new future periods.
    model = Prophet(interval_width=0.95, changepoint_prior_scale=changepoint_prior_scale_new2)
    model.fit(history)
    return model, model.make_future_dataframe(periods=periods_new)


# France
gm_prophet_France, future_France = _fit_country_prophet(df_France)

# UK
gm_prophet_UK, future_UK = _fit_country_prophet(df_UK)

# Italy
gm_prophet_Italy, future_Italy = _fit_country_prophet(df_Italy)

# Spain
gm_prophet_Spain, future_Spain = _fit_country_prophet(df_Spain)

# Germany
gm_prophet_Germany, future_Germany = _fit_country_prophet(df_Germany)


In [None]:
# Run each fitted model over its extended date frame to obtain the forecast.

# model held in gm_prophet (fitted in an earlier cell)
forecast = gm_prophet.predict(future)

# France
forecast_France = gm_prophet_France.predict(future_France)

# UK
forecast_UK = gm_prophet_UK.predict(future_UK)

# Italy
forecast_Italy = gm_prophet_Italy.predict(future_Italy)

# Spain
forecast_Spain = gm_prophet_Spain.predict(future_Spain)

# Germany
forecast_Germany = gm_prophet_Germany.predict(future_Germany)


In [None]:
# One forecast figure per country, each with its changepoints overlaid.

# France
fig_France = gm_prophet_France.plot(forecast_France)
changepoints_France = add_changepoints_to_plot(
    fig_France.gca(), gm_prophet_France, forecast_France
)

# UK
fig_UK = gm_prophet_UK.plot(forecast_UK)
changepoints_UK = add_changepoints_to_plot(
    fig_UK.gca(), gm_prophet_UK, forecast_UK
)

# Italy
fig_Italy = gm_prophet_Italy.plot(forecast_Italy)
changepoints_Italy = add_changepoints_to_plot(
    fig_Italy.gca(), gm_prophet_Italy, forecast_Italy
)

# Spain
fig_Spain = gm_prophet_Spain.plot(forecast_Spain)
changepoints_Spain = add_changepoints_to_plot(
    fig_Spain.gca(), gm_prophet_Spain, forecast_Spain
)

# Germany
fig_Germany = gm_prophet_Germany.plot(forecast_Germany)
changepoints_Germany = add_changepoints_to_plot(
    fig_Germany.gca(), gm_prophet_Germany, forecast_Germany
)


In [None]:
# France: forecast plot with changepoints overlaid, then title and axis labels
from fbprophet.plot import add_changepoints_to_plot

fig2 = gm_prophet_France.plot(forecast_France)
axes = fig2.get_axes()
a = add_changepoints_to_plot(fig2.gca(), gm_prophet_France, forecast_France)
plt.title('France Coronavirus Cases by Day, actual and forecasted with changepoints')
axes[0].set(xlabel='Date')
axes[0].set_ylabel('Cases')

In [None]:

# UK: forecast plot with changepoints overlaid, then title and axis labels
from fbprophet.plot import add_changepoints_to_plot

fig2 = gm_prophet_UK.plot(forecast_UK)
axes = fig2.get_axes()
a = add_changepoints_to_plot(fig2.gca(), gm_prophet_UK, forecast_UK)
plt.title('UK Coronavirus Cases by Day, actual and forecasted with changepoints')
axes[0].set(xlabel='Date')
axes[0].set_ylabel('Cases')

In [None]:

# Spain: forecast plot with changepoints overlaid, then title and axis labels
from fbprophet.plot import add_changepoints_to_plot

fig2 = gm_prophet_Spain.plot(forecast_Spain)
axes = fig2.get_axes()
a = add_changepoints_to_plot(fig2.gca(), gm_prophet_Spain, forecast_Spain)
plt.title('Spain Coronavirus Cases by Day, actual and forecasted with changepoints')
axes[0].set(xlabel='Date')
axes[0].set_ylabel('Cases')

In [None]:

# Italy: forecast plot with changepoints overlaid, then title and axis labels
from fbprophet.plot import add_changepoints_to_plot

fig2 = gm_prophet_Italy.plot(forecast_Italy)
axes = fig2.get_axes()
a = add_changepoints_to_plot(fig2.gca(), gm_prophet_Italy, forecast_Italy)
plt.title('Italy Coronavirus Cases by Day, actual and forecasted with changepoints')
axes[0].set(xlabel='Date')
axes[0].set_ylabel('Cases')

In [None]:


# Germany: forecast plot with changepoints overlaid, then title and axis labels
from fbprophet.plot import add_changepoints_to_plot

fig2 = gm_prophet_Germany.plot(forecast_Germany)
axes = fig2.get_axes()
a = add_changepoints_to_plot(fig2.gca(), gm_prophet_Germany, forecast_Germany)
plt.title('Germany Coronavirus Cases by Day, actual and forecasted with changepoints')
axes[0].set(xlabel='Date')
axes[0].set_ylabel('Cases')



In [None]:
import plotly.express as px


def _strip_name_prefix(trace):
    """Drop the 'name=' prefix from a trace's legend label."""
    return trace.update(name=trace.name.replace("name=", ""))


# One line per `name`, plotting total_cases against Date
fig = px.line(
    final_df_international,
    x="Date",
    y="total_cases",
    color="name",
    line_group="name",
    title="International Cases outside Mainland China",
    render_mode="svg",
)
fig.for_each_trace(_strip_name_prefix)
fig.show()

## Flights Analysis

In [None]:
#https://pcfg.eu/posts/how-to-plot-flight-routes-using-plotly/

# Import libraries
import pandas as pd
#import plotly.plotly as py
import plotly.offline as ol
from geographiclib.geodesic import Geodesic
geod = Geodesic.WGS84

def dist(p1Lat, p1Lon, p2Lat, p2Lon):
    """Return the geodesic distance, in meters, between two points.

    Solves the inverse geodesic problem on the WGS84 ellipsoid (`geod`)
    from (p1Lat, p1Lon) to (p2Lat, p2Lon), requesting only the distance
    output, and returns its 's12' entry.
    """
    inverse_solution = geod.Inverse(p1Lat, p1Lon, p2Lat, p2Lon, Geodesic.DISTANCE)
    return inverse_solution['s12']

# Read the data into a dataframe (specifying the column names)

#read_conf = pd.read_csv('/Users/neil.watt/Documents/PythonScripts/Coronavirus/coronavirus_stats_conf2.csv', encoding = "ISO-8859-1")


#df = pd.read_csv('routes.dat', sep=',', header=None,
 #                names=['Airline','Airline ID','Source airport','Source airport ID',
  #                      'Destination airport','Destination airport ID','Codeshare',
   #                     'Stops','Equipment'])



# Load the routes table. The file is read without a header row, so the
# column names are supplied explicitly.
_route_columns = [
    'Airline', 'Airline ID',
    'Source airport', 'Source airport ID',
    'Destination airport', 'Destination airport ID',
    'Codeshare', 'Stops', 'Equipment',
]
df = pd.read_csv(
    '/Users/neil.watt/Documents/PythonScripts/Coronavirus/routes.dat.txt',
    sep=',',
    header=None,
    names=_route_columns,
)



In [None]:
# Keep a single row per (source, destination) pair: one trajectory per route
_route_pairs = df[['Source airport', 'Destination airport']]
df = _route_pairs.drop_duplicates()

# Load the airports table. The file is read without a header row, so the
# column names are supplied explicitly.
_airport_columns = [
    'Airport', 'Name', 'City', 'Country', 'IATA', 'ICAO',
    'Latitude', 'Longitude', 'Altitude', 'Timezone', 'DST',
    'Tz', 'Type', 'Source',
]
df_airports = pd.read_csv(
    '/Users/neil.watt/Documents/PythonScripts/Coronavirus/airports.dat.txt',
    sep=',',
    header=None,
    names=_airport_columns,
)

In [None]:
## ALL LONDON AIRPORTS

# IATA codes, in order: London City, Heathrow, Gatwick, Luton, Stansted, Southend
LONDON_AIRPORTS = ['LCY', 'LHR', 'LGW', 'LTN', 'STN', 'SEN']

# Select only those routes starting or ending at a London airport
df = df.loc[df['Source airport'].isin(LONDON_AIRPORTS)
            | df['Destination airport'].isin(LONDON_AIRPORTS)]

# Append the origin airport's coordinates to the routes' dataframe.
# No column names collide on this first merge, so the added columns keep
# their plain names (IATA, Latitude, Longitude).
df = pd.merge(df, df_airports[['IATA','Latitude','Longitude']],
              how='inner', left_on='Source airport', right_on='IATA', suffixes=('_Orig','_Dest'))

# Append the destination airport's coordinates. The names now collide, so
# the suffixes split them into *_Orig (first merge) and *_Dest (this merge).
df = pd.merge(df, df_airports[['IATA','Latitude','Longitude']],
              how='inner', left_on='Destination airport', right_on='IATA', suffixes=('_Orig','_Dest'))

# Keep only Origin/Destination IATA ID columns, and their Latitude/Longitude
df = df.drop(columns=['Source airport','Destination airport'])

# Great-circle distance in km between origin and destination, computed in
# one pass and stored as a float column (dist() returns meters).
df['Distance'] = pd.Series(
    [
        dist(lat_orig, lon_orig, lat_dest, lon_dest) / 1000
        for lat_orig, lon_orig, lat_dest, lon_dest in zip(
            df['Latitude_Orig'], df['Longitude_Orig'],
            df['Latitude_Dest'], df['Longitude_Dest'],
        )
    ],
    index=df.index,
    dtype='float64',
)

# Initialise the data list that will be used to feed the plot
data = []

# Append all airports (blue dots) to the map
data.append(dict(
    type='scattergeo',
    locationmode='ISO-3',
    showlegend=False,
    lon=df_airports['Longitude'],
    lat=df_airports['Latitude'],
    hoverinfo='text',
    text=df_airports['IATA'],
    mode='markers',
    marker=dict(
        size=2,
        color='rgb(0, 0, 255)',
        line=dict(
            width=3,
            color='rgba(68, 68, 68, 0)',
        ),
    ),
))

# Append the longest route to the map. It is looked up once, and skipped
# when no route survived the filtering (nothing to highlight in that case).
_known_distances = df['Distance'].dropna()
if not _known_distances.empty:
    _longest = df.loc[_known_distances.idxmax()]
    data.append(dict(
        type='scattergeo',
        locationmode='ISO-3',
        name='Longest Route',
        showlegend=True,
        lon=[_longest['Longitude_Orig'], _longest['Longitude_Dest']],
        lat=[_longest['Latitude_Orig'], _longest['Latitude_Dest']],
        mode='lines',
        line=dict(
            width=2,
            color='red',
        ),
        opacity=1,
    ))

# Append all other routes to the map; only routes departing from a London
# airport get a legend entry.
for _route in df.itertuples(index=False):
    data.append(dict(
        type='scattergeo',
        locationmode='ISO-3',
        name='{} - {}'.format(_route.IATA_Orig, _route.IATA_Dest),
        showlegend=_route.IATA_Orig in LONDON_AIRPORTS,
        lon=[_route.Longitude_Orig, _route.Longitude_Dest],
        lat=[_route.Latitude_Orig, _route.Latitude_Dest],
        mode='lines',
        line=dict(
            width=1,
            color='green',
        ),
        opacity=0.3,
    ))

# Define the plot's layout
layout = dict(
    title='Airports and Routes',
    showlegend=True,
    geo=dict(
        scope='world',
        projection=dict(type='azimuthal equal area'),
        showland=True,
        landcolor='rgb(255, 255, 255)',
        countrycolor='rgb(0, 0, 0)',
    ),
)

# Create the figure to be plotted
fig = dict( data=data, layout=layout )
#py.plot(fig, world_readable=True)
ol.plot(fig, filename='Airports and routes.html')

In [None]:
## Airport IATA codes per country: India, Iran, China, Italy, United Kingdom
#
# For each country: the matching airport rows, their IATA codes as a list,
# and the same codes with duplicates removed (set order, so unordered).
#
# NOTE(review): df_Italy is also the frame fitted by gm_prophet_Italy earlier
# in this notebook; this cell rebinds it (and presumably df_UK) to airport
# rows, so re-running the Prophet cells afterwards would use the wrong data.

# India
df_India = df_airports[df_airports['Country'].eq('India')]
India_list = df_India['IATA'].tolist()
India_list_new = [*set(India_list)]

# Iran
df_Iran = df_airports[df_airports['Country'].eq('Iran')]
Iran_list = df_Iran['IATA'].tolist()
Iran_list_new = [*set(Iran_list)]

# China
df_China = df_airports[df_airports['Country'].eq('China')]
China_list = df_China['IATA'].tolist()
China_list_new = [*set(China_list)]

# Italy
df_Italy = df_airports[df_airports['Country'].eq('Italy')]
Italy_list = df_Italy['IATA'].tolist()
Italy_list_new = [*set(Italy_list)]

# United Kingdom
df_UK = df_airports[df_airports['Country'].eq('United Kingdom')]
UK_list = df_UK['IATA'].tolist()
UK_list_new = [*set(UK_list)]

#final_df_MChina_Hubei=final_df_MChina[final_df_MChina['Province/State']=='Hubei']

#inal_df_MChina_Hubei=final_df_MChina[final_df_MChina['Province/State']=='Hubei']



In [None]:
# Display the de-duplicated list of UK airport IATA codes
UK_list_new

In [None]:
# IATA codes of the six London airports used for the London route map
# (London City, Heathrow, Gatwick, Luton, Stansted, Southend)
['LCY','LHR','LGW','LTN','STN','SEN']

In [None]:
# Import libraries
import pandas as pd
#import plotly.plotly as py
import plotly.offline as ol
from geographiclib.geodesic import Geodesic
geod = Geodesic.WGS84

def dist(p1Lat, p1Lon, p2Lat, p2Lon):
    """Return the geodesic distance, in meters, between two points.

    Solves the inverse geodesic problem on the WGS84 ellipsoid (`geod`)
    from (p1Lat, p1Lon) to (p2Lat, p2Lon), requesting only the distance
    output, and returns its 's12' entry.
    """
    inverse_solution = geod.Inverse(p1Lat, p1Lon, p2Lat, p2Lon, Geodesic.DISTANCE)
    return inverse_solution['s12']

# Read the data into a dataframe (specifying the column names)

#read_conf = pd.read_csv('/Users/neil.watt/Documents/PythonScripts/Coronavirus/coronavirus_stats_conf2.csv', encoding = "ISO-8859-1")


#df = pd.read_csv('routes.dat', sep=',', header=None,
 #                names=['Airline','Airline ID','Source airport','Source airport ID',
  #                      'Destination airport','Destination airport ID','Codeshare',
   #                     'Stops','Equipment'])



# Reload the routes table. The file is read without a header row, so the
# column names are supplied explicitly.
_route_columns = [
    'Airline', 'Airline ID',
    'Source airport', 'Source airport ID',
    'Destination airport', 'Destination airport ID',
    'Codeshare', 'Stops', 'Equipment',
]
df = pd.read_csv(
    '/Users/neil.watt/Documents/PythonScripts/Coronavirus/routes.dat.txt',
    sep=',',
    header=None,
    names=_route_columns,
)


In [None]:
# Keep a single row per (source, destination) pair: one trajectory per route
_route_pairs = df[['Source airport', 'Destination airport']]
df = _route_pairs.drop_duplicates()

# Load the airports table. The file is read without a header row, so the
# column names are supplied explicitly.
_airport_columns = [
    'Airport', 'Name', 'City', 'Country', 'IATA', 'ICAO',
    'Latitude', 'Longitude', 'Altitude', 'Timezone', 'DST',
    'Tz', 'Type', 'Source',
]
df_airports = pd.read_csv(
    '/Users/neil.watt/Documents/PythonScripts/Coronavirus/airports.dat.txt',
    sep=',',
    header=None,
    names=_airport_columns,
)

In [None]:
## ALL UK AIRPORTS

# Airports whose routes are drawn. Swap in India_list_new, Iran_list_new,
# etc. to map another country (and update the layout title to match).
Country_List=UK_list_new

# Select only those routes starting OR ending at one of the chosen airports
df = df.loc[df['Source airport'].isin(Country_List)
            | df['Destination airport'].isin(Country_List)]

# Append the origin airport's coordinates to the routes' dataframe.
# No column names collide on this first merge, so the added columns keep
# their plain names (IATA, Latitude, Longitude).
df = pd.merge(df, df_airports[['IATA','Latitude','Longitude']],
              how='inner', left_on='Source airport', right_on='IATA', suffixes=('_Orig','_Dest'))

# Append the destination airport's coordinates. The names now collide, so
# the suffixes split them into *_Orig (first merge) and *_Dest (this merge).
df = pd.merge(df, df_airports[['IATA','Latitude','Longitude']],
              how='inner', left_on='Destination airport', right_on='IATA', suffixes=('_Orig','_Dest'))

# Keep only Origin/Destination IATA ID columns, and their Latitude/Longitude
df = df.drop(columns=['Source airport','Destination airport'])

# Great-circle distance in km between origin and destination, computed in
# one pass and stored as a float column (dist() returns meters).
df['Distance'] = pd.Series(
    [
        dist(lat_orig, lon_orig, lat_dest, lon_dest) / 1000
        for lat_orig, lon_orig, lat_dest, lon_dest in zip(
            df['Latitude_Orig'], df['Longitude_Orig'],
            df['Latitude_Dest'], df['Longitude_Dest'],
        )
    ],
    index=df.index,
    dtype='float64',
)

# Initialise the data list that will be used to feed the plot
data = []

# Append all airports (blue dots) to the map
data.append(dict(
    type='scattergeo',
    locationmode='ISO-3',
    showlegend=False,
    lon=df_airports['Longitude'],
    lat=df_airports['Latitude'],
    hoverinfo='text',
    text=df_airports['IATA'],
    mode='markers',
    marker=dict(
        size=2,
        color='rgb(0, 0, 255)',
        line=dict(
            width=3,
            color='rgba(68, 68, 68, 0)',
        ),
    ),
))

# Append the longest route to the map. It is looked up once, and skipped
# when no route survived the filtering (nothing to highlight in that case).
_known_distances = df['Distance'].dropna()
if not _known_distances.empty:
    _longest = df.loc[_known_distances.idxmax()]
    data.append(dict(
        type='scattergeo',
        locationmode='ISO-3',
        name='Longest Route',
        showlegend=True,
        lon=[_longest['Longitude_Orig'], _longest['Longitude_Dest']],
        lat=[_longest['Latitude_Orig'], _longest['Latitude_Dest']],
        mode='lines',
        line=dict(
            width=2,
            color='red',
        ),
        opacity=1,
    ))

# Append all other routes to the map (no per-route legend setting here;
# the layout below turns the legend off)
for _route in df.itertuples(index=False):
    data.append(dict(
        type='scattergeo',
        locationmode='ISO-3',
        name='{} - {}'.format(_route.IATA_Orig, _route.IATA_Dest),
        lon=[_route.Longitude_Orig, _route.Longitude_Dest],
        lat=[_route.Latitude_Orig, _route.Latitude_Dest],
        mode='lines',
        line=dict(
            width=1,
            color='green',
        ),
        opacity=0.3,
    ))

# Define the plot's layout
layout = dict(
    title='UK Airports and Routes',
    title_x=0.5,
    showlegend=False,
    geo=dict(
        scope='world',
        projection=dict(type='azimuthal equal area'),
        showland=True,
        landcolor='rgb(255, 255, 255)',
        showcountries=True,
        countrycolor="Black",
    ),
)

# Create the figure to be plotted
fig = dict( data=data, layout=layout )

# NOTE: same output file name as the London route map cell, so running
# this cell overwrites that HTML file.
#py.plot(fig, world_readable=True)
ol.plot(fig, filename='Airports and routes.html')

In [None]:
#https://towardsdatascience.com/catching-that-flight-visualizing-social-network-with-networkx-and-basemap-ce4a0d2eaea6

import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
import mpl_toolkits
# Basemap is a class inside the mpl_toolkits.basemap module (provided by the
# separately installed `basemap` package); there is no top-level module
# named `Basemap`, so `import Basemap` cannot succeed.
from mpl_toolkits.basemap import Basemap

In [None]:
# Build a directed graph from the routes table: one edge per row, from
# 'Source Airport' to 'Dest Airport', carrying 'number of flights' as an
# edge attribute. from_pandas_edgelist replaces from_pandas_dataframe,
# which no longer exists in NetworkX 2.x.
graph = nx.from_pandas_edgelist(
    routes_us,
    source='Source Airport',
    target='Dest Airport',
    edge_attr='number of flights',
    create_using=nx.DiGraph(),
)