In [None]:
# importing libraries
import pandas as pd
from collections import Counter
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.pyplot as cm
from matplotlib.colors import hsv_to_rgb
from matplotlib.ticker import PercentFormatter,FuncFormatter
import matplotlib.ticker as mtick
from cycler import cycler
import matplotlib
import seaborn as sns
%matplotlib inline
import warnings
warnings.filterwarnings('ignore')

In [None]:
pd.set_option('display.max_columns', 500)

In [None]:
# Loading the data 
path = './Data/'
filename ='city_code_en.csv'
filename2 = 'flow_20190121_20190301_updated.csv'
filename3 = 'flow_20200110_20200430_all.csv'

# loading city code file
df1 = pd.read_csv(path+filename,index_col='city_id')
df1.head()

### Data Cleaning and Exploration

Trying to figure out the encoding/parsing issue with the 'Kunshan City' row in the city_code_en file, where the city id and city name are fused into a single value ('320583 Kunshan City').

In [None]:
df1.head(81)

In [None]:
df1.drop('320583 Kunshan City',inplace=True)

In [None]:
df1.head(81)

In [None]:
df1.reset_index(drop=False,inplace=True)

In [None]:
# Re-insert the Kunshan City row that was dropped above because its id and
# name were fused into a single malformed index value.
new_entry = ['320583', 'Kunshan City', '320000','Jiangsu Province']
kunshan_row = pd.DataFrame([new_entry], columns=df1.columns[:len(new_entry)])

# The previous `df1.append(...)` call returned a new frame that was never
# assigned, so the row was silently lost (and DataFrame.append no longer
# exists in pandas 2.x). Guard on the id so re-running the cell does not
# insert the row twice.
if new_entry[0] not in df1['city_id'].astype(str).values:
    df1 = pd.concat([df1, kunshan_row], ignore_index=True)
df1.tail()

In [None]:
df1.head()

In [None]:
name_list=df1['city_name'].tolist()

In [None]:
# List every city name that appears more than once in the lookup table.
d = Counter(name_list)
res = []
for city, occurrences in d.items():
    if occurrences > 1:
        res.append(city)
print(res)

In [None]:
# Province names that occur on more than one row of the lookup table
# (i.e. provinces that contain several cities).
prov = list(df1['pro_name'])
d = Counter(prov)
provresult = []
for province, occurrences in d.items():
    if occurrences > 1:
        provresult.append(province)
print(provresult)

In [None]:
df1.shape

In [None]:
df1['city_id'].nunique()

## Data cleaning and Aggregation

In [None]:
# Reading in the mobility data
df2=pd.read_csv(path+filename2)
crs={'init':'EPSG:4326'}
df2.head()

In [None]:
df2["city_id_o"]=df2["city_id_o"].astype(int)

In [None]:
df2["city_id_d"]=df2["city_id_d"].astype(int)

In [None]:
df2["city_id_o"]=df2["city_id_o"].astype(str)

In [None]:
df2["city_id_d"]=df2["city_id_d"].astype(str)

In [None]:
df2.head()

In [None]:
df2.shape

In [None]:
df2['city_id_o'].nunique()

In [None]:
df2['city_id_d'].nunique()

In [None]:
# Dropping the columns in foreign language (the first two columns of the file).
# Pass the column labels explicitly instead of a DataFrame slice, and rebind
# rather than mutate in place.
# NOTE: positional — re-running this cell drops two more columns each time.
df2 = df2.drop(columns=df2.columns[:2])
df2.head()

In [None]:
# Attach readable origin/destination city and province names to the flows
# frame by looking each city id up in the city-code table.
city_lookup = df1.set_index("city_id")
for target, id_col, source in (
    ("origin_city", "city_id_o", "city_name"),
    ("dest_city", "city_id_d", "city_name"),
    ("origin_province", "city_id_o", "pro_name"),
    ("dest_province", "city_id_d", "pro_name"),
):
    df2[target] = df2[id_col].map(city_lookup[source])

In [None]:
df2['city_id_o'].nunique()

In [None]:
df2['city_id_d'].nunique()

In [None]:
df2['origin_city'].nunique()

In [None]:
df2['origin_province'].nunique()

In [None]:
df2.head()

In [None]:
# Remove the "City" / "Province" suffixes from the name columns.
# regex=False makes the literal match explicit (the default differs between
# pandas versions) and .str.strip() removes the whitespace that is otherwise
# left behind (e.g. "Kunshan City" -> "Kunshan", not "Kunshan ").
for name_col, suffix in (
    ("origin_city", "City"),
    ("origin_province", "Province"),
    ("dest_city", "City"),
    ("dest_province", "Province"),
):
    df2[name_col] = df2[name_col].str.replace(suffix, "", regex=False).str.strip()

In [None]:
df2['origin_city'].nunique()

In [None]:
df2['city_id_o'].nunique()

In [None]:
# Move the descriptive origin/destination columns to the front; all remaining
# columns keep their current relative order.
cols_to_order = [
    'origin_city', 'origin_province', 'city_id_o',
    'dest_city', 'dest_province', 'city_id_d',
]
remaining = df2.columns.drop(cols_to_order)
new_columns = cols_to_order + list(remaining)
df2 = df2[new_columns]
df2.head()

In [None]:
df2.shape

In [None]:
# Aggregating weekly data for the whole data file.
# The six descriptive columns become the row index so that only the daily
# count columns remain; their labels are parsed into timestamps.
a = df2.set_index(['origin_city','origin_province','city_id_o','dest_city','dest_province','city_id_d'])
a.columns = pd.to_datetime(a.columns)
# resample(axis=1) is deprecated in recent pandas; resampling the transposed
# frame along the index and transposing back is the equivalent spelling.
a = a.T.resample('W').sum().T.reset_index()
a.head(20)

In [None]:
a.shape

In [None]:
old_names = a.columns.tolist()
old_names

In [None]:
new_names = ['origin_city','origin_province','city_id_o','dest_city','dest_province','city_id_d','Week1','Week2','Week3','Week4','Week5','Week6'] 

In [None]:
a = a.rename(columns=dict(zip(old_names, new_names))) 
a.head()

In [None]:
#saving the weekly aggregated file
#a.to_csv('./Data/weekly_aggregation_final.csv',index=False)

### Descriptive Statistics

In [None]:
# Build a totals table over the weekly counts only.
totl=a.copy()
tot=totl.drop(['origin_city','origin_province','city_id_o','dest_city','dest_province','city_id_d'],axis=1)

# Total sum per column. Sum the numeric frame (`tot`), not `totl`: summing
# `totl` also tried to add up the string id/name columns.
tot.loc['Total',:]= tot.sum(axis=0)

# Total sum per row. Computed from `tot` so the new 'Total' row is included
# and the bottom-right cell holds the grand total instead of NaN.
tot.loc[:,'Total'] = tot.sum(axis=1)
#tot.to_csv('./Data/summarytables/total_sum_by_column2019.csv',index=False)

In [None]:
tot.tail(1)

In [None]:
# Total sum by Origin City 
orig_city_sum=a.groupby(['origin_city'])[["Week1", "Week2", "Week3","Week4","Week5","Week6"]].sum().reset_index()
orig_city_sum.tail(5)
#orig_city_sum.to_csv('./Data/summarytables/2019/total_sum_by_origin_city_2019.csv',index=False)

In [None]:
# Total sum by Destination City 
dest_city_sum=a.groupby(['dest_city'])[["Week1", "Week2", "Week3","Week4","Week5","Week6"]].sum().reset_index()
#dest_city_sum.to_csv('./Data/summarytables/2019/total_sum_by_destination_city_2019.cs',index=False)
dest_city_sum.tail(5)

In [None]:
# Total sum by Origin Province 
orig_prov_sum=a.groupby(['origin_province'])[["Week1", "Week2", "Week3","Week4","Week5","Week6"]].sum().reset_index()
#orig_prov_sum.to_csv('./Data/summarytables/2019/total_sum_by_origin_province2019.csv',index=False)
orig_prov_sum.tail(5)

In [None]:
# Total sum by Destination Province 
dest_prov_sum=a.groupby(['dest_province'])[["Week1", "Week2", "Week3","Week4","Week5","Week6"]].sum().reset_index()
#dest_prov_sum.to_csv('./Data/summarytables/2019/total_sum_by_destination_province2019.csv',index=False)
dest_prov_sum.tail(5)

In [None]:
# Percentage by origin city: each (city, province) pair's share of the total
# flow summed over all six weeks.
week_cols = ["Week1", "Week2", "Week3", "Week4", "Week5", "Week6"]
# Select the week columns before summing so the remaining string id/name
# columns are never aggregated.
tot_orig_perc=a.groupby(['origin_city','origin_province'])[week_cols].sum().reset_index()
#tot_orig_perc.drop(['X_o','Y_o','X_d','Y_d'],axis=1,inplace=True)
total = tot_orig_perc[week_cols].to_numpy().sum()
tot_orig_perc['Percent'] = tot_orig_perc[week_cols].sum(axis=1)/total * 100
#tot_orig_perc.to_csv('./Data/summarytables/2019/total_origin_perc2019.csv',index=False)
tot_orig_perc

In [None]:
# Percentage by destination city: each (city, province) pair's share of the
# total flow summed over all six weeks.
week_cols = ["Week1", "Week2", "Week3", "Week4", "Week5", "Week6"]
# Select the week columns before summing so the remaining string id/name
# columns are never aggregated.
tot_dest_perc=a.groupby(['dest_city','dest_province'])[week_cols].sum().reset_index()
#tot_dest_perc.drop(['X_o','Y_o','X_d','Y_d'],axis=1,inplace=True)
total = tot_dest_perc[week_cols].to_numpy().sum()
tot_dest_perc['Percent'] = tot_dest_perc[week_cols].sum(axis=1)/total * 100
#tot_dest_perc.to_csv('./Data/summarytables/total_dest_perc2019.csv',index=False)
tot_dest_perc

In [None]:
# Percentage by origin province: each province's share of the total flow
# summed over all six weeks.
week_cols = ["Week1", "Week2", "Week3", "Week4", "Week5", "Week6"]
# Select the week columns before summing so the remaining string id/name
# columns are never aggregated.
tot_orig_percprov=a.groupby(['origin_province'])[week_cols].sum().reset_index()
#tot_orig_percprov.drop(['X_o','Y_o','X_d','Y_d'],axis=1,inplace=True)
total = tot_orig_percprov[week_cols].to_numpy().sum()
tot_orig_percprov['Percent'] = tot_orig_percprov[week_cols].sum(axis=1)/total * 100
#tot_orig_percprov.to_csv('./Data/summarytables/total_orig_provperc2019.csv',index=False)
tot_orig_percprov

In [None]:
# Percentage by destination province: each province's share of the total flow
# summed over all six weeks.
week_cols = ["Week1", "Week2", "Week3", "Week4", "Week5", "Week6"]
# Select the week columns before summing so the remaining string id/name
# columns are never aggregated.
tot_dest_percprov=a.groupby(['dest_province'])[week_cols].sum().reset_index()
#tot_dest_percprov.drop(['X_o','Y_o','X_d','Y_d'],axis=1,inplace=True)
total = tot_dest_percprov[week_cols].to_numpy().sum()
tot_dest_percprov['Percent'] = tot_dest_percprov[week_cols].sum(axis=1)/total * 100
#tot_dest_percprov.to_csv('./Data/summarytables/total_dest_provperc2019.csv',index=False)
tot_dest_percprov

In [None]:
a.shape

In [None]:
a['city_id_o'].dtype

In [None]:
a['city_id_o']=a['city_id_o'].astype(int)

In [None]:
a.describe()

In [None]:
a['origin_city'].nunique()

In [None]:
# Aggregating by month
a_m= df2.set_index(['origin_city','origin_province','city_id_o','dest_city','dest_province','city_id_d']).rename(columns=lambda x: pd.to_datetime(x))
a_m = a_m.resample('M', axis=1).sum().reset_index()
a_m.head(20)

In [None]:
# Describing the aggregation by week
a.describe()

In [None]:
# Describing the aggregation by month (a_m is the monthly resample built above)
a_m.describe()

In [None]:
#df2.to_csv('./Data/final_file.csv')

In [None]:
df2['city_id_o'].nunique()

In [None]:
df2['origin_city'].nunique()

### Data Manipulation for Mapping

Getting flows and locations files for the Flowmap visualizations

In [None]:
# Reshape the wide flows table (one column per day) into long form:
# one row per origin/destination pair and day, with the count in 'Val'.
pair_keys = ['origin_city','origin_province','city_id_o','dest_city','dest_province','city_id_d']
stacked_counts = df2.set_index(pair_keys).stack()
visual_df = stacked_counts.reset_index()
# After stacking, the former column labels land in 'level_6' and the values in 0.
visual_df = visual_df.rename(columns = {'level_6' : 'Date', 0: 'Val'})
visual_df.head()

In [None]:
visual_df['Date'].dtypes

In [None]:
visual_df.shape

In [None]:
# converting to datetime format
# The day columns are labelled like '20190222' (no separators), so the format
# must be '%Y%m%d'. The previous '%Y/%m/%d' only worked on older pandas, which
# ignored the separator for ISO-like formats; strict parsing rejects it.
visual_df['Date'] = pd.to_datetime(visual_df['Date'], format= '%Y%m%d')

In [None]:
visual_df.dtypes

In [None]:
#Total number of traveling devices by week
total_by_week=visual_df.groupby(visual_df.Date.dt.strftime('%W')).Val.sum().reset_index()
#total_by_week.to_csv('./Data/summarytables/total_by_week2019.csv',index=False)
total_by_week

In [None]:
total=total_by_week['Val'].sum()
total

In [None]:
# Horizontal bar plot of total mobility per week; each bar's tick label also
# shows that week's share of the overall total.
fig, ax = plt.subplots(figsize=(40, 20))

percent = total_by_week['Val']/total*100
weeks=['Week 1','Week 2','Week 3','Week 4','Week 5','Week 6']
new_labels = [i+'  {:.2f}%'.format(j) for i, j in zip(weeks, percent)]

ax.barh(weeks, total_by_week['Val'], color='gray', edgecolor='red')
plt.yticks(range(len(weeks)), new_labels,fontsize=30)
# This notebook processes the 2019 flows file; the title previously said 2020.
ax.set_title('Mobility Percent by Week in 2019', fontsize=35)
plt.tight_layout()

# Strip the frame, the x axis and the y tick marks: only bars and labels remain.
for spine in ax.spines.values():
    spine.set_visible(False)

ax.axes.get_xaxis().set_visible(False)
ax.tick_params(axis="y", left=False)
#plt.savefig('./Media/MobilityPerc_by_week_2019.png')
plt.show()

In [None]:
visual_df.shape

In [None]:
visual_df.describe()

In [None]:
# Aggregating the long-form data per origin/destination pair and per DAY
# (pd.Grouper(freq='D')); the weekly version is built in the next cell.
# Note: the leading reset_index() only adds an unused 'index' column, since
# visual_df already carries a default integer index.
df_final = (visual_df
     .reset_index()
     .set_index("Date")
     .groupby(['origin_city','origin_province','city_id_o','dest_city','dest_province','city_id_d',pd.Grouper(freq='D')])["Val"].sum()
     .astype(int)
     .reset_index())
df_final.head()

In [None]:
# Aggregating the whole data set weekly, with every week STARTING on a Monday,
# to reduce it and visualize it better.
# 'W-MON' on its own produces bins that END on Monday (right-closed,
# right-labelled), which would put the first Monday into a one-day bin of its
# own. closed='left' / label='left' gives Monday-to-Sunday bins labelled by
# the Monday that starts the week.
df_finalW = (visual_df
     .set_index("Date")
     .groupby(['origin_city','origin_province','city_id_o','dest_city','dest_province','city_id_d',
               pd.Grouper(freq='W-MON', closed='left', label='left')])["Val"].sum()
     .astype(int)
     .reset_index())
df_finalW.head()

In [None]:
df_finalW.shape

In [None]:
# Total mobility per weekly bin and each bin's share of the overall total.
weekly_sum = df_finalW.groupby(['Date'])['Val'].sum()
df_total = weekly_sum.rename('Mobility').reset_index()
total = df_total['Mobility'].sum()
df_total['Percent'] = df_total['Mobility'] / total * 100
#df_total.to_csv('./Data/summarytables/total_perc_by_week2019.csv',index=False)
df_total

In [None]:
df_final.shape

In [None]:
# Keep only daily flows of at least 50 so the mapping data stays small.
keep_mask = df_final['Val'] >= 50
df_mapping = df_final[keep_mask]
df_mapping.head()

In [None]:
df_date=df_mapping.groupby(['Date']).agg({'Val':['sum']}).reset_index()

In [None]:
df_date.columns=['Date','Mobility']
df_date.head()

In [None]:
# Time series plot by day
plt.rc('font', size=12)
fig, ax = plt.subplots(figsize=(40, 20))
# (Removed a bare `df_date.reset_index()` whose result was discarded.)
# Daily total mobility drawn as a single thick line
ax.plot(df_date.Date, df_date.Mobility, color='darkslateblue', label='Mobility2019',linewidth = 7)
# Axis labels and title
ax.set_xlabel('Date', fontsize=22)
ax.set_ylabel('Mobility in Millions', fontsize=22)
ax.set_title('Mobility by Day in 2019', fontsize=26)
plt.xticks(fontsize=18)
plt.yticks(fontsize=20)
# Pass the on/off flag positionally: the `b=` keyword was renamed to
# `visible=` and later removed, so positional True works on every version.
ax.grid(True, which='major', color='#666666', linestyle='-')
#plt.savefig('./Media/Mobility_by_day_2019.png')
ax.legend(loc='upper left');

In [None]:
df_dateW=df_finalW.groupby(['Date']).agg({'Val':['sum']}).reset_index()

In [None]:
df_dateW.columns=['Date','Mobility']
df_dateW.head()

In [None]:
# time series plot by weeks
plt.rc('font', size=12)
fig, ax = plt.subplots(figsize=(40, 20))
# (Removed a bare `df_dateW.reset_index()` whose result was discarded.)
# Weekly total mobility drawn as a single thick line
ax.plot(df_dateW.Date, df_dateW.Mobility, color='palevioletred', label='Mobility2019',linewidth = 7)
# Axis labels and title
ax.set_xlabel('Date', fontsize=26)
ax.set_ylabel('Mobility in Millions', fontsize=26)
ax.set_title('Mobility by Week in 2019', fontsize=30)
plt.xticks(fontsize=26)
plt.yticks(fontsize=26)
# Pass the on/off flag positionally: the `b=` keyword was renamed to
# `visible=` and later removed, so positional True works on every version.
ax.grid(True, which='major', color='#666666', linestyle='-')
#plt.savefig('./Media/Mobility_by_week_2019.png')
ax.legend(loc='upper left');

In [None]:
# Creating file to utilise both city ids and value with date in a visualization
df4=df_mapping.drop(['origin_province','origin_city','dest_province','dest_city'],axis=1)
df4.head()

In [None]:
# Reordering the dataframe for better layout 
cols_to_order=['city_id_o','city_id_d','Val','Date']
new_columns = cols_to_order + (df4.columns.drop(cols_to_order).tolist())
df4=df4[new_columns]
df4.head()

In [None]:
#df4.groupby(['origin_city','dest_city','Date'])['Val'].sum()
df4['Date'] = pd.to_datetime(df4['Date']).dt.strftime('%Y-%m-%d')
df4.head()

In [None]:
old_names = df4.columns.tolist()
old_names

In [None]:
new_names=['origin','dest','count','time']

In [None]:
df4 = df4.rename(columns=dict(zip(old_names, new_names))) 
df4.head()

In [None]:
#saving the csv for mapping
#df4.to_csv('./Data/flows2019.csv',index=False)

In [None]:
#saving the csv for mapping
#df_finalW.to_csv('./Data/mapping_visual2019by_weel.csv',index=False)

In [None]:
df_map=df_mapping.groupby(['origin_city','city_id_o']).agg({'Val':['sum']}).reset_index()
df_map.head()

In [None]:
df_map.columns=['origin_city','city_id_o','Val']
df_map.head()

In [None]:
df_map.columns

In [None]:
df_map.dtypes

In [None]:
df_map['city_id_o']=df_map['city_id_o'].astype(int)

In [None]:
df_map.dtypes

#### Slight data manipulation because the 2019 file does not have latitude and longitude

In [None]:
# loading locations file
df_loc = pd.read_csv('./Data/locations.csv')
df_loc.head()

In [None]:
df_loc.dtypes

In [None]:
df_map_orig=pd.merge(df_map, df_loc, left_on='city_id_o', right_on='id', how='left').drop(['id','name'],axis=1)
df_map_orig.head()

In [None]:
df_map_orig.isna().sum()

In [None]:
df_map_orig.shape

In [None]:
df_maps=df_mapping.groupby(['dest_city','city_id_d']).agg({'Val':['sum']}).reset_index()
df_maps.head()

In [None]:
df_maps.columns=['dest_city','city_id_d','Val']
df_maps.head()

In [None]:
df_maps.dtypes

In [None]:
df_maps['city_id_d']=df_maps['city_id_d'].astype(int)

In [None]:
df_maps.dtypes

In [None]:
df_map_dest=pd.merge(df_maps, df_loc, left_on='city_id_d', right_on='id', how='left').drop(['id','name'],axis=1)
df_map_dest.head()

In [None]:
df_map_dest.isna().sum()

In [None]:
#df_map_orig.to_csv('./Data/mapping_origin2019.csv',index=True)

In [None]:
#df_map_dest.to_csv('./Data/mapping_dest2019.csv',index=True)

### Total Mobility Count Analysis

In [None]:
# Keep only the origin city / origin province columns plus the daily counts:
# the four columns at positions 2-5 (origin id and the destination fields)
# are removed.
unwanted_cols = df2.columns[2:6]
orig_df = df2.drop(columns=unwanted_cols)
orig_df.head()

In [None]:
orig_df.shape

In [None]:
# just origin city
origin_city_df=orig_df.drop(columns=['origin_province'])
origin_city_df.head()

In [None]:
bar_orig=origin_city_df.groupby('origin_city').sum().sum()

In [None]:
# Origin by weeks
by_wks=a.drop(a.iloc[:, 2:6], axis=1)
by_wks.head()

In [None]:
# just origin city
origin_city_wks=by_wks.drop(columns=['origin_province'])
origin_city_wks.head()

In [None]:
bar_orig_wks=origin_city_wks.groupby('origin_city').sum().sum()

In [None]:
# Bar plot by mobility count for Day
ax=bar_orig.plot(kind='bar',color='slategrey')
plt.gcf().set_size_inches(20, 10)
# Rotate the existing tick labels in place (changes properties only, not text).
plt.setp(ax.get_xticklabels(), rotation=40, ha="right")
#formatter = FuncFormatter(lambda y, pos: "%d%%" % (y))
#ax.yaxis.set_major_formatter(formatter)
plt.title('Total Mobility Count by Day for 2019', fontsize=26)
plt.xlabel('Day', fontsize=22)
plt.ylabel('Count in Millions', fontsize=22)
#changing ylables ticks
#y_value=['{:,.2f}'.format(x) + '%' for x in ax.get_yticks()]
#ax.set_yticklabels(y_value)
#ax.yaxis.set_major_formatter(mtick.PercentFormatter()) 
# Positional True instead of b=True: the `b` keyword is no longer accepted
# by current matplotlib releases.
plt.grid(True, which='major', color='#666666', linestyle='-')
plt.xticks(fontsize=17)
plt.yticks(fontsize=20)
#plt.savefig('./Media/total_origin_mobility_2019_By_Day.png') 
plt.show()


In [None]:
# Horizontal bar plot of the total mobility count per day, with the value
# printed next to each bar.
ax=bar_orig.plot(kind='barh',color='slategrey')
# set_size_inches returns None, so its result is not kept.
plt.gcf().set_size_inches(30, 20)
# Rotate the numeric x tick labels without overwriting their text
# (set_xticklabels would freeze whatever text exists before drawing).
plt.setp(ax.get_xticklabels(), rotation=40, ha="right")
for i, v in enumerate(bar_orig):
    ax.text(v + 3, i + .25, str(v), color='red', fontweight='bold', fontsize=18,horizontalalignment='center')
plt.title('Total Mobility Count by Day for 2019\n', fontsize=28)
plt.xlabel('Count in Millions', fontsize=25)
plt.ylabel('Day', fontsize=25)
# Positional True instead of b=True: the `b` keyword is no longer accepted
# by current matplotlib releases.
plt.grid(True, which='major', color='#666666', linestyle='-')
plt.yticks(fontsize=22)
plt.tight_layout()
#plt.savefig('./Media/total_origin_mobility_2019_By_Day_2.png') 
plt.show()

In [None]:
df2['20190222'].sum()

In [None]:
# bar plot by weeks
ax=bar_orig_wks.plot(kind='bar',color='slategrey')
plt.gcf().set_size_inches(20, 10)
# Rotate the existing tick labels in place (changes properties only, not text).
plt.setp(ax.get_xticklabels(), rotation=40, ha="right")
plt.title('Total Mobility Count by Weeks for 2019', fontsize=26)
plt.xlabel('Week', fontsize=22)
plt.ylabel('Count in Millions', fontsize=22)
# Positional True instead of b=True: the `b` keyword is no longer accepted
# by current matplotlib releases.
plt.grid(True, which='major', color='#666666', linestyle='-')
plt.xticks(fontsize=17)
plt.yticks(fontsize=20)
#plt.savefig('./Media/total_origin_mobility_2019_By_Weeks.png') 
plt.show()


In [None]:
# bar plot by weeks (horizontal), with the value printed next to each bar
ax=bar_orig_wks.plot(kind='barh',color='slategrey')
plt.gcf().set_size_inches(20, 10)
# Rotate the numeric x tick labels without overwriting their text
# (set_xticklabels would freeze whatever text exists before drawing).
plt.setp(ax.get_xticklabels(), rotation=40, ha="right")
for i, v in enumerate(bar_orig_wks):
    ax.text(v + 3, i + .25, str(v), color='red', fontweight='bold', fontsize=16,horizontalalignment='center')
plt.title('Total Mobility Count by Weeks for 2019', fontsize=26)
plt.xlabel('Count in Millions', fontsize=22)
plt.ylabel('Week', fontsize=22)
# Positional True instead of b=True: the `b` keyword is no longer accepted
# by current matplotlib releases.
plt.grid(True, which='major', color='#666666', linestyle='-')
plt.xticks(fontsize=17)
plt.yticks(fontsize=20)
#plt.savefig('./Media/total_origin_mobility_2019_By_Weeks_2.png') 
plt.show()


### Origin City Analysis

In [None]:
# grouping origin city by total sum daily
origin_city_aggreagted_by_day=origin_city_wks.groupby(['origin_city']).sum()
origin_city_aggreagted_by_day.head()

In [None]:
# grouping to check the total sum of the values of movement for each origin province
o_prov=by_wks.groupby(['origin_province']).sum()
o_prov.head()

In [None]:
o_prov['origin_province']=o_prov.index

In [None]:
# Grouped bar plot (one bar per week) of the mobility count leaving each
# origin province, to show the main hotspots of departure.
ax=o_prov.plot(kind='bar',width=0.9,cmap='magma')
plt.gcf().set_size_inches(30, 10)
# Rotate the existing tick labels in place (changes properties only, not text).
plt.setp(ax.get_xticklabels(), rotation=40, ha="right")
plt.title('Mobility Count from the Province of Origin over 6 Weeks for 2019', fontsize=26)
plt.xlabel('Province', fontsize=22)
plt.ylabel('Count in Millions', fontsize=22)
# Positional True instead of b=True: the `b` keyword is no longer accepted
# by current matplotlib releases.
plt.grid(True, which='major', color='#666666', linestyle='-')
plt.xticks(fontsize=17)
plt.yticks(fontsize=20)
#plt.savefig('./Media/origin_mobility_2019.png') 
plt.show()


In [None]:
# Horizontal version of the origin-province plot (one bar per week and
# province).
ax=o_prov.plot(kind='barh',width=0.9,cmap='magma')
plt.gcf().set_size_inches(20, 30)
# Rotate the numeric x tick labels without overwriting their text
# (set_xticklabels would freeze whatever text exists before drawing).
plt.setp(ax.get_xticklabels(), rotation=40, ha="right")
plt.title('Mobility Count from the Province of Origin over 6 Weeks for 2019', fontsize=26)
# In a horizontal bar chart the counts run along x and the provinces along y;
# the two labels were previously swapped.
plt.xlabel('Count in Millions', fontsize=22)
plt.ylabel('Province', fontsize=22)
# Positional True instead of b=True: the `b` keyword is no longer accepted
# by current matplotlib releases.
plt.grid(True, which='major', color='#666666', linestyle='-')
plt.xticks(fontsize=17)
plt.yticks(fontsize=20)
#plt.savefig('./Media/origin_mobility_2019_2.png') 
plt.show()


In [None]:
# aggregating by weeks for origin city
# Restrict the sum to the week columns: `a` also holds city_id_o (cast to int
# in an earlier cell) and string name columns, and adding those up per city
# produces meaningless values.
df_orig=a.groupby(['origin_city'])[["Week1", "Week2", "Week3","Week4","Week5","Week6"]].sum()

In [None]:
a.groupby(['origin_province']).max()

In [None]:
df_orig.head()

### Destination City Analysis

In [None]:
# Destination-side view of the weekly table: drop the three origin columns
# (the first three columns of `a`). The labels are taken from `a` itself
# rather than from df2, whose column layout merely happens to match.
dep=a.drop(columns=a.columns[:3])
dep.head()

In [None]:
dep.drop('city_id_d',axis=1,inplace=True)

In [None]:
dep.head()

In [None]:
dep_bar=dep.groupby('dest_city').sum().sum()
dep_bar

In [None]:
d_prov=dep.groupby(['dest_province']).sum()
d_prov.head()

In [None]:
d_prov['dest_province']=d_prov.index

In [None]:
# Grouped bar plot (one bar per week) of the mobility count arriving in each
# destination province, to show the main hotspots of arrival.
ax=d_prov.plot(kind='bar',width=0.9,cmap='magma')
plt.gcf().set_size_inches(30, 10)
# Rotate the existing tick labels in place (changes properties only, not text).
plt.setp(ax.get_xticklabels(), rotation=40, ha="right")
plt.title('Mobility Count to the Province of Destination over 6 Weeks', fontsize=26)
plt.xlabel('Province', fontsize=22)
plt.ylabel('Count in Millions', fontsize=22)
# Positional True instead of b=True: the `b` keyword is no longer accepted
# by current matplotlib releases.
plt.grid(True, which='major', color='#666666', linestyle='-')
plt.xticks(fontsize=17)
plt.yticks(fontsize=20)
#plt.savefig('./Media/destination_mobility_2019.png')
plt.show()


In [None]:
# Horizontal version of the destination-province plot (one bar per week and
# province).
ax=d_prov.plot(kind='barh',width=0.9,cmap='magma')
plt.gcf().set_size_inches(20, 30)
# Rotate the numeric x tick labels without overwriting their text
# (set_xticklabels would freeze whatever text exists before drawing).
plt.setp(ax.get_xticklabels(), rotation=40, ha="right")
plt.title('Mobility Count to the Province of Destination over 6 Weeks', fontsize=26)
plt.xlabel('Count in Millions', fontsize=22)
plt.ylabel('Province', fontsize=22)
# Positional True instead of b=True: the `b` keyword is no longer accepted
# by current matplotlib releases.
plt.grid(True, which='major', color='#666666', linestyle='-')
plt.xticks(fontsize=17)
plt.yticks(fontsize=20)
#plt.savefig('./Media/destination_mobility_2019_2.png')
plt.show()

### Saving data for Self Organizing Maps

In [None]:
mod=a.copy()
mod.head()

In [None]:
#saving the file
#mod.to_csv('./Data/som2019.csv', index=False)