# Import


In [None]:
#import drive
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
#set directory
import os
os.chdir('/content/drive/MyDrive/thesis-folder')

In [None]:
!pip install python-louvain



In [None]:
#import necessary packages
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import scipy as sc
import datetime
import calendar
import networkx as nx
from community import community_louvain
import matplotlib.cm as cm
import json

#LOUVAIN IMPLEMENTATION

In [None]:
#there are certain intersecting city values for example, there are 2 cities named 'Dublin' where Dublin city is the capital of Ireland as well as there is a city named Dublin in US 
def handle_intersecting_cities(df):
  append_us = '_us'
  intersecting_cities = [city for city in set(df.us_city) if city in set(df.foreign_city)]
  print("intersecting cities: {}".format(intersecting_cities))
  for ic in intersecting_cities:
    indices = np.where(df['us_city'] == ic)
    for idx in indices:
      df.loc[idx,'us_city'] = df.loc[idx,'us_city'] + append_us
  return df

#create graphs and community
def create_graph_and_community(df, source_city, target_city,edge_attribute, season):
  graph = nx.from_pandas_edgelist(df, source=source_city, 
                                  target=target_city,
                                  edge_attr=edge_attribute,
                                  create_using=nx.MultiGraph())
  print("Number of nodes: {}".format(len(graph.nodes)))
  print("Number of edges: {}".format(len(graph.edges)))

  '''
  #computing the best partition
  summer_partition = community_louvain.best_partition(pax_summer_graph)
  partition_dict = json.dumps(summer_partition)
  f = open("intermittent-result/partition_dict.json", "w")
  f.write(partition_dict)
  f.close()
  '''
  f = open(os.path.join("intermittent-result/International US/",'Intl_'+season+'_partition_dict.json'),)
  partition = json.load(f)
  print("Partition length: {}".format(len(partition)))
  return graph, partition

#set label for community
def setCommunityNumber(df,indexLists,community_label,val):
  for idx in indexLists:
    df.loc[idx,community_label] = int(float(val))
  return df

#map the community details to master data
def map_community_number(df,partition):
  us_city_lists = []
  foreign_city_lists = []
  df['us_community'] = ''
  df['foreign_community'] = ''
  df['us_foreign_community'] = ''

  us_city_lists = set(df['us_city'])
  foreign_city_lists = set(df['foreign_city'])
  print('Unique US cities in the dataset is:' , len(us_city_lists))
  print('Unique Foreign cities in the dataset is:', len(foreign_city_lists))

  for key,val in partition.items():
    if key in us_city_lists:
      indices = np.where(df['us_city'] == key)
      df = setCommunityNumber(df,indices,'us_community',int(val))
    elif key in foreign_city_lists:
      indices = np.where(df['foreign_city'] == key)
      df = setCommunityNumber(df,indices,'foreign_community',val)
    
  df['us_foreign_community'] = df['us_community'].astype(str) + '-' + df['foreign_community'].astype(str)
  return df

#list the sumamry of community
def community_summary(df, partition_df):
  #retrieve total number of passengers
  total = df['total']
  us_community = df['us_community']
  fg_community = df['foreign_community']
  comm_ctr = {}

  for i in range(0, len(total)):
    if(us_community[i] == fg_community[i]):
      if(us_community[i] in comm_ctr):
        comm_ctr[us_community[i]] += total[i]
      else:
        comm_ctr[us_community[i]] = total[i]
    else:
      if((us_community[i] in comm_ctr) & (fg_community[i] in comm_ctr)):
        comm_ctr[us_community[i]] += total[i]
        comm_ctr[fg_community[i]] += total[i]
      else:
        comm_ctr[us_community[i]] = total[i]
        comm_ctr[fg_community[i]] = total[i]
      
  print("Communities: {}".format(comm_ctr))
  
  community_pax_df = pd.DataFrame.from_dict(comm_ctr, orient='index')
  community_pax_df.reset_index(inplace=True)
  community_pax_df.rename(columns={'index':'community', 0:'total_pax'}, inplace = True)

  summary_df = partition_df.groupby(['community']).count()
  summary_df.reset_index(inplace=True)
  summary_df = pd.merge(summary_df, community_pax_df, on=['community'], how='inner')

  return summary_df

##SUMMER 

In [None]:
#read file
us_summer_df = pd.read_csv('intermittent-result/International US/us_summer_pax_data.csv')
us_summer_df.head()

Unnamed: 0,us_city,foreign_city,foreign_country,year,month,month_num,us_lat,us_long,foreign_lat,foreign_long,scheduled,charter,total
0,aguadilla,port of spain,trinidad and tobago,1990,august,8,18.449655,-67.118476,-61.518017,10.657268,0,467,467
1,albany,montreal,canada,1990,august,8,42.651167,-73.754968,-73.610364,45.497216,45,0,45
2,albany,toronto,canada,1990,august,8,42.651167,-73.754968,-79.383935,43.653482,91,0,91
3,anchorage,amsterdam,netherlands,1990,august,8,61.216313,-149.894852,4.893604,52.37276,15672,0,15672
4,anchorage,brussels,belgium,1990,august,8,61.216313,-149.894852,4.359779,50.843735,4120,0,4120


In [None]:
#handle intersecting cities, create graph, community and map the community details to master data
us_summer_df = handle_intersecting_cities(us_summer_df)
pax_summer_graph, summer_partition = create_graph_and_community(us_summer_df, 'us_city','foreign_city','total','summer')
us_summer_df = map_community_number(us_summer_df, summer_partition)
us_summer_df.head()

intersecting cities: ['manchester', 'sault ste. marie', 'bristol', 'georgetown', 'st. johns', 'san jose', 'melbourne', 'victoria', 'marathon', 'dublin', 'naples', 'panama city', 'birmingham', 'rome', 'rota', 'san antonio', 'latrobe', 'koror', 'st. petersburg', 'rosario', 'trenton']
Number of nodes: 1348
Number of edges: 111162
Partition length: 1348
Unique US cities in the dataset is: 491
Unique Foreign cities in the dataset is: 857


Unnamed: 0,us_city,foreign_city,foreign_country,year,month,month_num,us_lat,us_long,foreign_lat,foreign_long,scheduled,charter,total,us_community,foreign_community,us_foreign_community
0,aguadilla,port of spain,trinidad and tobago,1990,august,8,18.449655,-67.118476,-61.518017,10.657268,0,467,467,6,6,6-6
1,albany,montreal,canada,1990,august,8,42.651167,-73.754968,-73.610364,45.497216,45,0,45,1,2,1-2
2,albany,toronto,canada,1990,august,8,42.651167,-73.754968,-79.383935,43.653482,91,0,91,1,2,1-2
3,anchorage,amsterdam,netherlands,1990,august,8,61.216313,-149.894852,4.893604,52.37276,15672,0,15672,3,3,3-3
4,anchorage,brussels,belgium,1990,august,8,61.216313,-149.894852,4.359779,50.843735,4120,0,4120,3,1,3-1


In [None]:
#print the unique communities that are created using Louvain implementation
summer_partition_df = pd.DataFrame.from_dict(summer_partition, orient='index')
summer_partition_df.reset_index(inplace=True)
summer_partition_df.rename(columns={"index":"city",0:"community"}, inplace=True)
print(summer_partition_df['community'].unique())

[ 6  1  2  3  5  7  9 10  0  8  4]


Modularity scores

In [None]:
#modularity score
community_louvain.modularity(summer_partition, pax_summer_graph)

0.37151838745920434

In [None]:
#map community details
summer_community_df = pd.merge(summer_partition_df, us_summer_df, how='left', left_on='city', right_on='us_city')
summer_community_df.dropna(inplace=True)
summer_community_df.drop(columns=['us_city', 'foreign_city', 'foreign_country','foreign_lat','foreign_long','us_community','foreign_community','us_foreign_community'], inplace=True)
agg_dict = {'us_lat':'first','us_long':'first', 'scheduled':'sum', 'charter':'sum', 'total':'sum'}
summer_community_df = summer_community_df.groupby(['city','community','year','month','month_num'], as_index=False).agg(agg_dict)
summer_community_df.rename(columns = {'us_lat':'latitude', 'us_long':'longitude'}, inplace=True)
summer_community_df.head()

Unnamed: 0,city,community,year,month,month_num,latitude,longitude,scheduled,charter,total
0,abilene,5,1991.0,june,6.0,32.44645,-99.747591,99.0,0.0,99.0
1,abilene,5,1992.0,june,6.0,32.44645,-99.747591,213.0,0.0,213.0
2,abilene,5,1993.0,june,6.0,32.44645,-99.747591,118.0,132.0,250.0
3,abilene,5,1994.0,august,8.0,32.44645,-99.747591,16.0,0.0,16.0
4,abilene,5,1994.0,july,7.0,32.44645,-99.747591,127.0,0.0,127.0


In [None]:
summer_community_df1 = pd.merge(summer_partition_df, us_summer_df, how='left', left_on='city', right_on='foreign_city')
summer_community_df1.dropna(inplace=True)
summer_community_df1.drop(columns=['us_city', 'foreign_city', 'foreign_country','us_lat','us_long','us_community','foreign_community','us_foreign_community'], inplace=True)
agg_dict = {'foreign_lat':'first','foreign_long':'first', 'scheduled':'sum', 'charter':'sum', 'total':'sum'}
summer_community_df1 = summer_community_df1.groupby(['city','community','year','month','month_num'], as_index=False).agg(agg_dict)
summer_community_df1.rename(columns = {'foreign_lat':'latitude', 'foreign_long':'longitude'}, inplace=True)
summer_community_df1.head()

Unnamed: 0,city,community,year,month,month_num,latitude,longitude,scheduled,charter,total
0,a coruna,2,2013.0,august,8.0,-8.395877,43.371209,0.0,6.0,6.0
1,a coruna,2,2014.0,august,8.0,-8.395877,43.371209,0.0,8.0,8.0
2,a coruna,2,2015.0,august,8.0,-8.395877,43.371209,0.0,7.0,7.0
3,a coruna,2,2015.0,june,6.0,-8.395877,43.371209,0.0,2.0,2.0
4,aalborg,6,1993.0,august,8.0,9.921526,57.046263,0.0,160.0,160.0


In [None]:
#listing city-wise details
summer_community_df = pd.concat([summer_community_df, summer_community_df1], axis=0)
summer_community_df = summer_community_df.astype({"year": 'int',
                  "month_num": 'int'})
summer_community_df.head()

Unnamed: 0,city,community,year,month,month_num,latitude,longitude,scheduled,charter,total
0,abilene,5,1991,june,6,32.44645,-99.747591,99.0,0.0,99.0
1,abilene,5,1992,june,6,32.44645,-99.747591,213.0,0.0,213.0
2,abilene,5,1993,june,6,32.44645,-99.747591,118.0,132.0,250.0
3,abilene,5,1994,august,8,32.44645,-99.747591,16.0,0.0,16.0
4,abilene,5,1994,july,7,32.44645,-99.747591,127.0,0.0,127.0


In [None]:
#summary of communities
summer_summary_df = community_summary(us_summer_df, summer_partition_df)
summer_summary_df = summer_summary_df.sort_values(by = ['city', 'total_pax'], ascending = False)
summer_summary_df.to_csv('intermittent-result/International US/Intl_summer_summary.csv', index=False)
summer_summary_df.head()

Communities: {6: 339086105, 1: 508492432, 2: 513999744, 3: 317086933, 5: 375634199, 7: 471, 9: 1, 10: 5, 0: 12, 8: 72, 4: 2}


Unnamed: 0,community,city,total_pax
1,1,327,508492432
3,3,287,317086933
6,6,263,339086105
2,2,255,513999744
5,5,204,375634199


Filter top communities

In [None]:
#filtering only the top communities
summer_small_comm = summer_summary_df[(summer_summary_df['city'] < 5) & (summer_summary_df['total_pax']<100)].community
print(summer_small_comm)
summer_community_df[(summer_community_df['community'].isin(summer_small_comm) == False)].head() 

8      8
0      0
10    10
4      4
9      9
Name: community, dtype: int64


Unnamed: 0,city,community,year,month,month_num,latitude,longitude,scheduled,charter,total
0,abilene,5,1991,june,6,32.44645,-99.747591,99.0,0.0,99.0
1,abilene,5,1992,june,6,32.44645,-99.747591,213.0,0.0,213.0
2,abilene,5,1993,june,6,32.44645,-99.747591,118.0,132.0,250.0
3,abilene,5,1994,august,8,32.44645,-99.747591,16.0,0.0,16.0
4,abilene,5,1994,july,7,32.44645,-99.747591,127.0,0.0,127.0


In [None]:
summer_community_df.to_csv('intermittent-result/International US/Intl Summer citywise community.csv', index=False)

In [None]:
#filter top communities city details
us_summer_df = us_summer_df[(us_summer_df['us_community'].isin(summer_small_comm) == False) | (us_summer_df['foreign_community'].isin(summer_small_comm) == False)]
us_summer_df.head()

Unnamed: 0,us_city,foreign_city,foreign_country,year,month,month_num,us_lat,us_long,foreign_lat,foreign_long,scheduled,charter,total,us_community,foreign_community,us_foreign_community
0,aguadilla,port of spain,trinidad and tobago,1990,august,8,18.449655,-67.118476,-61.518017,10.657268,0,467,467,6,6,6-6
1,albany,montreal,canada,1990,august,8,42.651167,-73.754968,-73.610364,45.497216,45,0,45,1,2,1-2
2,albany,toronto,canada,1990,august,8,42.651167,-73.754968,-79.383935,43.653482,91,0,91,1,2,1-2
3,anchorage,amsterdam,netherlands,1990,august,8,61.216313,-149.894852,4.893604,52.37276,15672,0,15672,3,3,3-3
4,anchorage,brussels,belgium,1990,august,8,61.216313,-149.894852,4.359779,50.843735,4120,0,4120,3,1,3-1


In [None]:
us_summer_df.to_csv('intermittent-result/International US/International summer community.csv', index=False)

##SPRING

In [None]:
#read data
us_spring_df = pd.read_csv('intermittent-result/International US/us_spring_pax_data.csv')
us_spring_df.head()

Unnamed: 0,us_city,foreign_city,foreign_country,year,month,month_num,us_lat,us_long,foreign_lat,foreign_long,scheduled,charter,total
0,albany,montreal,canada,1990,april,4,42.651167,-73.754968,-73.610364,45.497216,0,183,183
1,allentown,freeport,the bahamas,1990,april,4,40.602206,-75.471279,-78.695362,26.535681,0,30,30
2,anchorage,amsterdam,netherlands,1990,april,4,61.216313,-149.894852,4.893604,52.37276,10007,0,10007
3,anchorage,brussels,belgium,1990,april,4,61.216313,-149.894852,4.359779,50.843735,3254,0,3254
4,anchorage,copenhagen,denmark,1990,april,4,61.216313,-149.894852,12.570072,55.686724,5739,0,5739


In [None]:
'''
us_spring_df = handle_intersecting_cities(us_spring_df)
graph = nx.from_pandas_edgelist(us_spring_df, source='us_city', 
                                  target='foreign_city',
                                  edge_attr='total',
                                  create_using=nx.MultiGraph())
print("Number of nodes: {}".format(len(graph.nodes)))
print("Number of edges: {}".format(len(graph.edges)))


#computing the best partition
spring_partition = community_louvain.best_partition(graph)
print("Number of nodes: {}".format(len(graph.nodes)))
print("Number of edges: {}".format(len(graph.edges)))
print("Partition length: {}".format(len(spring_partition)))
spring_partition_df = pd.DataFrame.from_dict(spring_partition, orient='index')
spring_partition_df.reset_index(inplace=True)
spring_partition_df.rename(columns={"index":"city",0:"community"}, inplace=True)
print(spring_partition_df['community'].unique())



partition_dict = json.dumps(spring_partition)
f = open("intermittent-result/International US/Intl_spring_partition_dict.json", "w")
f.write(partition_dict)
f.close()
'''

'\nus_spring_df = handle_intersecting_cities(us_spring_df)\ngraph = nx.from_pandas_edgelist(us_spring_df, source=\'us_city\', \n                                  target=\'foreign_city\',\n                                  edge_attr=\'total\',\n                                  create_using=nx.MultiGraph())\nprint("Number of nodes: {}".format(len(graph.nodes)))\nprint("Number of edges: {}".format(len(graph.edges)))\n\n\n#computing the best partition\nspring_partition = community_louvain.best_partition(graph)\nprint("Number of nodes: {}".format(len(graph.nodes)))\nprint("Number of edges: {}".format(len(graph.edges)))\nprint("Partition length: {}".format(len(spring_partition)))\nspring_partition_df = pd.DataFrame.from_dict(spring_partition, orient=\'index\')\nspring_partition_df.reset_index(inplace=True)\nspring_partition_df.rename(columns={"index":"city",0:"community"}, inplace=True)\nprint(spring_partition_df[\'community\'].unique())\n\n\n\npartition_dict = json.dumps(spring_partition)\

In [None]:
#create graph, community and map community details
us_spring_df = handle_intersecting_cities(us_spring_df)
pax_spring_graph, spring_partition = create_graph_and_community(us_spring_df, 'us_city','foreign_city','total','spring')
us_spring_df = map_community_number(us_spring_df, spring_partition)
us_spring_df.head()

intersecting cities: ['manchester', 'athens', 'alexandria', 'bristol', 'san jose', 'melbourne', 'victoria', 'naples', 'panama city', 'birmingham', 'rome', 'florence', 'rota', 'san antonio', 'koror', 'st. petersburg', 'santa maria', 'rosario', 'trenton']
Number of nodes: 1223
Number of edges: 108772
Partition length: 1223
Unique US cities in the dataset is: 446
Unique Foreign cities in the dataset is: 777


Unnamed: 0,us_city,foreign_city,foreign_country,year,month,month_num,us_lat,us_long,foreign_lat,foreign_long,scheduled,charter,total,us_community,foreign_community,us_foreign_community
0,albany,montreal,canada,1990,april,4,42.651167,-73.754968,-73.610364,45.497216,0,183,183,5,1,5-1
1,allentown,freeport,the bahamas,1990,april,4,40.602206,-75.471279,-78.695362,26.535681,0,30,30,1,1,1-1
2,anchorage,amsterdam,netherlands,1990,april,4,61.216313,-149.894852,4.893604,52.37276,10007,0,10007,5,1,5-1
3,anchorage,brussels,belgium,1990,april,4,61.216313,-149.894852,4.359779,50.843735,3254,0,3254,5,2,5-2
4,anchorage,copenhagen,denmark,1990,april,4,61.216313,-149.894852,12.570072,55.686724,5739,0,5739,5,2,5-2


In [None]:
#listing unique communities that are created using Louvain algorithm
spring_partition_df = pd.DataFrame.from_dict(spring_partition, orient='index')
spring_partition_df.reset_index(inplace=True)
spring_partition_df.rename(columns={"index":"city",0:"community"}, inplace=True)
print(spring_partition_df['community'].unique())

[ 5  1  2  3  4  0  9 10  8  6  7]


Modularity scores

In [None]:
#calculate modularity
community_louvain.modularity(spring_partition, pax_spring_graph)

0.40009761832561777

In [None]:
#retrieve city-wise details
spring_community_df = pd.merge(spring_partition_df, us_spring_df, how='left', left_on='city', right_on='us_city')
spring_community_df.dropna(inplace=True)
spring_community_df.drop(columns=['us_city', 'foreign_city', 'foreign_country','foreign_lat','foreign_long','us_community','foreign_community','us_foreign_community'], inplace=True)
agg_dict = {'us_lat':'first','us_long':'first', 'scheduled':'sum', 'charter':'sum', 'total':'sum'}
spring_community_df = spring_community_df.groupby(['city','community','year','month','month_num'], as_index=False).agg(agg_dict)
spring_community_df.rename(columns = {'us_lat':'latitude', 'us_long':'longitude'}, inplace=True)
spring_community_df.head()

Unnamed: 0,city,community,year,month,month_num,latitude,longitude,scheduled,charter,total
0,abilene,4,1995.0,april,4.0,32.44645,-99.747591,75.0,0.0,75.0
1,abilene,4,1997.0,april,4.0,32.44645,-99.747591,0.0,169.0,169.0
2,abilene,4,1999.0,may,5.0,32.44645,-99.747591,28.0,0.0,28.0
3,abilene,4,2000.0,march,3.0,32.44645,-99.747591,52.0,0.0,52.0
4,abilene,4,2000.0,may,5.0,32.44645,-99.747591,97.0,0.0,97.0


In [None]:
spring_community_df1 = pd.merge(spring_partition_df, us_spring_df, how='left', left_on='city', right_on='foreign_city')
spring_community_df1.dropna(inplace=True)
spring_community_df1.drop(columns=['us_city', 'foreign_city', 'foreign_country','us_lat','us_long','us_community','foreign_community','us_foreign_community'], inplace=True)
agg_dict = {'foreign_lat':'first','foreign_long':'first', 'scheduled':'sum', 'charter':'sum', 'total':'sum'}
spring_community_df1 = spring_community_df1.groupby(['city','community','year','month','month_num'], as_index=False).agg(agg_dict)
spring_community_df1.rename(columns = {'foreign_lat':'latitude', 'foreign_long':'longitude'}, inplace=True)
spring_community_df1.head()

Unnamed: 0,city,community,year,month,month_num,latitude,longitude,scheduled,charter,total
0,a coruna,2,2009.0,march,3.0,-8.395877,43.371209,0.0,12.0,12.0
1,a coruna,2,2011.0,march,3.0,-8.395877,43.371209,0.0,2.0,2.0
2,a coruna,2,2012.0,may,5.0,-8.395877,43.371209,0.0,5.0,5.0
3,a coruna,2,2013.0,april,4.0,-8.395877,43.371209,0.0,3.0,3.0
4,a coruna,2,2014.0,april,4.0,-8.395877,43.371209,0.0,32.0,32.0


In [None]:
spring_community_df = pd.concat([spring_community_df, spring_community_df1], axis=0)
spring_community_df = spring_community_df.astype({"year": 'int',
                  "month_num": 'int'})
spring_community_df.head()

Unnamed: 0,city,community,year,month,month_num,latitude,longitude,scheduled,charter,total
0,abilene,4,1995,april,4,32.44645,-99.747591,75.0,0.0,75.0
1,abilene,4,1997,april,4,32.44645,-99.747591,0.0,169.0,169.0
2,abilene,4,1999,may,5,32.44645,-99.747591,28.0,0.0,28.0
3,abilene,4,2000,march,3,32.44645,-99.747591,52.0,0.0,52.0
4,abilene,4,2000,may,5,32.44645,-99.747591,97.0,0.0,97.0


In [None]:
#create summary of communities
spring_summary_df = community_summary(us_spring_df, spring_partition_df)
spring_summary_df = spring_summary_df.sort_values(by = ['city', 'total_pax'], ascending = False)
spring_summary_df.to_csv('intermittent-result/International US/Intl_spring_summary.csv', index=False)
spring_summary_df.head()

Communities: {5: 363520018, 1: 448548116, 2: 425365567, 3: 193456075, 4: 225959363, 0: 1546, 9: 65, 10: 380, 8: 1, 6: 6, 7: 2}


Unnamed: 0,community,city,total_pax
5,5,381,363520018
2,2,276,425365567
1,1,244,448548116
3,3,177,193456075
4,4,128,225959363


Filter top communities

In [None]:
#filter top communities
spring_small_comm = spring_summary_df[(spring_summary_df['city'] < 5) & (spring_summary_df['total_pax']<100)].community
print(spring_small_comm)
spring_community_df[(spring_community_df['community'].isin(spring_small_comm) == False)].head()

9    9
6    6
7    7
8    8
Name: community, dtype: int64


Unnamed: 0,city,community,year,month,month_num,latitude,longitude,scheduled,charter,total
0,abilene,4,1995,april,4,32.44645,-99.747591,75.0,0.0,75.0
1,abilene,4,1997,april,4,32.44645,-99.747591,0.0,169.0,169.0
2,abilene,4,1999,may,5,32.44645,-99.747591,28.0,0.0,28.0
3,abilene,4,2000,march,3,32.44645,-99.747591,52.0,0.0,52.0
4,abilene,4,2000,may,5,32.44645,-99.747591,97.0,0.0,97.0


In [None]:
us_spring_df[(us_spring_df['us_community'].isin(spring_small_comm)) | (us_spring_df['foreign_community'].isin(spring_small_comm))]

Unnamed: 0,us_city,foreign_city,foreign_country,year,month,month_num,us_lat,us_long,foreign_lat,foreign_long,scheduled,charter,total,us_community,foreign_community,us_foreign_community
15031,bay st. louis,farafangana,madagascar,1995,may,5,30.313245,-89.334322,47.830655,-22.82158,0,65,65,9,9,9-9
35488,homer,bellavista,peru,2002,april,4,59.644088,-151.540148,-76.315338,-7.684248,1,0,1,8,8,8-8
55601,coatesville,padang sidempuan,indonesia,2007,may,5,39.983162,-75.823836,99.272385,1.381098,0,6,6,6,6,6-6
74189,danbury,lagos de moreno,mexico,2012,march,3,41.39508,-73.475291,-101.819957,21.53303,0,2,2,7,7,7-7


In [None]:
us_spring_df = us_spring_df[(us_spring_df['us_community'].isin(spring_small_comm) == False) | (us_spring_df['foreign_community'].isin(spring_small_comm) == False)]
us_spring_df.head()

Unnamed: 0,us_city,foreign_city,foreign_country,year,month,month_num,us_lat,us_long,foreign_lat,foreign_long,scheduled,charter,total,us_community,foreign_community,us_foreign_community
0,albany,montreal,canada,1990,april,4,42.651167,-73.754968,-73.610364,45.497216,0,183,183,5,1,5-1
1,allentown,freeport,the bahamas,1990,april,4,40.602206,-75.471279,-78.695362,26.535681,0,30,30,1,1,1-1
2,anchorage,amsterdam,netherlands,1990,april,4,61.216313,-149.894852,4.893604,52.37276,10007,0,10007,5,1,5-1
3,anchorage,brussels,belgium,1990,april,4,61.216313,-149.894852,4.359779,50.843735,3254,0,3254,5,2,5-2
4,anchorage,copenhagen,denmark,1990,april,4,61.216313,-149.894852,12.570072,55.686724,5739,0,5739,5,2,5-2


In [None]:
#save results
us_spring_df.to_csv('intermittent-result/International US/International spring community.csv', index=False)

##AUTUMN

In [None]:
#read data
us_autumn_df = pd.read_csv('intermittent-result/International US/us_autumn_pax_data.csv')
us_autumn_df.head()

Unnamed: 0,us_city,foreign_city,foreign_country,year,month,month_num,us_lat,us_long,foreign_lat,foreign_long,scheduled,charter,total
0,aguadilla,providenciales,turks and caicos islands,1990,november,11,18.449655,-67.118476,-72.300446,21.802061,0,50,50
1,aguadilla,st. lucia,saint lucia,1990,november,11,18.449655,-67.118476,-60.975036,13.825049,0,123,123
2,albany,cancun,mexico,1990,november,11,42.651167,-73.754968,-86.851047,21.161785,0,330,330
3,allentown,freeport,the bahamas,1990,november,11,40.602206,-75.471279,-78.695362,26.535681,0,138,138
4,anchorage,amsterdam,netherlands,1990,november,11,61.216313,-149.894852,4.893604,52.37276,5265,0,5265


In [None]:
'''
us_autumn_df = handle_intersecting_cities(us_autumn_df)
autumn_pax_graph = nx.from_pandas_edgelist(us_autumn_df, source='us_city', 
                                  target='foreign_city',
                                  edge_attr='total',
                                  create_using=nx.MultiGraph())
print("Number of nodes: {}".format(len(autumn_pax_graph.nodes)))
print("Number of edges: {}".format(len(autumn_pax_graph.edges)))


#computing the best partition
autumn_partition = community_louvain.best_partition(autumn_pax_graph)
autumn_partition_df = pd.DataFrame.from_dict(autumn_partition, orient='index')
autumn_partition_df.reset_index(inplace=True)
autumn_partition_df.rename(columns={"index":"city",0:"community"}, inplace=True)
print("Number of nodes: {}".format(len(autumn_pax_graph.nodes)))
print("Number of edges: {}".format(len(autumn_pax_graph.edges)))
print("Partition length: {}".format(len(autumn_partition)))
print(autumn_partition_df['community'].unique())


autumn_partition_dict = json.dumps(autumn_partition)
f = open("intermittent-result/International US/Intl_autumn_partition_dict.json", "w")
f.write(autumn_partition_dict)
f.close()
'''

'\nus_autumn_df = handle_intersecting_cities(us_autumn_df)\nautumn_pax_graph = nx.from_pandas_edgelist(us_autumn_df, source=\'us_city\', \n                                  target=\'foreign_city\',\n                                  edge_attr=\'total\',\n                                  create_using=nx.MultiGraph())\nprint("Number of nodes: {}".format(len(autumn_pax_graph.nodes)))\nprint("Number of edges: {}".format(len(autumn_pax_graph.edges)))\n\n\n#computing the best partition\nautumn_partition = community_louvain.best_partition(autumn_pax_graph)\nautumn_partition_df = pd.DataFrame.from_dict(autumn_partition, orient=\'index\')\nautumn_partition_df.reset_index(inplace=True)\nautumn_partition_df.rename(columns={"index":"city",0:"community"}, inplace=True)\nprint("Number of nodes: {}".format(len(autumn_pax_graph.nodes)))\nprint("Number of edges: {}".format(len(autumn_pax_graph.edges)))\nprint("Partition length: {}".format(len(autumn_partition)))\nprint(autumn_partition_df[\'community\

In [None]:
#create graph, communities and map the community details to the master data
us_autumn_df = handle_intersecting_cities(us_autumn_df)
pax_autumn_graph, autumn_partition = create_graph_and_community(us_autumn_df, 'us_city','foreign_city','total','autumn')
us_autumn_df = map_community_number(us_autumn_df, autumn_partition)
us_autumn_df.head()

intersecting cities: ['manchester', 'sault ste. marie', 'bristol', 'aberdeen', 'san jose', 'melbourne', 'victoria', 'marathon', 'london', 'naples', 'panama city', 'birmingham', 'rome', 'rota', 'san antonio', 'koror', 'st. petersburg', 'rosario', 'trenton']
Number of nodes: 1260
Number of edges: 101084
Partition length: 1260
Unique US cities in the dataset is: 461
Unique Foreign cities in the dataset is: 799


Unnamed: 0,us_city,foreign_city,foreign_country,year,month,month_num,us_lat,us_long,foreign_lat,foreign_long,scheduled,charter,total,us_community,foreign_community,us_foreign_community
0,aguadilla,providenciales,turks and caicos islands,1990,november,11,18.449655,-67.118476,-72.300446,21.802061,0,50,50,0,7,0-7
1,aguadilla,st. lucia,saint lucia,1990,november,11,18.449655,-67.118476,-60.975036,13.825049,0,123,123,0,0,0-0
2,albany,cancun,mexico,1990,november,11,42.651167,-73.754968,-86.851047,21.161785,0,330,330,7,7,7-7
3,allentown,freeport,the bahamas,1990,november,11,40.602206,-75.471279,-78.695362,26.535681,0,138,138,0,0,0-0
4,anchorage,amsterdam,netherlands,1990,november,11,61.216313,-149.894852,4.893604,52.37276,5265,0,5265,9,9,9-9


In [None]:
#list unique communities that are created using Louvain algorithm
autumn_partition_df = pd.DataFrame.from_dict(autumn_partition, orient='index')
autumn_partition_df.reset_index(inplace=True)
autumn_partition_df.rename(columns={"index":"city",0:"community"}, inplace=True)
print(autumn_partition_df['community'].unique())

[ 0  7  9  6 10  1  2  3  4  5  8]


Modularity score

In [None]:
#calculate modularity scores
community_louvain.modularity(autumn_partition, pax_autumn_graph)

0.32987613647781843

In [None]:
#retrieve city-wise data
autumn_community_df = pd.merge(autumn_partition_df, us_autumn_df, how='left', left_on='city', right_on='us_city')
autumn_community_df.dropna(inplace=True)
autumn_community_df.drop(columns=['us_city', 'foreign_city', 'foreign_country','foreign_lat','foreign_long','us_community','foreign_community','us_foreign_community'], inplace=True)
agg_dict = {'us_lat':'first','us_long':'first', 'scheduled':'sum', 'charter':'sum', 'total':'sum'}
autumn_community_df = autumn_community_df.groupby(['city','community','year','month','month_num'], as_index=False).agg(agg_dict)
autumn_community_df.rename(columns = {'us_lat':'latitude', 'us_long':'longitude'}, inplace=True)
autumn_community_df.head()

Unnamed: 0,city,community,year,month,month_num,latitude,longitude,scheduled,charter,total
0,aberdeen_us,9,1991.0,october,10.0,46.975371,-123.815722,0.0,26.0,26.0
1,abilene,9,1993.0,october,10.0,32.44645,-99.747591,119.0,0.0,119.0
2,abilene,9,1997.0,october,10.0,32.44645,-99.747591,94.0,0.0,94.0
3,abilene,9,1998.0,october,10.0,32.44645,-99.747591,132.0,0.0,132.0
4,abilene,9,2001.0,october,10.0,32.44645,-99.747591,73.0,0.0,73.0


In [None]:
autumn_community_df1 = pd.merge(autumn_partition_df, us_autumn_df, how='left', left_on='city', right_on='foreign_city')
autumn_community_df1.dropna(inplace=True)
autumn_community_df1.drop(columns=['us_city', 'foreign_city', 'foreign_country','us_lat','us_long','us_community','foreign_community','us_foreign_community'], inplace=True)
agg_dict = {'foreign_lat':'first','foreign_long':'first', 'scheduled':'sum', 'charter':'sum', 'total':'sum'}
autumn_community_df1 = autumn_community_df1.groupby(['city','community','year','month','month_num'], as_index=False).agg(agg_dict)
autumn_community_df1.rename(columns = {'foreign_lat':'latitude', 'foreign_long':'longitude'}, inplace=True)
autumn_community_df1.head()

Unnamed: 0,city,community,year,month,month_num,latitude,longitude,scheduled,charter,total
0,a coruna,0,2009.0,october,10.0,-8.395877,43.371209,0.0,4.0,4.0
1,a coruna,0,2010.0,september,9.0,-8.395877,43.371209,0.0,5.0,5.0
2,a coruna,0,2019.0,september,9.0,-8.395877,43.371209,0.0,2.0,2.0
3,abbotsford,9,2002.0,november,11.0,-122.329479,49.052116,108.0,0.0,108.0
4,abbotsford,9,2002.0,october,10.0,-122.329479,49.052116,116.0,0.0,116.0


In [None]:
autumn_community_df = pd.concat([autumn_community_df, autumn_community_df1], axis=0)
autumn_community_df = autumn_community_df.astype({"year": 'int',
                  "month_num": 'int'})
autumn_community_df.head()

Unnamed: 0,city,community,year,month,month_num,latitude,longitude,scheduled,charter,total
0,aberdeen_us,9,1991,october,10,46.975371,-123.815722,0.0,26.0,26.0
1,abilene,9,1993,october,10,32.44645,-99.747591,119.0,0.0,119.0
2,abilene,9,1997,october,10,32.44645,-99.747591,94.0,0.0,94.0
3,abilene,9,1998,october,10,32.44645,-99.747591,132.0,0.0,132.0
4,abilene,9,2001,october,10,32.44645,-99.747591,73.0,0.0,73.0


In [None]:
#create summary of communities
autumn_summary_df = community_summary(us_autumn_df, autumn_partition_df)
autumn_summary_df = autumn_summary_df.sort_values(by = ['city', 'total_pax'], ascending = False)
autumn_summary_df.to_csv('intermittent-result/International US/Intl_autumn_summary.csv', index=False)
autumn_summary_df.head()

Communities: {0: 393484520, 7: 525954439, 9: 376755145, 6: 220366418, 10: 4339, 1: 109, 2: 163, 3: 488, 4: 3, 5: 6, 8: 5}


Unnamed: 0,community,city,total_pax
9,9,398,376755145
0,0,380,393484520
7,7,347,525954439
6,6,116,220366418
10,10,5,4339


Filter top communities

In [None]:
#filter top communities
autumn_small_comm = autumn_summary_df[(autumn_summary_df['city'] < 5) & (autumn_summary_df['total_pax']<100)].community
print(autumn_small_comm)
autumn_community_df[(autumn_community_df['community'].isin(autumn_small_comm) == False)] 

us_autumn_df = us_autumn_df[(us_autumn_df['us_community'].isin(autumn_small_comm) == False) | (us_autumn_df['foreign_community'].isin(autumn_small_comm) == False)]
us_autumn_df.head()

5    5
8    8
4    4
Name: community, dtype: int64


Unnamed: 0,us_city,foreign_city,foreign_country,year,month,month_num,us_lat,us_long,foreign_lat,foreign_long,scheduled,charter,total,us_community,foreign_community,us_foreign_community
0,aguadilla,providenciales,turks and caicos islands,1990,november,11,18.449655,-67.118476,-72.300446,21.802061,0,50,50,0,7,0-7
1,aguadilla,st. lucia,saint lucia,1990,november,11,18.449655,-67.118476,-60.975036,13.825049,0,123,123,0,0,0-0
2,albany,cancun,mexico,1990,november,11,42.651167,-73.754968,-86.851047,21.161785,0,330,330,7,7,7-7
3,allentown,freeport,the bahamas,1990,november,11,40.602206,-75.471279,-78.695362,26.535681,0,138,138,0,0,0-0
4,anchorage,amsterdam,netherlands,1990,november,11,61.216313,-149.894852,4.893604,52.37276,5265,0,5265,9,9,9-9


In [None]:
#save results
us_autumn_df.to_csv('intermittent-result/International US/International autumn community.csv', index=False)

##WINTER

In [None]:
#read data
us_winter_df = pd.read_csv('intermittent-result/International US/us_winter_pax_data.csv')
us_winter_df.head()

Unnamed: 0,us_city,foreign_city,foreign_country,year,month,month_num,us_lat,us_long,foreign_lat,foreign_long,scheduled,charter,total
0,albany,montreal,canada,1990,december,12,42.651167,-73.754968,-73.610364,45.497216,108,0,108
1,anchorage,amsterdam,netherlands,1990,december,12,61.216313,-149.894852,4.893604,52.37276,6028,0,6028
2,anchorage,copenhagen,denmark,1990,december,12,61.216313,-149.894852,12.570072,55.686724,4559,0,4559
3,anchorage,dusseldorf,germany,1990,december,12,61.216313,-149.894852,6.776314,51.225402,1736,0,1736
4,anchorage,london,united kingdom,1990,december,12,61.216313,-149.894852,-0.127647,51.507322,16032,0,16032


In [None]:
'''
us_winter_df = handle_intersecting_cities(us_winter_df)
winter_pax_graph = nx.from_pandas_edgelist(us_winter_df, source='us_city', 
                                  target='foreign_city',
                                  edge_attr='total',
                                  create_using=nx.MultiGraph())
print("Number of nodes: {}".format(len(winter_pax_graph.nodes)))
print("Number of edges: {}".format(len(winter_pax_graph.edges)))


#computing the best partition
winter_partition = community_louvain.best_partition(winter_pax_graph)
winter_partition_df = pd.DataFrame.from_dict(winter_partition, orient='index')
winter_partition_df.reset_index(inplace=True)
winter_partition_df.rename(columns={"index":"city",0:"community"}, inplace=True)
print(winter_partition_df['community'].unique())


winter_partition_dict = json.dumps(winter_partition)
f = open("intermittent-result/International US/Intl_winter_partition_dict.json", "w")
f.write(winter_partition_dict)
f.close()
'''

'\nus_winter_df = handle_intersecting_cities(us_winter_df)\nwinter_pax_graph = nx.from_pandas_edgelist(us_winter_df, source=\'us_city\', \n                                  target=\'foreign_city\',\n                                  edge_attr=\'total\',\n                                  create_using=nx.MultiGraph())\nprint("Number of nodes: {}".format(len(winter_pax_graph.nodes)))\nprint("Number of edges: {}".format(len(winter_pax_graph.edges)))\n\n\n#computing the best partition\nwinter_partition = community_louvain.best_partition(winter_pax_graph)\nwinter_partition_df = pd.DataFrame.from_dict(winter_partition, orient=\'index\')\nwinter_partition_df.reset_index(inplace=True)\nwinter_partition_df.rename(columns={"index":"city",0:"community"}, inplace=True)\nprint(winter_partition_df[\'community\'].unique())\n\n\nwinter_partition_dict = json.dumps(winter_partition)\nf = open("intermittent-result/International US/Intl_winter_partition_dict.json", "w")\nf.write(winter_partition_dict)\nf.

In [None]:
#create graph, communities and map community details to the data
us_winter_df = handle_intersecting_cities(us_winter_df)
pax_winter_graph, winter_partition = create_graph_and_community(us_winter_df, 'us_city','foreign_city','total','winter')
us_winter_df = map_community_number(us_winter_df, winter_partition)
us_winter_df.head()

intersecting cities: ['manchester', 'sault ste. marie', 'bristol', 'san jose', 'melbourne', 'victoria', 'dublin', 'naples', 'panama city', 'birmingham', 'rome', 'rota', 'san antonio', 'koror', 'st. petersburg', 'santa maria', 'rosario', 'trenton']
Number of nodes: 1173
Number of edges: 108989
Partition length: 1173
Unique US cities in the dataset is: 431
Unique Foreign cities in the dataset is: 742


Unnamed: 0,us_city,foreign_city,foreign_country,year,month,month_num,us_lat,us_long,foreign_lat,foreign_long,scheduled,charter,total,us_community,foreign_community,us_foreign_community
0,albany,montreal,canada,1990,december,12,42.651167,-73.754968,-73.610364,45.497216,108,0,108,0,0,0-0
1,anchorage,amsterdam,netherlands,1990,december,12,61.216313,-149.894852,4.893604,52.37276,6028,0,6028,4,0,4-0
2,anchorage,copenhagen,denmark,1990,december,12,61.216313,-149.894852,12.570072,55.686724,4559,0,4559,4,3,4-3
3,anchorage,dusseldorf,germany,1990,december,12,61.216313,-149.894852,6.776314,51.225402,1736,0,1736,4,5,4-5
4,anchorage,london,united kingdom,1990,december,12,61.216313,-149.894852,-0.127647,51.507322,16032,0,16032,4,0,4-0


In [None]:
#list unique communities that are created using Louvain algorithm
winter_partition_df = pd.DataFrame.from_dict(winter_partition, orient='index')
winter_partition_df.reset_index(inplace=True)
winter_partition_df.rename(columns={"index":"city",0:"community"}, inplace=True)
print(winter_partition_df['community'].unique())

[ 0  4  3  5  7  8  9 10  1  2  6]


Modularity score

In [None]:
#modularity scores
community_louvain.modularity(winter_partition, pax_winter_graph)

0.41854808638724338

In [None]:
#retrieve city-wise records
winter_community_df = pd.merge(winter_partition_df, us_winter_df, how='left', left_on='city', right_on='us_city')
winter_community_df.dropna(inplace=True)
winter_community_df.drop(columns=['us_city', 'foreign_city', 'foreign_country','foreign_lat','foreign_long','us_community','foreign_community','us_foreign_community'], inplace=True)
agg_dict = {'us_lat':'first','us_long':'first', 'scheduled':'sum', 'charter':'sum', 'total':'sum'}
winter_community_df = winter_community_df.groupby(['city','community','year','month','month_num'], as_index=False).agg(agg_dict)
winter_community_df.rename(columns = {'us_lat':'latitude', 'us_long':'longitude'}, inplace=True)
winter_community_df.head()

Unnamed: 0,city,community,year,month,month_num,latitude,longitude,scheduled,charter,total
0,abilene,5,1996.0,december,12.0,32.44645,-99.747591,74.0,0.0,74.0
1,abilene,5,1998.0,january,1.0,32.44645,-99.747591,128.0,0.0,128.0
2,abilene,5,2000.0,february,2.0,32.44645,-99.747591,154.0,0.0,154.0
3,abilene,5,2004.0,february,2.0,32.44645,-99.747591,163.0,0.0,163.0
4,abilene,5,2005.0,december,12.0,32.44645,-99.747591,111.0,0.0,111.0


In [None]:
winter_community_df1 = pd.merge(winter_partition_df, us_winter_df, how='left', left_on='city', right_on='foreign_city')
winter_community_df1.dropna(inplace=True)
winter_community_df1.drop(columns=['us_city', 'foreign_city', 'foreign_country','us_lat','us_long','us_community','foreign_community','us_foreign_community'], inplace=True)
agg_dict = {'foreign_lat':'first','foreign_long':'first', 'scheduled':'sum', 'charter':'sum', 'total':'sum'}
winter_community_df1 = winter_community_df1.groupby(['city','community','year','month','month_num'], as_index=False).agg(agg_dict)
winter_community_df1.rename(columns = {'foreign_lat':'latitude', 'foreign_long':'longitude'}, inplace=True)
winter_community_df1.head()

Unnamed: 0,city,community,year,month,month_num,latitude,longitude,scheduled,charter,total
0,a coruna,3,2014.0,january,1.0,-8.395877,43.371209,0.0,2.0,2.0
1,abbotsford,4,2002.0,december,12.0,-122.329479,49.052116,80.0,0.0,80.0
2,abbotsford,4,2005.0,december,12.0,-122.329479,49.052116,593.0,0.0,593.0
3,abbotsford,4,2005.0,february,2.0,-122.329479,49.052116,0.0,1696.0,1696.0
4,abbotsford,4,2005.0,january,1.0,-122.329479,49.052116,0.0,348.0,348.0


In [None]:
winter_community_df = pd.concat([winter_community_df, winter_community_df1], axis=0)
winter_community_df = winter_community_df.astype({"year": 'int',
                  "month_num": 'int'})
winter_community_df.head()

Unnamed: 0,city,community,year,month,month_num,latitude,longitude,scheduled,charter,total
0,abilene,5,1996,december,12,32.44645,-99.747591,74.0,0.0,74.0
1,abilene,5,1998,january,1,32.44645,-99.747591,128.0,0.0,128.0
2,abilene,5,2000,february,2,32.44645,-99.747591,154.0,0.0,154.0
3,abilene,5,2004,february,2,32.44645,-99.747591,163.0,0.0,163.0
4,abilene,5,2005,december,12,32.44645,-99.747591,111.0,0.0,111.0


In [None]:
#create summary of communities
winter_summary_df = community_summary(us_winter_df, winter_partition_df)
winter_summary_df = winter_summary_df.sort_values(by = ['city', 'total_pax'], ascending = False)
winter_summary_df.to_csv('intermittent-result/International US/Intl_winter_summary.csv', index=False)
winter_summary_df.head()

Communities: {0: 391399462, 4: 358820907, 3: 374280973, 5: 341439745, 7: 39, 8: 87, 9: 921, 10: 749, 1: 18, 2: 62, 6: 2}


Unnamed: 0,community,city,total_pax
4,4,359,358820907
0,0,299,391399462
3,3,279,374280973
5,5,216,341439745
2,2,6,62


Filter top communities

In [None]:
winter_small_comm = winter_summary_df[(winter_summary_df['city'] < 5) & (winter_summary_df['total_pax']<100)].community
print(winter_small_comm)
winter_community_df[(winter_community_df['community'].isin(winter_small_comm) == False)] 

us_winter_df = us_winter_df[(us_winter_df['us_community'].isin(winter_small_comm) == False) | (us_winter_df['foreign_community'].isin(winter_small_comm) == False)]
us_winter_df.head()

8    8
7    7
1    1
6    6
Name: community, dtype: int64


Unnamed: 0,us_city,foreign_city,foreign_country,year,month,month_num,us_lat,us_long,foreign_lat,foreign_long,scheduled,charter,total,us_community,foreign_community,us_foreign_community
0,albany,montreal,canada,1990,december,12,42.651167,-73.754968,-73.610364,45.497216,108,0,108,0,0,0-0
1,anchorage,amsterdam,netherlands,1990,december,12,61.216313,-149.894852,4.893604,52.37276,6028,0,6028,4,0,4-0
2,anchorage,copenhagen,denmark,1990,december,12,61.216313,-149.894852,12.570072,55.686724,4559,0,4559,4,3,4-3
3,anchorage,dusseldorf,germany,1990,december,12,61.216313,-149.894852,6.776314,51.225402,1736,0,1736,4,5,4-5
4,anchorage,london,united kingdom,1990,december,12,61.216313,-149.894852,-0.127647,51.507322,16032,0,16032,4,0,4-0


In [None]:
#save results
us_winter_df.to_csv("intermittent-result/International US/International winter community.csv", index=False)