## Generate Network for each City

In [1]:
import networkx as nx
import pandas as pd
import numpy as np

### Read Data

In [2]:
# Load the per-restaurant table (one row per business_id, several cities).
# NOTE(review): the preview shows an "Unnamed: 0" column, presumably an index that was
# written into the CSV; it is not used anywhere in this notebook.
restaurants_df = pd.read_csv("./../datasets/2017-2018_restaurants.csv")

In [3]:
# Preview a few rows to check the available columns (ids, location, visit counts, ...).
restaurants_df.head(3)

Unnamed: 0,business_id,city,state,latitude,longitude,is_open,attributes,categories,hours,checkin_count,review_count,raw_stars,stars,weighted_stars,tip_count,first_date,last_date,visit_count,is_open_year_after
0,6iYb2HFDywm3zjuRg0shjw,Boulder,CO,40.017544,-105.283348,1,"{'RestaurantsTableService': 'True', 'WiFi': ""u...","Gastropubs, Food, Beer Gardens, Restaurants, B...","{'Monday': '11:0-23:0', 'Tuesday': '11:0-23:0'...",79,49,3.714286,3.5,2.827977,4,2017-09-09 04:42:34,2021-01-22 05:20:38,132,True
1,tCbdrRPZA0oiIYSmHG3J0w,Portland,OR,45.588906,-122.593331,1,"{'RestaurantsTakeOut': 'True', 'RestaurantsAtt...","Salad, Soup, Sandwiches, Delis, Restaurants, C...","{'Monday': '5:0-18:0', 'Tuesday': '5:0-17:0', ...",181,24,3.5,3.5,2.734268,4,2010-03-09 16:02:04,2021-01-21 17:55:35,209,True
2,D4JtQNTI4X3KcbzacDJsMw,Vancouver,BC,49.251342,-123.101333,1,"{'GoodForKids': 'True', 'Alcohol': ""u'none'"", ...","Restaurants, Thai","{'Monday': '17:0-21:0', 'Tuesday': '17:0-21:0'...",42,28,3.678571,3.5,3.284146,3,2010-09-26 04:03:35,2021-01-23 01:43:50,73,True


In [4]:
# NOTE(review): despite the "boulder" prefix, this loads the *Vancouver* edge list.
# The variable name is kept because the following cells refer to it.
boulder_edges_df = pd.read_csv("./../datasets/area_edges/vancouver_edges_df.csv")

In [5]:
# Each row pairs two business ids (id1, id2) with a distance value.
boulder_edges_df.head()

Unnamed: 0,id1,id2,distance
0,D4JtQNTI4X3KcbzacDJsMw,TRYXC-fnNuWe41w-7xVLew,76
1,D4JtQNTI4X3KcbzacDJsMw,0Z3I4k4PmcPnvgtFFGcPMg,310
2,D4JtQNTI4X3KcbzacDJsMw,R7qwtUEJfxhjFbO4SRXgFg,324
3,D4JtQNTI4X3KcbzacDJsMw,sGBxgKltNOTT_fI_vQMsAw,140
4,D4JtQNTI4X3KcbzacDJsMw,VMTKdTICNrTQdzbgzIbH3Q,396


In [6]:
# Number of rows in the edge list, i.e. the number of edge records to be added to the graph.
len(boulder_edges_df)

111552

### Generate Graph

In [7]:
# Empty undirected graph; nodes and edges are added in the cells below.
graph = nx.Graph()

In [8]:
# Add one node per restaurant located in British Columbia (state == "BC"), keyed by
# business_id and carrying its coordinates and visit count as node attributes.
# The rows are selected with a boolean mask and the needed columns are iterated
# directly, which avoids building a Series per row as DataFrame.iterrows() does.
bc_restaurants = restaurants_df[restaurants_df["state"] == "BC"]
for business_id, lat, lon, visits in zip(
    bc_restaurants["business_id"],
    bc_restaurants["latitude"],
    bc_restaurants["longitude"],
    bc_restaurants["visit_count"],
):
    # Cast to plain Python numbers so the attributes serialize cleanly on export.
    graph.add_node(business_id, latitude=float(lat), longitude=float(lon), visit=int(visits))

In [9]:
# Sanity check: how many nodes the graph holds after the BC restaurants were added.
graph.number_of_nodes()

3984

In [10]:
# Connect every (id1, id2) pair from the edge list in a single bulk call instead of
# iterating row by row with DataFrame.iterrows().
# Only the endpoints are used: the "distance" column is not stored on the edges.
graph.add_edges_from(zip(boulder_edges_df["id1"], boulder_edges_df["id2"]))

In [11]:
# Sanity check: compare against the number of rows in the edge list.
graph.number_of_edges()

111552

In [12]:
# Average clustering coefficient of the graph built above.
# The graph is the Vancouver (BC) network, so the message names Vancouver; it
# previously said "Austin", apparently left over from another city's notebook.
average_coeff = nx.average_clustering(graph)
print(f"The average clustering coefficient of the Vancouver graph is: {average_coeff}")

The average clustering coefficient of the Austin graph is: 0.774546613151452


In [13]:
# nx.info() was removed in NetworkX 3.0, so the summary is assembled from graph
# methods that exist in every NetworkX version, in the same format as before.
node_count = graph.number_of_nodes()
edge_count = graph.number_of_edges()
# Every undirected edge adds one to the degree of each of its two endpoints.
average_degree = 2 * edge_count / node_count if node_count else 0.0
(
    f"Name: {graph.name}\n"
    f"Type: {type(graph).__name__}\n"
    f"Number of nodes: {node_count}\n"
    f"Number of edges: {edge_count}\n"
    f"Average degree: {average_degree:8.4f}"
)

'Name: \nType: Graph\nNumber of nodes: 3984\nNumber of edges: 111552\nAverage degree:  56.0000'

In [15]:
# Export the graph (nodes with their latitude/longitude/visit attributes, plus edges)
# to a GEXF file in the current working directory.
nx.write_gexf(graph, "vancouver.gexf")