In [1]:
import pandas as pd
import plotly.graph_objects as go
import networkx as nx

In [2]:
df=pd.read_csv(r"C:\Users\User\Downloads\My_Uber_drive.csv")

In [3]:
df_diff_area = df[df['START*']!=df['STOP*']]

df_diff_area.head()

Unnamed: 0,START_DATE*,END_DATE*,CATEGORY*,START*,STOP*,MILES*,PURPOSE*
4,1/6/2016 14:42,1/6/2016 15:49,Business,Fort Pierce,West Palm Beach,63.7,Customer Visit
6,1/6/2016 17:30,1/6/2016 17:35,Business,West Palm Beach,Palm Beach,7.1,Meeting
8,1/10/2016 8:05,1/10/2016 8:25,Business,Cary,Morrisville,8.3,Meeting
9,1/10/2016 12:17,1/10/2016 12:44,Business,Jamaica,New York,16.5,Customer Visit
10,1/10/2016 15:08,1/10/2016 15:51,Business,New York,Queens,10.8,Meeting


# making the network

In [4]:
# making the graph/network
G = nx.from_pandas_edgelist(df_diff_area, source='START*', target='STOP*', 
                            create_using=nx.DiGraph())


In [5]:
# displaying the nodes
G.nodes()


NodeView(('Fort Pierce', 'West Palm Beach', 'Palm Beach', 'Cary', 'Morrisville', 'Jamaica', 'New York', 'Queens', 'Elmhurst', 'Midtown', 'East Harlem', 'NoMad', 'Flatiron District', 'Midtown East', 'Hudson Square', 'Lower Manhattan', "Hell's Kitchen", 'Queens County', 'Downtown', 'Gulfton', 'Eagan Park', 'Jamestown Court', 'Durham', 'Farmington Woods', 'Whitebridge', 'Lake Wellingborough', 'Raleigh', 'Fayetteville Street', 'Umstead', 'Hazelwood', 'Westpark Place', 'Fairmont', 'Meredith Townes', 'Leesville Hollow', 'Apex', 'Chapel Hill', 'Northwoods', 'Williamsburg Manor', 'Macgregor Downs', 'Edgehill Farms', 'Tanglewood', 'Preston', 'Eastgate', 'Walnut Terrace', 'East Elmhurst', 'Jackson Heights', 'Midtown West', 'Long Island City', 'Katunayaka', 'Unknown Location', 'Colombo', 'Nugegoda', 'Islamabad', 'R?walpindi', 'Noorpur Shahan', 'Heritage Pines', 'Waverly Place', 'Wayne Ridge', 'Depot Historic District', 'East Austin', 'West University', 'South Congress', 'Arts District', 'The Drag

In [6]:
# displaying the edges
G.edges()


OutEdgeView([('Fort Pierce', 'West Palm Beach'), ('West Palm Beach', 'Palm Beach'), ('Cary', 'Morrisville'), ('Cary', 'Durham'), ('Cary', 'Raleigh'), ('Cary', 'Apex'), ('Cary', 'Chapel Hill'), ('Cary', 'Latta'), ('Cary', 'Holly Springs'), ('Cary', 'Wake Forest'), ('Cary', 'Winston Salem'), ('Cary', 'Unknown Location'), ('Cary', 'Wake Co.'), ('Cary', 'Fuquay-Varina'), ('Morrisville', 'Cary'), ('Morrisville', 'Raleigh'), ('Morrisville', 'Banner Elk'), ('Jamaica', 'New York'), ('New York', 'Queens'), ('New York', 'Queens County'), ('New York', 'Long Island City'), ('New York', 'Jamaica'), ('Elmhurst', 'New York'), ('Midtown', 'East Harlem'), ('Midtown', 'Midtown East'), ('Midtown', 'Hudson Square'), ('Midtown', 'Midtown West'), ('Midtown', 'Alief'), ('Midtown', 'Sharpstown'), ('Midtown', 'Washington Avenue'), ('Midtown', 'Downtown'), ('Midtown', 'Greater Greenspoint'), ('East Harlem', 'NoMad'), ('Flatiron District', 'Midtown'), ('Midtown East', 'Midtown'), ('Hudson Square', 'Lower Manhatt

In [7]:
print(G.edges()[('Fort Pierce', 'West Palm Beach')])
print(G.edges()[('Cary', 'Apex')])
print(G.edges()[('Whitebridge', 'Hazelwood')])


{}
{}
{}


In [8]:
# making the edge attributes dataframe
df_edge = df_diff_area.groupby(['START*', 'STOP*'], as_index=False)['MILES*'].count()

df_edge.head()


Unnamed: 0,START*,STOP*,MILES*
0,Agnew,Cory,1
1,Agnew,Renaissance,2
2,Almond,Bryson City,1
3,Apex,Cary,13
4,Apex,Eagle Rock,1


In [9]:
# adding the trip_loc column
trip_loc = [tuple([df_edge.loc[i]['START*'], df_edge.loc[i]['STOP*']]) for i in range(len(df_edge))]

df_edge['trip_loc'] = trip_loc


In [10]:
# adding the trip_loc column
trip_loc = list(zip(df_edge['START*'], df_edge['STOP*']))

df_edge['trip_loc'] = trip_loc


In [11]:
df_edge.head()

Unnamed: 0,START*,STOP*,MILES*,trip_loc
0,Agnew,Cory,1,"(Agnew, Cory)"
1,Agnew,Renaissance,2,"(Agnew, Renaissance)"
2,Almond,Bryson City,1,"(Almond, Bryson City)"
3,Apex,Cary,13,"(Apex, Cary)"
4,Apex,Eagle Rock,1,"(Apex, Eagle Rock)"


In [12]:
# loop to iterate over the edges
for i in range(len(df_edge)):
    
    edge = df_edge.iloc[i]['trip_loc']           # extracting the edge
    d = {'Trips':df_edge.iloc[i]['MILES*']}      # dictionary to store the count of trips for that edge
    G.edges()[edge].update(d)                    # adding the attribute


In [13]:
print(G.edges()[('Fort Pierce', 'West Palm Beach')])
print(G.edges()[('Cary', 'Apex')])
print(G.edges()[('Whitebridge', 'Hazelwood')])


{'Trips': 1}
{'Trips': 14}
{'Trips': 4}


In [14]:
 # defining the position of the nodes
pos = nx.spring_layout(G)

# adding the position in the form of a dictionary to the nodes
for node in G.nodes:
    G.nodes[node]['pos'] = list(pos[node])


In [15]:
print(G.nodes['Fort Pierce']['pos'])
print(G.nodes['Cary']['pos'])
print(G.nodes['New York']['pos'])


[-0.4204187200286893, 0.2611544371562085]
[-0.19352754794771015, 0.3624882902586422]
[0.37321303440425035, -0.6097779531128228]


In [16]:
for node, adj in enumerate(G.adjacency()):
    print(node)
    print(adj)


0
('Fort Pierce', {'West Palm Beach': {'Trips': 1}})
1
('West Palm Beach', {'Palm Beach': {'Trips': 1}})
2
('Palm Beach', {})
3
('Cary', {'Morrisville': {'Trips': 67}, 'Durham': {'Trips': 36}, 'Raleigh': {'Trips': 23}, 'Apex': {'Trips': 14}, 'Chapel Hill': {'Trips': 1}, 'Latta': {'Trips': 1}, 'Holly Springs': {'Trips': 1}, 'Wake Forest': {'Trips': 1}, 'Winston Salem': {'Trips': 1}, 'Unknown Location': {'Trips': 1}, 'Wake Co.': {'Trips': 1}, 'Fuquay-Varina': {'Trips': 1}})
4
('Morrisville', {'Cary': {'Trips': 75}, 'Raleigh': {'Trips': 4}, 'Banner Elk': {'Trips': 1}})
5
('Jamaica', {'New York': {'Trips': 2}})
6
('New York', {'Queens': {'Trips': 1}, 'Queens County': {'Trips': 1}, 'Long Island City': {'Trips': 1}, 'Jamaica': {'Trips': 1}})
7
('Queens', {})
8
('Elmhurst', {'New York': {'Trips': 1}})
9
('Midtown', {'East Harlem': {'Trips': 1}, 'Midtown East': {'Trips': 1}, 'Hudson Square': {'Trips': 1}, 'Midtown West': {'Trips': 1}, 'Alief': {'Trips': 1}, 'Sharpstown': {'Trips': 4}, 'Washing

In [17]:
# Make a node trace

traceRecode = []                    # list to store all the traces

# initialise a node trace
node_trace = go.Scatter(x=[], y=[], hovertext=[], mode='markers', hoverinfo="text", 
                        marker=dict(showscale=True, reversescale=True,
                                    color=[], size=5, colorbar=dict(thickness=10, title='No. of unique locations travelled to or from',
                                                                    xanchor='left', titleside='right'),
                                    colorscale="rdylbu"))

# adding the coordinate position of the nodes
for node in G.nodes():
    x, y = G.nodes()[node]['pos']
    hovertext = node
    node_trace['x'] += tuple([x])
    node_trace['y'] += tuple([y])
    node_trace['hovertext'] += tuple([hovertext])          # add the hovertext (name of the location)

# specify the color of the node
for node, adjacencies in enumerate(G.adjacency()):
    try:
        node_trace['marker']['color']+=tuple([len(adjacencies[1])])
    except:
        pass
    
traceRecode.append(node_trace)                     # adding the node trace


In [18]:
for node, adj in enumerate(G.adjacency()):
    print(node)
    print(adj)


0
('Fort Pierce', {'West Palm Beach': {'Trips': 1}})
1
('West Palm Beach', {'Palm Beach': {'Trips': 1}})
2
('Palm Beach', {})
3
('Cary', {'Morrisville': {'Trips': 67}, 'Durham': {'Trips': 36}, 'Raleigh': {'Trips': 23}, 'Apex': {'Trips': 14}, 'Chapel Hill': {'Trips': 1}, 'Latta': {'Trips': 1}, 'Holly Springs': {'Trips': 1}, 'Wake Forest': {'Trips': 1}, 'Winston Salem': {'Trips': 1}, 'Unknown Location': {'Trips': 1}, 'Wake Co.': {'Trips': 1}, 'Fuquay-Varina': {'Trips': 1}})
4
('Morrisville', {'Cary': {'Trips': 75}, 'Raleigh': {'Trips': 4}, 'Banner Elk': {'Trips': 1}})
5
('Jamaica', {'New York': {'Trips': 2}})
6
('New York', {'Queens': {'Trips': 1}, 'Queens County': {'Trips': 1}, 'Long Island City': {'Trips': 1}, 'Jamaica': {'Trips': 1}})
7
('Queens', {})
8
('Elmhurst', {'New York': {'Trips': 1}})
9
('Midtown', {'East Harlem': {'Trips': 1}, 'Midtown East': {'Trips': 1}, 'Hudson Square': {'Trips': 1}, 'Midtown West': {'Trips': 1}, 'Alief': {'Trips': 1}, 'Sharpstown': {'Trips': 4}, 'Washing

In [19]:
figure = {
    "data": traceRecode,
    "layout": go.Layout(title='Network of Trips', showlegend=False, hovermode='closest')}


In [26]:
go.FigureWidget(figure)


FigureWidget({
    'data': [{'hoverinfo': 'text',
              'hovertext': [Fort Pierce, West Palm Beach, Pa…

In [24]:
# Make an edge trace

for edge in G.edges:
    x0, y0 = G.nodes()[edge[0]]['pos']
    x1, y1 = G.nodes()[edge[1]]['pos']
    weight = G.edges()[edge]['Trips']                 # specifying the parameter for the width of the edge
    
    trace = go.Scatter(x=tuple([x0, x1, None]), y=tuple([y0, y1, None]),    # defining the edge trace
                       mode='lines',
                       line=dict(width=weight,color='Blue'))
    traceRecode.append(trace)                         # adding the edge trace


KeyError: 'Trips'

In [27]:
# filtered dataframe
most_frequent_starts = df['START*'].value_counts().nlargest(10).index

df_filtered = df[df['START*'].isin(most_frequent_starts)]


In [28]:
# making the graph/network
G = nx.from_pandas_edgelist(df_filtered, source='START*', target='STOP*', 
                            create_using=nx.DiGraph())


In [29]:
# making the edge attributes dataframe
df_edge = df_filtered.groupby(['START*', 'STOP*'], as_index=False)['MILES*'].count()


In [30]:
# making the edge attributes dataframe
df_edge = df_filtered.groupby(['START*', 'STOP*'], as_index=False)['MILES*'].count()


In [31]:
# adding the trip_loc column
trip_loc = [tuple([df_edge.loc[i]['START*'], df_edge.loc[i]['STOP*']]) for i in range(len(df_edge))]

df_edge['trip_loc'] = trip_loc


In [32]:
# loop to iterate over the edges
for i in range(len(df_edge)):
    
    edge = df_edge.iloc[i]['trip_loc']                   # extracting the edge
    d = {'Trips':df_edge.iloc[i]['MILES*']}              # dictionary to store the count of trips for that edge
    G.edges()[edge].update(d) 

In [33]:
# defining the position of the nodes
pos = nx.circular_layout(G)

# adding the position in the form of a dictionary to the nodes
for node in G.nodes:
    G.nodes[node]['pos'] = list(pos[node])


In [34]:
# Make a node trace

traceRecode = []                    # list to store all the traces

# initialise a node trace
node_trace = go.Scatter(x=[], y=[], hovertext=[], mode='markers', hoverinfo="text", 
                        marker=dict(showscale=True, reversescale=True,
                                    color=[], size=5, colorbar=dict(thickness=10, title='No. of unique locations travelled to or from',
                                                                    xanchor='left', titleside='right'),
                                    colorscale="rdylbu"))

# adding the coordinate position of the nodes
for node in G.nodes():
    x, y = G.nodes()[node]['pos']
    hovertext = node
    node_trace['x'] += tuple([x])
    node_trace['y'] += tuple([y])
    node_trace['hovertext'] += tuple([hovertext])


In [35]:
# specify the color of the node
for node, adjacencies in enumerate(G.adjacency()):
    try:
        node_trace['marker']['color']+=tuple([len(adjacencies[1])])
    except:
        pass
    
traceRecode.append(node_trace)  


In [37]:

# Make an edge trace

for edge in G.edges:
    x0, y0 = G.nodes()[edge[0]]['pos']
    x1, y1 = G.nodes()[edge[1]]['pos']
    
    trace = go.Scatter(x=tuple([x0, x1, None]), y=tuple([y0, y1, None]),    # defining the edge trace
                       mode='lines',
                       line=dict(width=0.5,color='Blue'))
    traceRecode.append(trace)                         # adding the edge trace







In [38]:
figure = {
    "data": traceRecode,
    "layout": go.Layout(title='Network of Trips for most frequent start locations', showlegend=False, hovermode='closest')}


In [39]:
go.FigureWidget(figure)


FigureWidget({
    'data': [{'hoverinfo': 'text',
              'hovertext': [Cary, Morrisville, Durham, White…