# Practical 11: Write a program to analyze movement data.

In [None]:
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt

In [None]:
# Load the urban mobility CSV from the mounted Google Drive folder.
df = pd.read_csv(
    filepath_or_buffer="/content/drive/MyDrive/dataset/urban_mobility_dataset.csv",
)

In [None]:
# Show the freshly loaded DataFrame as the cell's output.
df

Unnamed: 0,timestamp,public_transport_usage,traffic_flow,bike_sharing_usage,pedestrian_count,weather_conditions,day_of_week,holiday,event,temperature,humidity,road_incidents,public_transport_delay,bike_availability,pedestrian_incidents
0,2023-01-01 00:00:00,292,3681,296,1939,Clear,Sunday,0,,24.547380,29,0,5.263106,22,4
1,2023-01-01 01:00:00,340,4743,96,688,Snow,Sunday,0,,31.801722,99,3,0.523627,88,2
2,2023-01-01 02:00:00,372,3491,183,1774,Rain,Sunday,0,,0.052832,34,6,0.408793,93,2
3,2023-01-01 03:00:00,365,4360,214,24,Rain,Sunday,0,,-3.757874,41,4,27.640844,89,3
4,2023-01-01 04:00:00,226,121,247,224,Snow,Sunday,0,,-4.948219,45,3,14.820891,49,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
999995,2137-01-29 11:00:00,452,1117,189,599,Clear,Tuesday,0,,34.738752,90,0,9.414779,3,0
999996,2137-01-29 12:00:00,348,950,261,1344,Snow,Tuesday,0,,10.753334,75,1,26.964666,88,3
999997,2137-01-29 13:00:00,130,1620,87,98,Fog,Tuesday,1,,16.771888,55,8,0.019880,94,0
999998,2137-01-29 14:00:00,177,3217,12,516,Clear,Tuesday,0,,-7.029623,48,9,2.938070,12,4


In [None]:
# Restrict the working frame to the timestamp plus the usage, delay,
# availability and incident columns; the weather, calendar, event,
# temperature and humidity fields are left out from here on.
columns = [
    'timestamp',
    'public_transport_usage',
    'traffic_flow',
    'bike_sharing_usage',
    'pedestrian_count',
    'road_incidents',
    'public_transport_delay',
    'bike_availability',
    'pedestrian_incidents',
]
df = df.loc[:, columns]
df

Unnamed: 0,timestamp,public_transport_usage,traffic_flow,bike_sharing_usage,pedestrian_count,road_incidents,public_transport_delay,bike_availability,pedestrian_incidents
0,2023-01-01 00:00:00,292,3681,296,1939,0,5.263106,22,4
1,2023-01-01 01:00:00,340,4743,96,688,3,0.523627,88,2
2,2023-01-01 02:00:00,372,3491,183,1774,6,0.408793,93,2
3,2023-01-01 03:00:00,365,4360,214,24,4,27.640844,89,3
4,2023-01-01 04:00:00,226,121,247,224,3,14.820891,49,3
...,...,...,...,...,...,...,...,...,...
999995,2137-01-29 11:00:00,452,1117,189,599,0,9.414779,3,0
999996,2137-01-29 12:00:00,348,950,261,1344,1,26.964666,88,3
999997,2137-01-29 13:00:00,130,1620,87,98,8,0.019880,94,0
999998,2137-01-29 14:00:00,177,3217,12,516,9,2.938070,12,4


In [None]:
# Directed graph whose nodes will be the transport modes and whose weighted
# edges will count how often two modes are in use in the same record.
# NOTE(review): the edge-building loop only ever inserts an edge from an
# earlier entry of `transport_modes` to a later one, so edge direction encodes
# list order rather than anything in the data. Direction-sensitive measures
# computed on G (closeness, PageRank) depend on that ordering -- confirm that a
# directed graph is really intended here rather than an undirected nx.Graph().
G = nx.DiGraph()

In [None]:
# The three usage columns that act as nodes of the mobility graph.
transport_modes = [
    'public_transport_usage',
    'bike_sharing_usage',
    'pedestrian_count',
]
# Register every mode as a node in one call.
G.add_nodes_from(transport_modes)

In [None]:
# Build the weighted edges of G: for every pair (earlier mode -> later mode)
# in `transport_modes`, the edge weight is the number of rows in which both
# modes have a usage value greater than zero.
#
# The counting is done column-wise with boolean masks instead of walking the
# frame with df.iterrows(); on a frame of roughly a million rows the row-wise
# loop is dramatically slower while producing exactly the same counts.
active = {mode: df[mode] > 0 for mode in transport_modes}

for i, mode1 in enumerate(transport_modes):
    for mode2 in transport_modes[i + 1:]:
        # Rows where both modes are in use at the same time.
        co_occurrences = int((active[mode1] & active[mode2]).sum())
        if co_occurrences == 0:
            # No row has both modes active, so no edge is created.
            continue
        if G.has_edge(mode1, mode2):
            # Re-running the cell keeps accumulating onto the existing weight.
            G[mode1][mode2]['weight'] += co_occurrences
        else:
            G.add_edge(mode1, mode2, weight=co_occurrences)

In [None]:
# Degree centrality of every mode, printed from highest to lowest score.
degree_centrality = nx.degree_centrality(G)
print(
    "\nDegree Centrality (most connected modes):",
    sorted(
        degree_centrality.items(),
        key=lambda node_score: node_score[1],
        reverse=True,
    ),
    sep="\n",
)


Degree Centrality (most connected modes):
[('public_transport_usage', 1.0), ('bike_sharing_usage', 1.0), ('pedestrian_count', 1.0)]


In [None]:
# Betweenness centrality of every mode (no edge weights are passed), printed
# from highest to lowest score.
betweenness_centrality = nx.betweenness_centrality(G)
print(
    "\nBetweenness Centrality (key transition modes):",
    sorted(
        betweenness_centrality.items(),
        key=lambda node_score: node_score[1],
        reverse=True,
    ),
    sep="\n",
)


Betweenness Centrality (key transition modes):
[('public_transport_usage', 0.0), ('bike_sharing_usage', 0.0), ('pedestrian_count', 0.0)]


In [None]:
# Closeness centrality of every mode, printed from highest to lowest score.
closeness_centrality = nx.closeness_centrality(G)
print(
    "\nCloseness Centrality (proximity of modes):",
    sorted(
        closeness_centrality.items(),
        key=lambda node_score: node_score[1],
        reverse=True,
    ),
    sep="\n",
)


Closeness Centrality (proximity of modes):
[('pedestrian_count', 1.0), ('bike_sharing_usage', 0.5), ('public_transport_usage', 0.0)]


In [None]:
# PageRank of every mode using the co-occurrence counts stored under the
# 'weight' edge attribute, printed from highest to lowest score.
pagerank = nx.pagerank(G, weight='weight')
print(
    "\nPageRank (importance of modes/locations):",
    sorted(
        pagerank.items(),
        key=lambda node_score: node_score[1],
        reverse=True,
    ),
    sep="\n",
)


PageRank (importance of modes/locations):
[('pedestrian_count', 0.5209510481443506), ('bike_sharing_usage', 0.2814461986397785), ('public_transport_usage', 0.19760275321587095)]


In [None]:
# Total usage per mode over the whole dataset, keyed by a display label.
location_data = {
    label: df[column].sum()
    for label, column in (
        ('Public Transport', 'public_transport_usage'),
        ('Bike Sharing', 'bike_sharing_usage'),
        ('Pedestrian', 'pedestrian_count'),
    )
}

In [None]:
# Turn the label -> total mapping into a two-column table, then order it so
# the most used entry comes first.
location_usage = pd.DataFrame({
    'Location': list(location_data.keys()),
    'Usage': list(location_data.values()),
})
location_usage_sorted = location_usage.sort_values('Usage', ascending=False)

In [None]:
# Report the first five rows of the usage table sorted in descending order.
print(
    "Top Locations Based on User Movement:",
    location_usage_sorted.head(n=5),
    sep="\n",
)

Top Locations Based on User Movement:
           Location       Usage
2        Pedestrian  1009578256
0  Public Transport   274474218
1      Bike Sharing   149452101


# **Conclusion**

The urban mobility analysis effectively reveals transportation patterns and user preferences across different modes of transport. By analyzing public transport, bike-sharing, and pedestrian movement data through network centrality measures and PageRank, we identified key transition points and popular routes. The usage statistics highlight dominant transportation modes, providing valuable insights for urban planning and optimization of city mobility services.