# Delhi-Metro Network Analysis

### 1. Libraries 
- pandas-data manipulation and analysis 
- folium-creating interactive maps
- plotly.express-interactive visualizations
- plotly.graph_objects- lower-level API for creating more customized and complex plots
- make_subplots -function to create subplots (multiple plots in a single figure) using Plotly
- plotly.io- module provides functions for reading and writing Plotly figures
- networkx


In [180]:
import pandas as pd
import folium
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.io as pio
pio.templates.default="plotly_white"
import networkx as nx
import matplotlib.pyplot as plt

In [181]:
metro_data=pd.read_csv("Delhi-Metro-Network.csv")
print(metro_data.head())

   Station ID         Station Name  Distance from Start (km)          Line  \
0           1             Jhil Mil                      10.3      Red line   
1           2  Welcome [Conn: Red]                      46.8     Pink line   
2           3          DLF Phase 3                      10.0   Rapid Metro   
3           4           Okhla NSIC                      23.8  Magenta line   
4           5           Dwarka Mor                      10.2     Blue line   

  Opening Date Station Layout   Latitude  Longitude  
0   2008-04-06       Elevated  28.675790  77.312390  
1   2018-10-31       Elevated  28.671800  77.277560  
2   2013-11-14       Elevated  28.493600  77.093500  
3   2017-12-25       Elevated  28.554483  77.264849  
4   2005-12-30       Elevated  28.619320  77.033260  


In [182]:
#check for missing values
missing_values=metro_data.isnull().sum()
missing_values

Station ID                  0
Station Name                0
Distance from Start (km)    0
Line                        0
Opening Date                0
Station Layout              0
Latitude                    0
Longitude                   0
dtype: int64

In [183]:
#checking data types
data_types=metro_data.dtypes
data_types

Station ID                    int64
Station Name                 object
Distance from Start (km)    float64
Line                         object
Opening Date                 object
Station Layout               object
Latitude                    float64
Longitude                   float64
dtype: object

In [184]:
#convert 'Opening Date' column to date-time format
metro_data['Opening Date']=pd.to_datetime(metro_data['Opening Date'])

### Geospatial Analysis

In [185]:
#define color scheme for metro lines
line_colors={
    'Red line':'red',
    'Blue line':'blue',
    'Yellow line': 'beige',
    'Green line': 'green',
    'Voilet line': 'purple',
    'Pink line': 'pink',
    'Magenta line': 'darkred',
    'Orange line': 'orange',
    'Rapid Metro': 'cadetblue',
    'Aqua line': 'black',
    'Green line branch': 'lightgreen',
    'Blue line branch': 'lightblue',
    'Gray line': 'lightgray'
}
delhi_map_tooltip=folium.Map(location=[28.7041,77.1025],zoom_start=11)

# Add markers for each metro station with tooltip and icon
for index, row in metro_data.iterrows():
    line = row['Line']
    color = line_colors.get(line, 'black')

    folium.Marker(
        location=[row['Latitude'], row['Longitude']],
        popup=f"{row['Station Name']} - {line} Line",
        tooltip=f"Station: {row['Station Name']}<br>Line: {line}",
        icon=folium.Icon(color=color, icon='train', prefix='fa')  
    ).add_to(delhi_map_tooltip)

# Display the map 
delhi_map_tooltip


An interactive map is shown using Folium, displaying Delhi metro stations with colored markers and additional information in popups and tooltips. When you hover over a marker, a tooltip appears with detailed information, including the station name and metro line

### Temporal Analysis

#### Yearly Growth in the Number of Stations

In [186]:
metro_data['Opening Year']=metro_data['Opening Date'].dt.year

#count number of stations opened each year
station_per_year=metro_data['Opening Year'].value_counts().sort_index()

station_per_year_df=station_per_year.reset_index()
station_per_year_df.columns=['Year','Number of Stations']

fig=px.bar(station_per_year_df,x='Year',y='Number of Stations',title="Number of Metro Stations Opened Each Year",
    labels={'Year': 'Year','Number of Stations':'Number of Stations Opened'},color='Year',color_continuous_scale='emrld')
fig.update_layout(xaxis_tickangle=-45)
fig.show()

In [187]:
color_continuous_scale='Warm'

#### Monthly Growth in the Number of Stations

In [188]:
metro_data['Opening Month']=metro_data['Opening Date'].dt.month

station_per_month=metro_data['Opening Month'].value_counts().sort_index()
station_per_month_df=station_per_month.reset_index()
station_per_month_df.columns=['Month','Number of Stations']

fig_monthly=px.bar(station_per_month_df,x='Month',y='Number of Stations',title="Number of Stations Opened Each Month",
                   labels={'Month':'Month','Number of Stations':'Number of Stations Opened'},color='Month',color_continuous_scale='redor')
fig_monthly.update_layout(xaxis_tickvals=list(range(1, 13)))
fig_monthly.show()

#### Line-Wise Growth in the Number of Stations

In [189]:
line_per_year = metro_data.groupby(['Line', 'Opening Year']).size().unstack(fill_value=0).stack().reset_index(name='Number of Stations')
fig_line_temporal = px.line(line_per_year, x='Opening Year', y='Number of Stations', color='Line',
                            title="Number of Metro Stations Opened Over Time by Line",
                            labels={'Opening Year': 'Year', 'Number of Stations': 'Number of Stations Opened'},color_discrete_map=line_colors)
fig_line_temporal.show()



### Line Analysis

In [190]:
# Count the number of stations per metro line
stations_per_line = metro_data['Line'].value_counts()

# calculate the total distance of each metro line (max distance from start)
total_distance_per_line = metro_data.groupby('Line')['Distance from Start (km)'].max()

# Calculate the average distance between stations for each metro line
avg_distance_per_line = total_distance_per_line / (stations_per_line - 1)

line_analysis = pd.DataFrame({
    'Line': stations_per_line.index,
    'Number of Stations': stations_per_line.values,
    'Average Distance Between Stations (km)': avg_distance_per_line
})

# sorting the DataFrame by the number of stations in descending order
line_analysis = line_analysis.sort_values(by='Number of Stations', ascending=False)

line_analysis.reset_index(drop=True, inplace=True)
print(line_analysis)

                 Line  Number of Stations  \
0           Blue line                  49   
1           Pink line                  38   
2         Yellow line                  37   
3         Voilet line                  34   
4            Red line                  29   
5        Magenta line                  25   
6           Aqua line                  21   
7          Green line                  21   
8         Rapid Metro                  11   
9    Blue line branch                   8   
10        Orange line                   6   
11          Gray line                   3   
12  Green line branch                   3   

    Average Distance Between Stations (km)  
0                                 1.355000  
1                                 1.097917  
2                                 1.157143  
3                                 1.950000  
4                                 1.240000  
5                                 1.050000  
6                                 1.379167  
7        

#### Plotting Number of Stations and Average Distance of Each Line

In [191]:
# creating subplots
fig = make_subplots(rows=1, cols=2, subplot_titles=('Number of Stations Per Metro Line',
                                                    'Average Distance Between Stations Per Metro Line'),
                    horizontal_spacing=0.2)

# plot for Number of Stations per Line
fig.add_trace(
    go.Bar(y=line_analysis['Line'], x=line_analysis['Number of Stations'],
           orientation='h', name='Number of Stations', marker_color='purple'),
    row=1, col=1
)

# plot for Average Distance Between Stations
fig.add_trace(
    go.Bar(y=line_analysis['Line'], x=line_analysis['Average Distance Between Stations (km)'],
           orientation='h', name='Average Distance (km)', marker_color='pink'),
    row=1, col=2
)

# update xaxis properties
fig.update_xaxes(title_text="Number of Stations", row=1, col=1)
fig.update_xaxes(title_text="Average Distance Between Stations (km)", row=1, col=2)

# update yaxis properties
fig.update_yaxes(title_text="Metro Line", row=1, col=1)
fig.update_yaxes(title_text="", row=1, col=2)

# Add hover information for Number of Stations plot
fig.data[0].hovertemplate = 'Line: %{y} <br>Number of Stations: %{x}'

# Add hover information for Average Distance plot
fig.data[1].hovertemplate = 'Line: %{y} <br>Average Distance: %{x} km'


fig.show()

#### Plotting Maximum and Total Distance between Stations for Each Line

In [193]:
# Calculate metrics
total_length_per_line = metro_data.groupby('Line')['Distance from Start (km)'].sum()
max_distance_per_line = metro_data.groupby('Line')['Distance from Start (km)'].max()

# Add new columns to the line analysis DataFrame
line_analysis['Total Length (km)'] = total_length_per_line.values
line_analysis['Maximum Distance Between Stations (km)'] = max_distance_per_line.values

print(line_analysis)

                 Line  Number of Stations  \
0           Blue line                  49   
1           Pink line                  38   
2         Yellow line                  37   
3         Voilet line                  34   
4            Red line                  29   
5        Magenta line                  25   
6           Aqua line                  21   
7          Green line                  21   
8         Rapid Metro                  11   
9    Blue line branch                   8   
10        Orange line                   6   
11          Gray line                   3   
12  Green line branch                   3   

    Average Distance Between Stations (km)  Total Length (km)  \
0                                 1.355000              280.4   
1                                 1.097917             1281.1   
2                                 1.157143               32.0   
3                                 1.950000                5.4   
4                                 1.240000  

In [194]:
# creating subplots
fig = make_subplots(rows=1, cols=2, subplot_titles=('Maximum Distance Between Stations Per Metro Line',
                                                    'Total Distance Between Stations Per Metro Line'),
                    horizontal_spacing=0.4)

# Plot for Maximum Distance Between Stations
fig.add_trace(
    go.Bar(y=line_analysis['Line'], x=line_analysis['Maximum Distance Between Stations (km)'],
           orientation='h', name='Maximum Distance (km)', marker_color='lightblue'),
    row=1, col=1
)

# Plot for Total Distance Between Stations
fig.add_trace(
    go.Bar(y=line_analysis['Line'], x=line_analysis['Total Length (km)'],
           orientation='h', name='Total Length (km)', marker_color='navy'),
    row=1, col=2
)

# Update x-axis properties
fig.update_xaxes(title_text="Maximum Distance Between Stations (km)", row=1, col=1)
fig.update_xaxes(title_text="Total Distance Between Stations (km)", row=1, col=2)

# Update y-axis properties
fig.update_yaxes(title_text="Metro Line", row=1, col=1)
fig.update_yaxes(title_text="", row=1, col=2)

# Add hover information for Maximum Distance plot
fig.data[0].hovertemplate = 'Line: %{y} <br>Maximum Distance: %{x}'

# Add hover information for Total Distance plot
fig.data[1].hovertemplate = 'Line: %{y} <br>Total Distance: %{x} km'

# Show the figure
fig.show()



### Station Layout Analysis

In [195]:
layout_counts=metro_data['Station Layout'].value_counts()

custom_colors = ['green', 'blue', 'orange', 'red', 'purple']

#create bar plot
fig=px.bar(x=layout_counts.index,y=layout_counts.values,labels={'x':'Station Layout','y':'Number of Stations'},title='Distribution of Delhi Metro Station Layouts', color=layout_counts.values,color_continuous_scale='ice')

fig.update_layout(xaxis_title="Station Layout",yaxis_title="Number of Stations",coloraxis_showscale=False,template="plotly_white")
hover_template = 'Station Layout: %{x}<br>Number of Stations: %{y}'
fig.update_traces(hovertemplate=hover_template)

### Network Analysis

In [196]:
import networkx as nx

# Create a graph from the DataFrame
G = nx.from_pandas_edgelist(metro_data, 'Station Name', 'Line')

# Calculate degree centrality for each station
degree_centrality = nx.degree_centrality(G)

# Identify key stations with high degree centrality
key_stations = [station for station, centrality in degree_centrality.items() if centrality > 0.1]
print("Key Stations:", key_stations)

Key Stations: ['Pink line', 'Blue line', 'Voilet line', 'Yellow line']


In [197]:
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
from math import radians, sin, cos, sqrt, atan2

# Function to calculate haversine distance
def haversine_distance(lat1, lon1, lat2, lon2):
    R = 6371  # Radius of the Earth in kilometers

    # Convert latitude and longitude from degrees to radians
    lat1, lon1, lat2, lon2 = map(radians, [lat1, lon1, lat2, lon2])

    # Haversine formula
    dlat = lat2 - lat1
    dlon = lon2 - lon1
    a = sin(dlat / 2) ** 2 + cos(lat1) * cos(lat2) * sin(dlon / 2) ** 2
    c = 2 * atan2(sqrt(a), sqrt(1 - a))

    # Calculate the distance
    distance = R * c
    return distance

# Create a network graph
G = nx.Graph()

# Add nodes (stations) with attributes (latitude, longitude, station name)
for _, station in metro_data.iterrows():
    G.add_node(station['Station Name'], latitude=station['Latitude'], longitude=station['Longitude'])

distance_threshold_km = 5.0

for i, station1 in metro_data.iterrows():
    for j, station2 in metro_data.iterrows():
        if i != j:
            distance_km = haversine_distance(station1['Latitude'], station1['Longitude'],
                                             station2['Latitude'], station2['Longitude'])
            if distance_km <= distance_threshold_km:
                G.add_edge(station1['Station Name'], station2['Station Name'], weight=distance_km)

# Calculate degree centrality for each station
degree_centrality = nx.degree_centrality(G)

# Find stations with the highest degree centrality
highest_connectivity_stations = sorted(degree_centrality, key=degree_centrality.get, reverse=True)[:5]

print("Stations with Highest Connectivity:")
for station in highest_connectivity_stations:
    print(station)



Stations with Highest Connectivity:
Jhandewalan
Shastri Nagar
Jama Masjid
Pratap Nagar
Mandi House [Conn: Violet]
