<a href="https://colab.research.google.com/github/2022504813pratham/Pratham_Mittal/blob/main/DELHI_METRO_NETWORK_ANALYSIS.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Importing Necessary Python Libaries
import pandas as pd
import folium
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.io as pio
pio.templates.default = "plotly_white"

In [2]:
# Loading my dataset
metro_data = pd.read_excel('metro_data.xlsx')
metro_data.sample(5)

Unnamed: 0,Station ID,Station Name,Distance from Start (km),Line,Opening Date,Station Layout,Latitude,Longitude
61,62,Nangloi Railway Station,11.2,Green line,2010-02-04,Elevated,28.68208,77.05596
88,89,Noida Sector 61,50.5,Blue line,2019-09-03,Elevated,28.480863,77.084888
258,259,Dilli Haat INA [Conn: Pink],23.8,Yellow line,2010-03-09,Underground,28.57526,77.20935
109,110,Nehru Place,17.4,Voilet line,2010-03-10,Elevated,28.55148,77.25154
95,96,Sector 55-66,0.0,Rapid Metro,2017-03-31,Elevated,28.4936,77.0935


In [3]:
# Converting Data type of my 'Opening Date' col in to Datetime Format
metro_data['Opening Date'] = pd.to_datetime(metro_data['Opening Date'])

In [4]:
# Overall view of the data (Cols and their datatype)
metro_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 285 entries, 0 to 284
Data columns (total 8 columns):
 #   Column                    Non-Null Count  Dtype         
---  ------                    --------------  -----         
 0   Station ID                285 non-null    int64         
 1   Station Name              285 non-null    object        
 2   Distance from Start (km)  285 non-null    float64       
 3   Line                      285 non-null    object        
 4   Opening Date              285 non-null    datetime64[ns]
 5   Station Layout            285 non-null    object        
 6   Latitude                  285 non-null    float64       
 7   Longitude                 285 non-null    float64       
dtypes: datetime64[ns](1), float64(3), int64(1), object(3)
memory usage: 17.9+ KB


In [5]:
# To check if their is any Missing Values
metro_data.isnull().sum()

Unnamed: 0,0
Station ID,0
Station Name,0
Distance from Start (km),0
Line,0
Opening Date,0
Station Layout,0
Latitude,0
Longitude,0


### Removing Duplicates

In [6]:
duplicate_data = metro_data[metro_data.duplicated(subset=['Station Name'], keep=False)]
duplicate_data

Unnamed: 0,Station ID,Station Name,Distance from Start (km),Line,Opening Date,Station Layout,Latitude,Longitude
115,116,Yamuna Bank,0.0,Blue line branch,2009-10-05,At-Grade,28.62331,77.26792
120,121,Ashok Park Main,1.4,Green line,2010-02-04,Elevated,28.67153,77.15527
196,197,Ashok Park Main,0.0,Green line branch,2010-02-04,Elevated,28.67292,77.1442
274,275,Yamuna Bank,35.2,Blue line,2009-10-05,At-Grade,28.62331,77.26792


In [25]:
Station_Id = [116,197]
metro_data = metro_data[~metro_data['Station ID'].isin(Station_Id)]

In [9]:
# defining a color scheme for the metro lines
line_colors = {
    'Red line': 'red',
    'Blue line': 'blue',
    'Yellow line': 'beige',
    'Green line': 'green',
    'Voilet line': 'purple',
    'Pink line': 'pink',
    'Magenta line': 'darkred',
    'Orange line': 'orange',
    'Rapid Metro': 'cadetblue',
    'Aqua line': 'black',
    'Green line branch': 'lightgreen',
    'Blue line branch': 'lightblue',
    'Gray line': 'lightgray'
}

delhi_map_with_line_tooltip = folium.Map(location=[28.7041, 77.1025], zoom_start=11)

# adding colored markers for each metro station with line name in tooltip
for index, row in metro_data.iterrows():
    line = row['Line']
    color = line_colors.get(line, 'black')  # Default color is black if line not found in the dictionary
    folium.Marker(
        location=[row['Latitude'], row['Longitude']],
        popup=f"{row['Station Name']}",
        tooltip=f"{row['Station Name']}, {line}",
        icon=folium.Icon(color=color)
    ).add_to(delhi_map_with_line_tooltip)

# Displaying the updated map
delhi_map_with_line_tooltip

# We have to update metro station Coordinates because of their wrong positions in map can be seen above 🗺📌

In [10]:
file_path = "metro_data.xlsx"
metro_data.to_excel(file_path, index = False)

In [11]:
from IPython.display import FileLink
FileLink(file_path)

In [12]:
metro_data.loc[metro_data['Station Name'] == 'Sarai', ['Latitude', 'Longitude']] = [28.4776218,77.3042433]

In [13]:
metro_data[metro_data['Station Name'] =='Kashmere Gate [Conn: Violet,Yellow]']

Unnamed: 0,Station ID,Station Name,Distance from Start (km),Line,Opening Date,Station Layout,Latitude,Longitude
66,67,"Kashmere Gate [Conn: Violet,Yellow]",18.5,Red line,2002-12-24,Elevated,28.6675,77.22817


In [14]:
metro_data[metro_data['Line']=='Rapid Metro']

Unnamed: 0,Station ID,Station Name,Distance from Start (km),Line,Opening Date,Station Layout,Latitude,Longitude
2,3,DLF Phase 3,10.0,Rapid Metro,2013-11-14,Elevated,28.4936,77.0935
18,19,Belvedere Towers,8.0,Rapid Metro,2013-11-14,Elevated,28.4936,77.0935
84,85,Sikandarpur [Conn: Yellow],6.6,Rapid Metro,2013-11-14,Elevated,28.4936,77.0935
95,96,Sector 55-66,0.0,Rapid Metro,2017-03-31,Elevated,28.4936,77.0935
117,118,Cyber City,8.6,Rapid Metro,2014-07-05,Elevated,28.4936,77.0935
146,147,Sector 54 Chowk,1.1,Rapid Metro,2017-03-31,Elevated,28.4936,77.0935
167,168,Sector 42-43,3.9,Rapid Metro,2017-04-25,Elevated,28.4936,77.0935
194,195,Moulsari Avenue,9.2,Rapid Metro,2013-11-14,Elevated,28.4936,77.0935
198,199,DLF Phase 2,7.3,Rapid Metro,2013-11-14,Elevated,28.4936,77.0935
271,272,Sector 53-54,2.6,Rapid Metro,2017-04-25,Elevated,28.4936,77.0935


In [15]:
metro_data[metro_data['Station Name'] == 'NSEZ Noida']

Unnamed: 0,Station ID,Station Name,Distance from Start (km),Line,Opening Date,Station Layout,Latitude,Longitude
210,211,NSEZ Noida,6.3,Aqua line,2019-01-25,Elevated,28.535517,77.391029


# New Updated DataFrame 👇👇

In [16]:
metro_data = pd.read_excel('metro_data12.xlsx')
metro_data

Unnamed: 0,Station ID,Station Name,Distance from Start (km),Line,Opening Date,Station Layout,Latitude,Longitude
0,1,Jhil Mil,10.3,Red line,2008-04-06,Elevated,28.675790,77.312390
1,2,Welcome [Conn: Red],46.8,Pink line,2018-10-31,Elevated,28.671800,77.277560
2,3,DLF Phase 3,10.0,Rapid Metro,2013-11-14,Elevated,28.493670,77.088540
3,4,Okhla NSIC,23.8,Magenta line,2017-12-25,Elevated,28.554483,77.264849
4,5,Dwarka Mor,10.2,Blue line,2005-12-30,Elevated,28.619320,77.033260
...,...,...,...,...,...,...,...,...
282,283,Noida Sector 18,43.6,Blue line,2009-12-11,Elevated,28.570810,77.326120
283,284,Knowledge Park II,21.4,Aqua line,2019-01-25,Elevated,28.456867,77.500054
284,285,Mayur Vihar Extention,39.5,Blue line,2009-12-11,Elevated,28.594158,77.294589
285,286,Noida Electronic City,53.5,Blue line,2019-09-03,Elevated,28.627385,77.372518


In [17]:
import folium

# Defining a color scheme for the metro lines
line_colors = {
    'Red line': 'red',
    'Blue line': 'blue',
    'Yellow line': 'beige',
    'Green line': 'green',
    'Voilet line': 'purple',
    'Pink line': 'pink',
    'Magenta line': 'darkred',
    'Orange line': 'orange',
    'Rapid Metro': 'cadetblue',
    'Aqua line': 'black',
    'Green line branch': 'lightgreen',
    'Blue line branch': 'lightblue',
    'Gray line': 'lightgray'
}

# Creating a map centered on Delhi
delhi_map_with_line_tooltip = folium.Map(location=[28.7041, 77.1025], zoom_start=11)

# Adding colored markers for each metro station with line name in tooltip
for index, row in metro_data.iterrows():
    line = row['Line']
    color = line_colors.get(line, 'black')  # Default color is black if line not found in the dictionary
    folium.Marker(
        location=[row['Latitude'], row['Longitude']],
        popup=f"{row['Station Name']}",
        tooltip=f"{row['Station Name']}, {line}",
        icon=folium.Icon(color=color)
    ).add_to(delhi_map_with_line_tooltip)

# Drawing a single continuous line for Orange line stations in the order of Distance from Start (km)
orange_line_stations = metro_data[metro_data['Line'] == 'Orange line'].sort_values(by='Distance from Start (km)')
coordinates = orange_line_stations[['Latitude', 'Longitude']].values.tolist()

# Adding a PolyLine to the map
folium.PolyLine(locations=coordinates, color='orange', weight=5).add_to(delhi_map_with_line_tooltip)

# Displaying the updated map
delhi_map_with_line_tooltip


In [18]:
# Converting Data type of my 'Opening Date' col in to Datetime Format
metro_data['Opening Date'] = pd.to_datetime(metro_data['Opening Date'])

In [19]:
metro_data['Opening Year'] = metro_data['Opening Date'].dt.year

# counting the number of stations opened each year
stations_per_year = metro_data['Opening Year'].value_counts().sort_index()

stations_per_year_df = stations_per_year.reset_index()
stations_per_year_df.columns = ['Year', 'Number of Stations']

In [20]:
fig = px.bar(stations_per_year_df, x='Year', y='Number of Stations',
             title="Number of Metro Stations Opened Each Year in Delhi NCR",
             labels={'Year': 'Year', 'Number of Stations': 'Number of Stations Opened'},
             color='Number of Stations', color_continuous_scale='Solar')

fig.update_layout(xaxis_tickangle=-45, xaxis=dict(tickmode='linear'),
                  yaxis=dict(title='Number of Stations Opened'),
                  xaxis_title="Year")

# Adding annotations for significant years
significant_years = stations_per_year_df[stations_per_year_df['Number of Stations'] > 40]
for i, row in significant_years.iterrows():
    fig.add_annotation(x=row['Year'], y=row['Number of Stations'],
                       text=f"{row['Number of Stations']} stations",
                       showarrow=True, arrowhead=1)

fig.show()


### The bar chart illustrates the number of Delhi Metro stations opened each year.
#### Conversely, there are years with few or no new stations, which could be due to various factors like planning, funding, or construction challenges

## Metro lines in terms of the number of stations they have and the average distance between stations.
#### It will give us insights into the characteristics of each metro line, such as which lines are more extensive or denser.

In [21]:
stations_per_line = metro_data['Line'].value_counts()

# calculating the total distance of each metro line (max distance from start)
total_distance_per_line = metro_data.groupby('Line')['Distance from Start (km)'].max()

avg_distance_per_line = total_distance_per_line / (stations_per_line - 1)

line_analysis = pd.DataFrame({
    'Line': stations_per_line.index,
    'Number of Stations': stations_per_line.values,
    'Average Distance Between Stations (km)': avg_distance_per_line
})

# sorting the DataFrame by the number of stations
line_analysis = line_analysis.sort_values(by='Number of Stations', ascending=False)

line_analysis.reset_index(drop=True, inplace=True)
pd.DataFrame(line_analysis)

Unnamed: 0,Line,Number of Stations,Average Distance Between Stations (km)
0,Blue line,50,1.355
1,Pink line,38,1.091837
2,Yellow line,37,1.157143
3,Voilet line,34,1.7
4,Red line,29,1.24
5,Magenta line,25,1.05
6,Aqua line,21,1.379167
7,Green line,21,4.16
8,Rapid Metro,11,1.421622
9,Blue line branch,8,1.0


In [22]:
# creating subplots
fig = make_subplots(rows=1, cols=2, subplot_titles=('Number of Stations Per Metro Line',
                                                    'Average Distance Between Stations Per Metro Line'),
                    horizontal_spacing=0.2)

# plot for Number of Stations per Line
fig.add_trace(
    go.Bar(y=line_analysis['Line'], x=line_analysis['Number of Stations'],
           orientation='h', name='Number of Stations', marker_color='crimson'),
    row=1, col=1
)

# plot for Average Distance Between Stations
fig.add_trace(
    go.Bar(y=line_analysis['Line'], x=line_analysis['Average Distance Between Stations (km)'],
           orientation='h', name='Average Distance (km)', marker_color='navy'),
    row=1, col=2
)

# update xaxis properties
fig.update_xaxes(title_text="Number of Stations", row=1, col=1)
fig.update_xaxes(title_text="Average Distance Between Stations (km)", row=1, col=2)

# update yaxis properties
fig.update_yaxes(title_text="Metro Line", row=1, col=1)
fig.update_yaxes(title_text="", row=1, col=2)

# update layout
fig.update_layout(height=600, width=1200, title_text="Metro Line Analysis", template="plotly_white")

fig.show()

## Exploring the station layouts (Elevated, Ground Level, Underground).

####  Analyzing the distribution of these layouts across the network and see if there are any patterns or trends, such as certain lines favouring a particular layout.

In [23]:
layout_counts = metro_data['Station Layout'].value_counts()

# creating the bar plot using Plotly
fig = px.bar(x=layout_counts.index, y=layout_counts.values,
             labels={'x': 'Station Layout', 'y': 'Number of Stations'},
             title='Distribution of Delhi Metro Station Layouts',
             color=layout_counts.index,
             color_continuous_scale='pastel')

# updating layout for better presentation
fig.update_layout(xaxis_title="Station Layout",
                  yaxis_title="Number of Stations",
                  coloraxis_showscale=False,
                  template="plotly_white")

fig.show()

In [26]:
metro_data[metro_data['Station Layout'] == 'At-Grade']

Unnamed: 0,Station ID,Station Name,Distance from Start (km),Line,Opening Date,Station Layout,Latitude,Longitude,Opening Year
111,112,Kirti Nagar [Conn: Blue],2.1,Green line branch,2011-08-27,At-Grade,28.65575,77.15057,2011
274,275,Yamuna Bank,35.2,Blue line,2009-10-05,At-Grade,28.62331,77.26792,2009


In [None]:
from google.colab import drive
drive.mount('/content/drive')

#### Metro Network Analysis involves examining the network of metro systems to understand their structure, efficiency, and effectiveness.
#### It typically includes analyzing routes, stations, traffic, connectivity, and other operational aspects.