## Importing Libraries

In [1]:
import pandas as pd
import folium
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.io as pio
pio.templates.default = "plotly_white"

## Loading Dataset

In [41]:
df = pd.read_csv('Delhi-Metro-Network.csv')

In [42]:
df.head()

Unnamed: 0,Station ID,Station Name,Distance from Start (km),Line,Opening Date,Station Layout,Latitude,Longitude
0,1,Jhil Mil,10.3,Red line,2008-04-06,Elevated,28.67579,77.31239
1,2,Welcome [Conn: Red],46.8,Pink line,2018-10-31,Elevated,28.6718,77.27756
2,3,DLF Phase 3,10.0,Rapid Metro,2013-11-14,Elevated,28.4936,77.0935
3,4,Okhla NSIC,23.8,Magenta line,2017-12-25,Elevated,28.554483,77.264849
4,5,Dwarka Mor,10.2,Blue line,2005-12-30,Elevated,28.61932,77.03326


## Data Preprocessing

In [5]:
df.isnull().sum()

Station ID                  0
Station Name                0
Distance from Start (km)    0
Line                        0
Opening Date                0
Station Layout              0
Latitude                    0
Longitude                   0
dtype: int64

In [6]:
df.describe()

Unnamed: 0,Station ID,Distance from Start (km),Latitude,Longitude
count,285.0,285.0,285.0,285.0
mean,143.0,19.218947,28.595428,77.029315
std,82.416625,14.002862,0.091316,2.8754
min,1.0,0.0,27.920862,28.698807
25%,72.0,7.3,28.545828,77.10713
50%,143.0,17.4,28.613453,77.20722
75%,214.0,28.8,28.66636,77.281165
max,285.0,52.7,28.878965,77.554479


In [7]:
df.dtypes

Station ID                    int64
Station Name                 object
Distance from Start (km)    float64
Line                         object
Opening Date                 object
Station Layout               object
Latitude                    float64
Longitude                   float64
dtype: object

### Converting opening date to datetime

In [20]:
df['Opening Date']=pd.to_datetime(df['Opening Date'])

## Geospatial Analysis

In [9]:
# defining a color scheme for the metro lines
line_colors = {
    'Red line': 'red',
    'Blue line': 'blue',
    'Yellow line': 'beige',
    'Green line': 'green',
    'Voilet line': 'purple',
    'Pink line': 'pink',
    'Magenta line': 'darkred',
    'Orange line': 'orange',
    'Rapid Metro': 'cadetblue',
    'Aqua line': 'black',
    'Green line branch': 'lightgreen',
    'Blue line branch': 'lightblue',
    'Gray line': 'lightgray'
}

In [11]:
delhi_map_with_line_tooltip = folium.Map(location=[28.7041, 77.1025], zoom_start=11)

# adding colored markers for each metro station with line name in tooltip
for index, row in df.iterrows():
    line = row['Line']
    color = line_colors.get(line, 'black')  # Default color is black if line not found in the dictionary
    folium.Marker(
        location=[row['Latitude'], row['Longitude']],
        popup=f"{row['Station Name']}",
        tooltip=f"{row['Station Name']}, {line}",
        icon=folium.Icon(color=color)
    ).add_to(delhi_map_with_line_tooltip)

# Displaying the updated map
delhi_map_with_line_tooltip

## Temporal Analysis

### Analysing the growth of metro over the years


In [21]:
df['Opening Year']=df['Opening Date'].dt.year

In [28]:
# counting the number of stations opened each year
stations_per_year = df['Opening Year'].value_counts().sort_index()

stations_per_year_df = stations_per_year.reset_index()
stations_per_year_df.columns = ['Year', 'Number of Stations']

fig = px.bar(stations_per_year_df, x='Year', y='Number of Stations',
             title="Number of Metro Stations Opened Each Year in Delhi",
             labels={'Year': 'Year', 'Number of Stations': 'Number of Stations Opened'})

fig.update_layout(xaxis_tickangle=-45, xaxis=dict(tickmode='linear'),
                  yaxis=dict(title='Number of Stations Opened'),
                  xaxis_title="Year")

fig.add_trace(px.scatter(stations_per_year_df, x='Year', y='Number of Stations').data[0])


fig.show()

## Analysing for each line

In [23]:
df.head()

Unnamed: 0,Station ID,Station Name,Distance from Start (km),Line,Opening Date,Station Layout,Latitude,Longitude,Opening Year
0,1,Jhil Mil,10.3,Red line,2008-04-06,Elevated,28.67579,77.31239,2008
1,2,Welcome [Conn: Red],46.8,Pink line,2018-10-31,Elevated,28.6718,77.27756,2018
2,3,DLF Phase 3,10.0,Rapid Metro,2013-11-14,Elevated,28.4936,77.0935,2013
3,4,Okhla NSIC,23.8,Magenta line,2017-12-25,Elevated,28.554483,77.264849,2017
4,5,Dwarka Mor,10.2,Blue line,2005-12-30,Elevated,28.61932,77.03326,2005


In [26]:
df['Line'].unique()

array(['Red line', 'Pink line', 'Rapid Metro', 'Magenta line',
       'Blue line', 'Aqua line', 'Voilet line', 'Yellow line',
       'Green line', 'Gray line', 'Orange line', 'Green line branch',
       'Blue line branch'], dtype=object)

In [32]:
# Count stations opened each year for each metro line
stations_per_line_year = df.groupby(['Opening Year', 'Line']).size().reset_index(name='Number of Stations')

custom_colors = {
    'Red line': '#FF0000',        # Red
    'Pink line': '#FFC0CB',       # Pink
    'Rapid Metro': '#8B0000',     # Dark Red
    'Magenta line': '#FF00FF',    # Magenta
    'Blue line': '#0000FF',       # Blue
    'Aqua line': '#00FFFF',       # Aqua/Cyan
    'Voilet line': '#EE82EE',     # Violet
    'Yellow line': '#FFFF00',     # Yellow
    'Green line': '#008000',      # Green
    'Gray line': '#808080',       # Gray
    'Orange line': '#FFA500',     # Orange
    'Green line branch': '#2E8B57',  # SeaGreen (Different from Green Line)
    'Blue line branch': '#4682B4'  # SteelBlue (Different from Blue Line)
}

fig = px.bar(stations_per_line_year, 
             x='Opening Year', 
             y='Number of Stations', 
             color='Line',  
             title="Number of Metro Stations Opened Each Year by Line in Delhi",
             labels={'Opening Year': 'Year', 'Number of Stations': 'Number of Stations Opened'},
             barmode='stack',
             color_discrete_map=custom_colors)  


fig.update_layout(xaxis_tickangle=-45, 
                  xaxis=dict(tickmode='linear'),
                  yaxis=dict(title='Number of Stations Opened'),
                  xaxis_title="Year")


fig.show()


In [33]:
# Count stations opened each year for each metro line
stations_per_line_year = df.groupby(['Opening Year', 'Line']).size().reset_index(name='Number of Stations')

custom_colors = {
    'Red line': '#FF0000',        # Red
    'Pink line': '#FFC0CB',       # Pink
    'Rapid Metro': '#8B0000',     # Dark Red
    'Magenta line': '#FF00FF',    # Magenta
    'Blue line': '#0000FF',       # Blue
    'Aqua line': '#00FFFF',       # Aqua/Cyan
    'Voilet line': '#EE82EE',     # Violet
    'Yellow line': '#FFFF00',     # Yellow
    'Green line': '#008000',      # Green
    'Gray line': '#808080',       # Gray
    'Orange line': '#FFA500',     # Orange
    'Green line branch': '#2E8B57',  # SeaGreen (Different from Green Line)
    'Blue line branch': '#4682B4'  # SteelBlue (Different from Blue Line)
}

fig = px.bar(stations_per_line_year, 
             x='Opening Year', 
             y='Number of Stations', 
             color='Line',  
             title="Number of Metro Stations Opened Each Year by Line in Delhi",
             labels={'Opening Year': 'Year', 'Number of Stations': 'Number of Stations Opened'},
             barmode='group',
             color_discrete_map=custom_colors)  


fig.update_layout(xaxis_tickangle=-45, 
                  xaxis=dict(tickmode='linear'),
                  yaxis=dict(title='Number of Stations Opened'),
                  xaxis_title="Year")


fig.show()


## Analysinf for Different Types Of Station

In [34]:
df['Station Layout'].unique()

array(['Elevated', 'Underground', 'At-Grade'], dtype=object)

In [37]:
type_of_station = df.groupby(['Opening Year','Station Layout']).size().reset_index(name='Number of Stations')

fig = px.bar(type_of_station,
             x='Opening Year',
             y='Number of Stations',
             color='Station Layout',
             title='Number of Stations Opened Per Year By Type',
             labels={'Opening Year':'Year','Number of Stations':'Number of Stations Opened'},
             barmode='stack'
             )


fig.update_layout(xaxis_tickangle=-45, 
                  xaxis=dict(tickmode='linear'),
                  yaxis=dict(title='Number of Stations Opened'),
                  xaxis_title="Year")


fig.show()

## Line Analysis

In [49]:
stations_per_line = df['Line'].value_counts()

# calculating the total distance of each metro line (max distance from start)
total_distance_per_line = df.groupby('Line')['Distance from Start (km)'].max()

avg_distance_per_line = total_distance_per_line / (stations_per_line - 1)

line_analysis = pd.DataFrame({
    'Line': stations_per_line.index,
    'Number of Stations': stations_per_line.values,
    'Average Distance Between Stations (km)': avg_distance_per_line
})

# sorting the DataFrame by the number of stations
line_analysis = line_analysis.sort_values(by='Number of Stations', ascending=False)

line_analysis.reset_index(drop=True, inplace=True)
print(line_analysis)

                 Line  Number of Stations  \
0           Blue line                  49   
1           Pink line                  38   
2         Yellow line                  37   
3         Voilet line                  34   
4            Red line                  29   
5        Magenta line                  25   
6           Aqua line                  21   
7          Green line                  21   
8         Rapid Metro                  11   
9    Blue line branch                   8   
10        Orange line                   6   
11          Gray line                   3   
12  Green line branch                   3   

    Average Distance Between Stations (km)  
0                                 1.355000  
1                                 1.097917  
2                                 1.157143  
3                                 1.950000  
4                                 1.240000  
5                                 1.050000  
6                                 1.379167  
7        

### Plotting the same 

In [50]:
# creating subplots
fig = make_subplots(rows=1, cols=2, subplot_titles=('Number of Stations Per Metro Line',
                                                    'Average Distance Between Stations Per Metro Line'),
                    horizontal_spacing=0.2)

# plot for Number of Stations per Line
fig.add_trace(
    go.Bar(y=line_analysis['Line'], x=line_analysis['Number of Stations'],
           orientation='h', name='Number of Stations', marker_color='crimson'),
    row=1, col=1
)

# plot for Average Distance Between Stations
fig.add_trace(
    go.Bar(y=line_analysis['Line'], x=line_analysis['Average Distance Between Stations (km)'],
           orientation='h', name='Average Distance (km)', marker_color='navy'),
    row=1, col=2
)

# update xaxis properties
fig.update_xaxes(title_text="Number of Stations", row=1, col=1)
fig.update_xaxes(title_text="Average Distance Between Stations (km)", row=1, col=2)

# update yaxis properties
fig.update_yaxes(title_text="Metro Line", row=1, col=1)
fig.update_yaxes(title_text="", row=1, col=2)

# update layout
fig.update_layout(height=600, width=1200, title_text="Metro Line Analysis", template="plotly_white")

fig.show()

## Station Layout Analysis

In [51]:
station_layout = df['Station Layout'].value_counts()

fig = px.bar(x=station_layout.index,
             y=station_layout.values,
             labels = {'x':'Type of Station','y':'Number of Stations'},
             color=station_layout.index,
             color_continuous_scale='pastel')

fig.update_layout(xaxis_title="Station Layout",
                  yaxis_title="Number of Stations",
                  coloraxis_showscale=False,
                  template="plotly_white")

fig.show()

Indicates how the urban structure of the city has accomodated more elevated station type while at grade stations are rare due to space 
 required to construct these stations, 