## 1. Importing the libraries:

In [87]:
import pandas as pd
import numpy as np
import folium
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.io as pio
pio.templates.default = 'plotly_white'

import warnings

warnings.filterwarnings("ignore")

## 2. Reading the dataset:

In [7]:
metro_data = pd.read_csv("Delhi-Metro-Network.csv")

In [18]:
metro_data.head(2)

Unnamed: 0,Station ID,Station Name,Distance from Start (km),Line,Opening Date,Station Layout,Latitude,Longitude
0,1,Jhil Mil,10.3,Red line,2008-04-06,Elevated,28.67579,77.31239
1,2,Welcome [Conn: Red],46.8,Pink line,2018-10-31,Elevated,28.6718,77.27756


## 3. Performing EDA:

In [19]:
metro_data.describe()

Unnamed: 0,Station ID,Distance from Start (km),Latitude,Longitude
count,285.0,285.0,285.0,285.0
mean,143.0,19.218947,28.595428,77.029315
std,82.416625,14.002862,0.091316,2.8754
min,1.0,0.0,27.920862,28.698807
25%,72.0,7.3,28.545828,77.10713
50%,143.0,17.4,28.613453,77.20722
75%,214.0,28.8,28.66636,77.281165
max,285.0,52.7,28.878965,77.554479


#### 3.1 Checking the NULL Values:

In [29]:
metro_data.isna().sum()

Station ID                  0
Station Name                0
Distance from Start (km)    0
Line                        0
Opening Date                0
Station Layout              0
Latitude                    0
Longitude                   0
dtype: int64

#### 3.2 Checking the data types:

In [36]:
metro_data.dtypes

Station ID                    int64
Station Name                 object
Distance from Start (km)    float64
Line                         object
Opening Date                 object
Station Layout               object
Latitude                    float64
Longitude                   float64
dtype: object

###### The opening date column is represented as an object, so let's convert it to datetime object:

In [39]:
metro_data.head(2)

Unnamed: 0,Station ID,Station Name,Distance from Start (km),Line,Opening Date,Station Layout,Latitude,Longitude
0,1,Jhil Mil,10.3,Red line,2008-04-06,Elevated,28.67579,77.31239
1,2,Welcome [Conn: Red],46.8,Pink line,2018-10-31,Elevated,28.6718,77.27756


In [40]:
metro_data['Opening Date'] = pd.to_datetime(metro_data['Opening Date'])

In [44]:
metro_data.dtypes

Station ID                           int64
Station Name                        object
Distance from Start (km)           float64
Line                                object
Opening Date                datetime64[ns]
Station Layout                      object
Latitude                           float64
Longitude                          float64
dtype: object

There are no any NULL values, and the data type for Opening Date column has been changed to datetime format,
now, let's start the analysis.

## 4. Geo-Spatial Analysis:

In [80]:
metro_data['Line'].value_counts()

Blue line            49
Pink line            38
Yellow line          37
Voilet line          34
Red line             29
Magenta line         25
Aqua line            21
Green line           21
Rapid Metro          11
Blue line branch      8
Orange line           6
Gray line             3
Green line branch     3
Name: Line, dtype: int64

In [78]:
metro_data['Line'].value_counts().sum()

285

In [79]:
metro_data.shape

(285, 8)

In [50]:
print(f"In total we have: {metro_data['Line'].nunique()} unique lines, " 
      "so, let's assign a unique color to them so that we can plot it.")

In total we have: 13 unique lines, so, let's assign a unique color to them so that we can plot it.


#### 4.1 Assigning unique colors to each of the railway lines:

In [81]:
line_colors = {
    "Blue line": "blue",
    "Pink line": "pink",
    "Yellow line": "yellow",
    "Voilet line": "purple",
    "Red line": "red",
    "Magenta line": "black",
    "Aqua line": "lightblue",
    "Green line": "green",
    "Rapid Metro": "cadetblue",
    "Blue line branch": "darkpurple",
    "Orange line": "orange",
    "Gray line": "beige",
    "Green line branch": "lightgreen"
}

Let's visualize the tracks now:

In [95]:
print("The map below shows the line connection of different railway lines, hover over to get more info")
delhi_map_w_line_tooltip = folium.Map(location=[28.7041, 77.1025], zoom_start=11)
for index, row  in metro_data.iterrows():
    line = row['Line']
    color = line_colors.get(line, 'black')  # If line name is not found in dictionary, the default is black
    folium.Marker(
        location=[row['Latitude'], row['Longitude']],
        popup = row['Station Name'],
        tooltip=f" Station name: {row['Station Name']} \n Line: {line}",
        icon=folium.Icon(color=color)
    ).add_to(delhi_map_w_line_tooltip)

delhi_map_w_line_tooltip



The map below shows the line connection of different railway lines, hover over to get more info


## 5. The growth of railways over the time:

#### 5.1 Adding an extra column for the years:

In [105]:
metro_data['year'] = metro_data['Opening Date'].dt.year

In [108]:
metro_data.head(2)

Unnamed: 0,Station ID,Station Name,Distance from Start (km),Line,Opening Date,Station Layout,Latitude,Longitude,year
0,1,Jhil Mil,10.3,Red line,2008-04-06,Elevated,28.67579,77.31239,2008
1,2,Welcome [Conn: Red],46.8,Pink line,2018-10-31,Elevated,28.6718,77.27756,2018


In [115]:
num_of_stations_per_year = metro_data['year'].value_counts()
num_of_stations_per_year_df = num_of_stations_per_year.reset_index()
num_of_stations_per_year_df.head(2)

Unnamed: 0,index,year
0,2018,64
1,2010,54


#### 5.2 Renaming the columns in the new railway count dataframe:

In [118]:
num_of_stations_per_year_df.rename(columns={
    "index": "Year",
    "year": "Number of railway tracks"
}, inplace=True)

In [131]:
fig = px.bar(data_frame=num_of_stations_per_year_df, x='Year', y='Number of railway tracks', 
            text='Number of railway tracks',
            title="Number of metro stations opened each year from 2002 to 2019")

fig.update_layout(yaxis_title = "Number of stations",
                 xaxis_tickangle=-45, xaxis=dict(tickmode='linear'))

fig.update_traces(textposition='outside')
fig.show()

Some of the years didn't have any new connection, it could be due to various reasons:
1. Lack of planning.
2. Financial fundings or 
3. Construction challenges

From the above chart, **2018 has the highest number of railway tracks opened.**
Now, let's see which and how many tracks were opened in that year.

In [146]:
metro_2018_data = metro_data[metro_data['year'] == 2018][['Station Name', 'Distance from Start (km)', 'Station Layout']]

In [151]:
print(f"In the year 2018, a total of {metro_2018_data['Distance from Start (km)'].sum():.2f} KM distance were covered "
     "with the addition of new railway tracks.")

In the year 2018, a total of 1540.90 KM distance were covered with the addition of new railway tracks.


In [154]:
station_layoout = metro_2018_data['Station Layout'].value_counts().reset_index()

In [156]:
station_layoout.rename(columns={
    "index": "Station Layout",
    "Station Layout": "Total"
}, inplace=True)

In [171]:
fig = px.bar(data_frame=station_layoout, x="Station Layout", y="Total", color="Station Layout", 
       text="Total", title="Total of different layouts added in the year 2018")

fig.update_traces(textposition='outside', textfont_size=14)
fig.show()

## 6. Line Analysis:

Now, let's analyize the number of lines in terms of the number of stations and the average distance
between the stations.

In [174]:
metro_data.head()

Unnamed: 0,Station ID,Station Name,Distance from Start (km),Line,Opening Date,Station Layout,Latitude,Longitude,year
0,1,Jhil Mil,10.3,Red line,2008-04-06,Elevated,28.67579,77.31239,2008
1,2,Welcome [Conn: Red],46.8,Pink line,2018-10-31,Elevated,28.6718,77.27756,2018
2,3,DLF Phase 3,10.0,Rapid Metro,2013-11-14,Elevated,28.4936,77.0935,2013
3,4,Okhla NSIC,23.8,Magenta line,2017-12-25,Elevated,28.554483,77.264849,2017
4,5,Dwarka Mor,10.2,Blue line,2005-12-30,Elevated,28.61932,77.03326,2005


In [269]:
total_distance_per_line = metro_data.groupby("Line")['Distance from Start (km)'].max().reset_index()
total_number_of_lines = metro_data['Line'].value_counts().reset_index()

In [270]:
total_number_of_lines.rename(columns={'index': 'Line', 'Line':'total_stops'}, inplace=True)

In [271]:
distance_per_line_df = pd.merge(total_distance_per_line, total_number_of_lines, on='Line', how='inner')

In [274]:
distance_per_line_df['average_km_per_station'] = distance_per_line_df['Distance from Start (km)'] / (distance_per_line_df['total_stops'] - 1)

###### The summary of total distance covered,total number of stops,and average distance in KM per station:

In [273]:
distance_per_line_df

Unnamed: 0,Line,Distance from Start (km),total_stops,average_km_per_station
0,Aqua line,27.1,21,1.355
1,Blue line,52.7,49,1.097917
2,Blue line branch,8.1,8,1.157143
3,Gray line,3.9,3,1.95
4,Green line,24.8,21,1.24
5,Green line branch,2.1,3,1.05
6,Magenta line,33.1,25,1.379167
7,Orange line,20.8,6,4.16
8,Pink line,52.6,38,1.421622
9,Rapid Metro,10.0,11,1.0


#### Now, let's visualize these results:

In [321]:
# Creating the sub plots first with 1*2 dimension:
fig = make_subplots(rows=1, cols=2, subplot_titles=("Average distance in KM per stops", "Total number of stops per line"),
                   horizontal_spacing=0.2)

# Plotting the average distance in KM per stops:
fig.add_trace(go.Bar(y=distance_per_line_df['Line'], x=distance_per_line_df['average_km_per_station'],
                    name='Average distance in KM per station', marker_color='crimson', orientation='h'),
             row=1, col=1)



# Plotting the count of stops in each lines:
fig.add_trace(go.Bar(x=distance_per_line_df['total_stops'], y=distance_per_line_df['Line'],
                    name='Tota Number of stops in Each Line', marker_color='navy',orientation='h'),
             row=1, col=2)


# Updating X and Y axis properties:
fig.update_xaxes(title_text="Average Distance in KM", row=1, col=1)
fig.update_xaxes(title_text="Number of stations", row=1,col=2)

fig.update_yaxes(title_text="Metro Lines", row=1, col=1)

# Update layout:
fig.update_layout(height=550, width=950, title_text="Metro Line Analysis")

fig.show()

## 7. Station Layout Analysis:

In [331]:
station_layout_df = metro_data['Station Layout'].value_counts().reset_index()
station_layout_df

Unnamed: 0,index,Station Layout
0,Elevated,214
1,Underground,68
2,At-Grade,3


In [342]:
fig = px.bar(data_frame=station_layout_df, x='index', y='Station Layout', color='index',
        title='Delhi Metro Station Layout Count', text='Station Layout',
        labels={'index':'Layouts', 'Station Layout':'Number of stations'})


fig.update_traces(textposition = 'outside')


fig.show()