In [1]:
import pandas as pd
import folium
import plotly.express as px
import plotly.graph_objects as go 
from plotly.subplots import make_subplots
import plotly.io as pio

In [2]:
pio.templates.default = 'plotly_white'

In [3]:
metro_data = pd.read_csv('Delhi-Metro-Network.csv')
metro_data.head()

Unnamed: 0,Station ID,Station Name,Distance from Start (km),Line,Opening Date,Station Layout,Latitude,Longitude
0,160,Shaheed Sthal(First Station),0.0,Red line,03-08-2019,Elevated,28.670611,77.415582
1,2,Welcome [Conn: Red],46.8,Pink line,31-10-2018,Elevated,28.6718,77.27756
2,3,DLF Phase 3,10.0,Rapid Metro,14-11-2013,Elevated,28.4936,77.0935
3,4,Okhla NSIC,23.8,Magenta line,25-12-2017,Elevated,28.554483,77.264849
4,5,Dwarka Mor,10.2,Blue line,30-12-2005,Elevated,28.61932,77.03326


In [4]:
metro_data.describe()

Unnamed: 0,Station ID,Distance from Start (km),Latitude,Longitude
count,285.0,285.0,285.0,285.0
mean,143.0,19.218947,28.594727,77.029315
std,82.416625,14.002862,0.089887,2.8754
min,1.0,0.0,27.920862,28.698807
25%,72.0,7.3,28.545828,77.10713
50%,143.0,17.4,28.613453,77.20722
75%,214.0,28.8,28.66636,77.281165
max,285.0,52.7,28.744616,77.554479


In [5]:
metro_data.isna().sum()

Station ID                  0
Station Name                0
Distance from Start (km)    0
Line                        0
Opening Date                0
Station Layout              0
Latitude                    0
Longitude                   0
dtype: int64

In [6]:
metro_data.dtypes

Station ID                    int64
Station Name                 object
Distance from Start (km)    float64
Line                         object
Opening Date                 object
Station Layout               object
Latitude                    float64
Longitude                   float64
dtype: object

In [7]:
metro_data['Opening Date'] = pd.to_datetime(metro_data['Opening Date'],format="%d-%m-%Y")

In [8]:
metro_data['Opening Date'].dtypes

dtype('<M8[ns]')

In [9]:
metro_data['Station Name']

0      Shaheed Sthal(First Station)
1               Welcome [Conn: Red]
2                       DLF Phase 3
3                        Okhla NSIC
4                        Dwarka Mor
                   ...             
280                Raja Nahar Singh
281                Dwarka Sector 12
282                 Noida Sector 18
283               Knowledge Park II
284           Mayur Vihar Extention
Name: Station Name, Length: 285, dtype: object

In [10]:
metro_data['Line'].unique()

array(['Red line', 'Pink line', 'Rapid Metro', 'Magenta line',
       'Blue line', 'Aqua line', 'Voilet line', 'Yellow line',
       'Green line', 'Gray line', 'Orange line', 'Green line branch',
       'Blue line branch'], dtype=object)

In [11]:
line_colors = {
    'Red line': 'red',
    'Blue line': 'blue',
    'Yellow line': 'beige',
    'Green line': 'green',
    'Voilet line': 'purple',
    'Pink line': 'pink',
    'Magenta line': 'darkred',
    'Orange line': 'orange',
    'Rapid Metro': 'cadetblue',
    'Aqua line': 'black',
    'Green line branch': 'lightgreen',
    'Blue line branch': 'lightblue',
    'Gray line': 'lightgray'
}

In [12]:
delhi_map_with_line = folium.Map(location=[28.7041, 77.1025],zoom_start=11)

In [14]:
for index, row in metro_data.iterrows():
    line = row['Line']
    color = line_colors.get(line,"black")
    folium.Marker(
        location = [row['Latitude'],row['Longitude']],
        popup = f"{row['Station Name']}",
        tooltip = f"{row['Station Name']},{line}",
        icon=folium.Icon(color=color)
    ).add_to(delhi_map_with_line)

In [15]:
delhi_map_with_line

In [16]:
metro_data['Opening Year'] = metro_data['Opening Date'].dt.year

In [17]:
station_per_year = metro_data['Opening Year'].value_counts().sort_index()

In [18]:
station_per_year_df = station_per_year.reset_index()
station_per_year_df.columns = ['Year','Number of Stations']
station_per_year_df

Unnamed: 0,Year,Number of Stations
0,2002,6
1,2003,4
2,2004,11
3,2005,28
4,2006,9
5,2008,3
6,2009,17
7,2010,54
8,2011,13
9,2013,5


In [19]:
fig = px.bar(station_per_year_df,x='Year',y='Number of Stations',
             title="Number of Metro Stations Opened Each Year in Delhi",
             labels={'Number of Stations':'Number of Stations Opened'},
             text_auto = False,
             color='Year'
             )

fig.update_layout(xaxis_tickangle = -60,xaxis = dict(tickmode='linear'))
fig.update_layout(width=1400,height=600)


fig.show()

In [20]:
total = metro_data['Station ID'].count()
total

285

In [21]:
station_per_line = metro_data['Line'].value_counts()
station_per_line

Line
Blue line            49
Pink line            38
Yellow line          37
Voilet line          34
Red line             29
Magenta line         25
Aqua line            21
Green line           21
Rapid Metro          11
Blue line branch      8
Orange line           6
Gray line             3
Green line branch     3
Name: count, dtype: int64

In [22]:
total_distance_per_line = metro_data.groupby('Line')['Distance from Start (km)'].max()
avg_distance_per_line = total_distance_per_line / (station_per_line - 1)

line_analysis = pd.DataFrame(
    {
        'Line':station_per_line.index,
        'Number of Stations' : station_per_line.values,
        'AVG Distance Between Stations' : avg_distance_per_line
    }
)

line_analysis = line_analysis.sort_values(by='Number of Stations',ascending=False)

line_analysis.reset_index(drop=True,inplace=True)
line_analysis

Unnamed: 0,Line,Number of Stations,AVG Distance Between Stations
0,Blue line,49,1.355
1,Pink line,38,1.097917
2,Yellow line,37,1.157143
3,Voilet line,34,1.95
4,Red line,29,1.24
5,Magenta line,25,1.05
6,Aqua line,21,1.379167
7,Green line,21,4.16
8,Rapid Metro,11,1.421622
9,Blue line branch,8,1.0


In [23]:
fig = make_subplots(rows=1,cols=2,subplot_titles=('Number of Stations Per Metro Line',
                                                  'Average Distance Between Stations Per Metro Line (km)'),
                                                  horizontal_spacing=0.2)

fig.add_trace(
    go.Bar(y=line_analysis['Line'],x=line_analysis['Number of Stations'],
           orientation='h',name='Number Of Stations',marker_color='crimson'),
           row=1,col=1
)

fig.add_trace(
    go.Bar(y=line_analysis['Line'],x=line_analysis['AVG Distance Between Stations'],
           orientation='h',name='Average Distance (km)',marker_color='navy'),
           row=1,col=2
)

fig.update_xaxes(title_text="Number of Stations", row=1, col=1)
fig.update_xaxes(title_text="Average Distance Between Stations (km)", row=1, col=2)

fig.update_yaxes(title_text="Metro Line",row=1,col=1)
fig.update_layout(height=600,width=1400,title_text='Metro Line Analysis')

fig.show()

In [24]:
layout_counts = metro_data['Station Layout'].value_counts()

In [25]:
fig = px.bar(x=layout_counts.index,y=layout_counts.values,
             labels={'x':'Station Layout', 'y': 'Number of Stations'},
             title='Distribution of Delhi Metro Layouts',
             color=layout_counts.index,
             color_continuous_scale='pastel',
             text_auto=True,
             hover_name=layout_counts.index)

fig.update_layout(coloraxis_showscale=False,template="plotly_white")
fig.update_layout(height=400,width=600)

fig.show()

In [27]:
s = metro_data['Station Layout'].isin(['At-Grade'])
metro_data[s]['Station Name']

111    Kirti Nagar [Conn: Blue]
115                 Yamuna Bank
274                 Yamuna Bank
Name: Station Name, dtype: object