In [1]:
import pandas as pd
import folium
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.io as pio
pio.templates.default = "plotly_white"

In [2]:
pip install folium 

Note: you may need to restart the kernel to use updated packages.


In [4]:
metro_data = pd.read_csv("/kaggle/input/metro-network-delhi/DelhiMetroNetwork.csv")

metro_data.head(10)

Unnamed: 0,Station ID,Station Name,Distance from Start (km),Line,Opening Date,Station Layout,Latitude,Longitude
0,1,Jhil Mil,10.3,Red line,2008-04-06,Elevated,28.67579,77.31239
1,2,Welcome [Conn: Red],46.8,Pink line,2018-10-31,Elevated,28.6718,77.27756
2,3,DLF Phase 3,10.0,Rapid Metro,2013-11-14,Elevated,28.4936,77.0935
3,4,Okhla NSIC,23.8,Magenta line,2017-12-25,Elevated,28.554483,77.264849
4,5,Dwarka Mor,10.2,Blue line,2005-12-30,Elevated,28.61932,77.03326
5,6,Dilli Haat INA [Conn: Yellow],24.9,Pink line,2018-06-08,Underground,28.574408,77.210241
6,7,Noida Sector 143,11.5,Aqua line,2019-01-25,Elevated,28.502663,77.426256
7,8,Moolchand,15.1,Voilet line,2010-03-10,Elevated,28.56417,77.23423
8,9,Chawri Bazar,15.3,Yellow line,2005-03-07,Underground,28.64931,77.22637
9,10,Maya Puri,12.8,Pink line,2018-03-14,Elevated,28.637179,77.129733


In [5]:
metro_data.tail(5)

Unnamed: 0,Station ID,Station Name,Distance from Start (km),Line,Opening Date,Station Layout,Latitude,Longitude
280,281,Bata Chowk,38.3,Voilet line,2015-06-09,Elevated,28.385836,77.313462
281,282,Dwarka Sector 12,5.8,Blue line,2006-01-04,Elevated,28.59232,77.04051
282,283,Noida Sector 18,43.6,Blue line,2009-12-11,Elevated,28.57081,77.32612
283,284,Knowledge Park II,21.4,Aqua line,2019-01-25,Elevated,28.456867,77.500054
284,285,Mayur Vihar Extention,39.5,Blue line,2009-12-11,Elevated,28.594158,77.294589


In [6]:
metro_data.shape

(285, 8)

In [8]:
missing_values = metro_data.isnull().sum()
missing_values

Station ID                  0
Station Name                0
Distance from Start (km)    0
Line                        0
Opening Date                0
Station Layout              0
Latitude                    0
Longitude                   0
dtype: int64

In [9]:
data_types = metro_data.dtypes
data_types

Station ID                    int64
Station Name                 object
Distance from Start (km)    float64
Line                         object
Opening Date                 object
Station Layout               object
Latitude                    float64
Longitude                   float64
dtype: object

In [10]:
print("Basic Information about the Dataset:")
print(metro_data.info())

Basic Information about the Dataset:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 285 entries, 0 to 284
Data columns (total 8 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   Station ID                285 non-null    int64  
 1   Station Name              285 non-null    object 
 2   Distance from Start (km)  285 non-null    float64
 3   Line                      285 non-null    object 
 4   Opening Date              285 non-null    object 
 5   Station Layout            285 non-null    object 
 6   Latitude                  285 non-null    float64
 7   Longitude                 285 non-null    float64
dtypes: float64(3), int64(1), object(4)
memory usage: 17.9+ KB
None


In [11]:
print("\nSummary Statistics for Numerical Columns:")
print(metro_data.describe())


Summary Statistics for Numerical Columns:
       Station ID  Distance from Start (km)    Latitude   Longitude
count  285.000000                285.000000  285.000000  285.000000
mean   143.000000                 19.218947   28.595428   77.029315
std     82.416625                 14.002862    0.091316    2.875400
min      1.000000                  0.000000   27.920862   28.698807
25%     72.000000                  7.300000   28.545828   77.107130
50%    143.000000                 17.400000   28.613453   77.207220
75%    214.000000                 28.800000   28.666360   77.281165
max    285.000000                 52.700000   28.878965   77.554479


In [12]:
df = pd.DataFrame(metro_data)
histogram_fig = px.histogram(df, x='Distance from Start (km)', nbins=10, title='Distribution of Distance from Start (km)')
histogram_fig.show()

In [13]:
df = pd.DataFrame(metro_data)

df['Opening Date'] = pd.to_datetime(df['Opening Date'])

line_fig = px.line(df, x='Station ID', y='Opening Date', title='Trend of Opening Dates of Stations',
                   color_discrete_sequence=['skyblue'])

fig_pie = px.pie(df, 
                 names='Station Layout', 
                 title='Station Layout Distribution', 
                 template='plotly_white')

fig_pie.update_traces(marker=dict(colors=['#ffcccb', '#add8e6']), textinfo='percent+label')

box_fig = px.box(df, x='Line', y='Distance from Start (km)', title='Distribution of Distances from Start for Each Line')

scatter_fig = px.scatter(df, x='Longitude', y='Latitude', color='Line', hover_data=['Station Name'],
                         title='Metro Stations by Latitude and Longitude')

line_counts = df['Line'].value_counts().reset_index()
line_counts.columns = ['Line', 'Count']

fig = px.pie(line_counts, values='Count', names='Line', title='Distribution of Metro Lines')
fig.show()

line_fig.show()
box_fig.show()
scatter_fig.show()
fig_pie.show()

In [14]:
metro_data['Opening Date'] = pd.to_datetime(metro_data['Opening Date'])

In [1]:
line_colors = {
    'Red line': 'red',
    'Blue line': 'blue',
    'Yellow line': 'beige',
    'Green line': 'green',
    'Voilet line': 'purple',
    'Pink line': 'pink',
    'Magenta line': 'darkred',
    'Orange line': 'orange',
    'Rapid Metro': 'cadetblue',
    'Aqua line': 'black',
    'Green line branch': 'lightgreen',
    'Blue line branch': 'lightblue',
    'Gray line': 'lightgray'
}

In [16]:
delhi_map_with_line_tooltip = folium.Map(location=[28.7041, 77.1025], zoom_start=11)

In [17]:
for index, row in metro_data.iterrows():
    line = row['Line']
    color = line_colors.get(line, 'black') 
    folium.Marker(
        location=[row['Latitude'], row['Longitude']],
        popup=f"{row['Station Name']}",
        tooltip=f"{row['Station Name']}, {line}",
        icon=folium.Icon(color=color)
      ).add_to(delhi_map_with_line_tooltip)

In [18]:
delhi_map_with_line_tooltip

In [19]:
metro_data['Opening Year'] = metro_data['Opening Date'].dt.year

In [20]:

stations_per_year = metro_data['Opening Year'].value_counts().sort_index()

In [21]:
stations_per_year_df = stations_per_year.reset_index()
stations_per_year_df.columns = ['Year', 'Number of Stations']

In [22]:
fig = px.bar(stations_per_year_df,x='Year',y='Number of Stations',
             title="Number of Metro Stations Opened Each Year in Delhi",
             labels={'Number of Stations':'Number of Stations Opened'},
             text_auto = False,
             color='Year'
             )

fig.update_layout(xaxis_tickangle = -60,xaxis = dict(tickmode='linear'))
fig.update_layout(width=1400,height=600)

fig.show()

In [23]:
stations_per_line = metro_data['Line'].value_counts()
total_distance_per_line = metro_data.groupby('Line')['Distance from Start (km)'].max()

avg_distance_per_line = total_distance_per_line / (stations_per_line - 1)

line_analysis = pd.DataFrame({
    'Line': stations_per_line.index,
    'Number of Stations': stations_per_line.values,
    'Average Distance Between Stations (km)': avg_distance_per_line
})
line_analysis = line_analysis.sort_values(by='Number of Stations', ascending=False)

line_analysis.reset_index(drop=True, inplace=True)
print(line_analysis)


                 Line  Number of Stations  \
0           Blue line                  49   
1           Pink line                  38   
2         Yellow line                  37   
3         Voilet line                  34   
4            Red line                  29   
5        Magenta line                  25   
6           Aqua line                  21   
7          Green line                  21   
8         Rapid Metro                  11   
9    Blue line branch                   8   
10        Orange line                   6   
11          Gray line                   3   
12  Green line branch                   3   

    Average Distance Between Stations (km)  
0                                 1.355000  
1                                 1.097917  
2                                 1.157143  
3                                 1.950000  
4                                 1.240000  
5                                 1.050000  
6                                 1.379167  
7        

In [24]:
fig = make_subplots(rows=1,cols=2,subplot_titles=('Number of Stations Per Metro Line',
                                                  'Average Distance Between Stations Per Metro Line (km)'),
                                                  horizontal_spacing=0.2)

fig.add_trace(
    go.Bar(y=line_analysis['Line'],x=line_analysis['Number of Stations'],
           orientation='h',name='Number Of Stations',marker_color='purple'),
           row=1,col=1)

fig.add_trace(
    go.Bar(y=line_analysis['Line'],x=line_analysis['Average Distance Between Stations (km)'],
           orientation='h',name='Average Distance (km)',marker_color='olive'),
           row=1,col=2)

fig.update_xaxes(title_text="Number of Stations", row=1, col=1)
fig.update_xaxes(title_text="Average Distance Between Stations (km)", row=1, col=2)

fig.update_yaxes(title_text="Metro Line",row=1,col=1)
fig.update_layout(height=600,width=1400,title_text='Metro Line Analysis')

fig.show()

In [25]:
layout_counts = metro_data['Station Layout'].value_counts()
fig = px.bar(x=layout_counts.index, y=layout_counts.values,
             labels={'x': 'Station Layout', 'y': 'Number of Stations'},
             title='Distribution of Delhi Metro Station Layouts',
             color=layout_counts.index,
             color_continuous_scale='pastel')

In [26]:
fig.update_layout(xaxis_title="Station Layout",
                  yaxis_title="Number of Stations",
                  coloraxis_showscale=False,
                  template="plotly_white")

fig.show()