In [1]:
import pandas as pd
import folium
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.io as pio
pio.templates.default = "plotly_white"

metro_data = pd.read_csv("VictoriaData.csv")

print(metro_data.head())


      ID  Postcode        Locality State   Longitude   Latitude  \
0   4746    3000.0       MELBOURNE   VIC  144.982585 -37.814437   
1   4748    3002.0  EAST MELBOURNE   VIC  144.982585 -37.814437   
2   4749    3003.0  WEST MELBOURNE   VIC  144.949592 -37.810871   
3   4750    3004.0       MELBOURNE   VIC  144.982585 -37.814437   
4  20754    3006.0     SOUTH WHARF   VIC  144.952074 -37.825287   

        Category           Type      SA3        SA3 Name  ...  \
0  Delivery Area  Delivery Area  20604.0  Melbourne City  ...   
1  Delivery Area  Delivery Area  20604.0  Melbourne City  ...   
2  Delivery Area  Delivery Area  20604.0  Melbourne City  ...   
3  Delivery Area  Delivery Area  20605.0    Port Phillip  ...   
4  Delivery Area  Delivery Area  20605.0    Port Phillip  ...   

                  PHN Name    LGA Region LGA Code Electorate  \
0  North Western Melbourne     Melbourne  24600.0  Melbourne   
1  North Western Melbourne     Melbourne  24600.0  Melbourne   
2  North Weste

In [2]:
# checking for missing values
missing_values = metro_data.isnull().sum()

In [3]:
# checking data types
data_types = metro_data.dtypes


In [4]:
# checking for missing values
missing_values = metro_data.isnull().sum()

# checking data types
data_types = metro_data.dtypes

missing_values

Unnamed: 0,0
ID,26
Postcode,32
Locality,32
State,32
Longitude,32
Latitude,32
Category,32
Type,32
SA3,32
SA3 Name,32


In [6]:
# defining a color scheme for the metro lines
line_colors = {
    'Red line': 'red',
    'Blue line': 'blue',
    'Yellow line': 'beige',
    'Green line': 'green',
    'Voilet line': 'purple',
    'Pink line': 'pink',
    'Magenta line': 'darkred',
    'Orange line': 'orange',
    'Rapid Metro': 'cadetblue',
    'Aqua line': 'black',
    'Green line branch': 'lightgreen',
    'Blue line branch': 'lightblue',
    'Gray line': 'lightgray'
}

delhi_map_with_line_tooltip = folium.Map(location=[-37.817403, 144.956776], zoom_start=11)

# adding colored markers for each metro station with line name in tooltip
for index, row in metro_data.iterrows():
    # Check if Latitude and Longitude are valid
    if pd.notna(row['Latitude']) and pd.notna(row['Longitude']):
        line = row['Line']
        color = line_colors.get(line, 'black')  # Default color is black if line not found in the dictionary
        folium.Marker(
            location=[row['Latitude'], row['Longitude']],
            popup=f"{row['SA3 Name']}",
            tooltip=f"{row['SA3 Name']}, {line}",
            icon=folium.Icon(color=color)
        ).add_to(delhi_map_with_line_tooltip)

# Displaying the updated map
delhi_map_with_line_tooltip

In [None]:
stations_per_line = metro_data['Line'].value_counts()

# calculating the total distance of each metro line (max distance from start)
total_distance_per_line = metro_data.groupby('Line')['LGA Code'].max()

avg_distance_per_line = total_distance_per_line / (stations_per_line - 1)

line_analysis = pd.DataFrame({
    'Line': stations_per_line.index,
    'Number of Stations': stations_per_line.values,
    'Average Distance Between Stations (km)': avg_distance_per_line
})

# sorting the DataFrame by the number of stations
line_analysis = line_analysis.sort_values(by='Number of Stations', ascending=False)

line_analysis.reset_index(drop=True, inplace=True)
print(line_analysis)

                 Line  Number of Stations  \
0           Blue line                  24   
1         Voilet line                  21   
2         Yellow line                  17   
3           Pink line                  17   
4        Magenta line                  16   
5            Red line                  15   
6           Aqua line                  11   
7          Green line                   9   
8         Rapid Metro                   3   
9         Orange line                   3   
10   Blue line branch                   2   
11  Green line branch                   1   
12          Gray line                   1   

    Average Distance Between Stations (km)  
0                             2.735000e+03  
1                             1.189130e+03  
2                             2.506000e+04  
3                                      inf  
4                             3.407500e+03  
5                                      inf  
6                             1.714000e+03  
7        

In [None]:
# creating subplots
fig = make_subplots(rows=1, cols=2, subplot_titles=('Number of Stations Per Metro Line',
                                                    'Average Distance Between Stations Per Metro Line'),
                    horizontal_spacing=0.2)

# plot for Number of Stations per Line
fig.add_trace(
    go.Bar(y=line_analysis['Line'], x=line_analysis['Number of Stations'],
           orientation='h', name='Number of Stations', marker_color='crimson'),
    row=1, col=1
)

# plot for Average Distance Between Stations
fig.add_trace(
    go.Bar(y=line_analysis['Line'], x=line_analysis['Average Distance Between Stations (km)'],
           orientation='h', name='Average Distance (km)', marker_color='navy'),
    row=1, col=2
)

# update xaxis properties
fig.update_xaxes(title_text="Number of Stations", row=1, col=1)
fig.update_xaxes(title_text="Average Distance Between Stations (km)", row=1, col=2)

# update yaxis properties
fig.update_yaxes(title_text="Metro Line", row=1, col=1)
fig.update_yaxes(title_text="", row=1, col=2)

# update layout
fig.update_layout(height=600, width=1200, title_text="Metro Line Analysis", template="plotly_white")

fig.show()

In [None]:
layout_counts = metro_data['Electorate Rating'].value_counts()

# creating the bar plot using Plotly
fig = px.bar(x=layout_counts.index, y=layout_counts.values,
             labels={'x': 'Electorate Rating', 'y': 'Number of Stations'},
             title='Distribution of Australia Metro Station Layouts',
             color=layout_counts.index,
             color_continuous_scale='pastel')

# updating layout for better presentation
fig.update_layout(xaxis_title="Electorate Rating",
                  yaxis_title="Number of Stations",
                  coloraxis_showscale=False,
                  template="plotly_white")

fig.show()