## Notebook 1) Current State of Irish Rail

#### Section 1.1: Introduction

In [None]:
# Read in Libraries
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import geopandas as gpd
import shapely.geometry
import numpy as np
import plotly.graph_objects as go

In [None]:
# Suppress every warning raised through Python's warnings module from this point on
import warnings
warnings.filterwarnings("ignore")

#### Section 1.2: Data Analytics

Source: https://data.gov.ie/dataset/tca01-passenger-journeys-by-rail-thousand

In [None]:
# Load the TCA01 passenger-journey CSV and discard the two columns not used below
df = (
    pd.read_csv("DS1) TCA01 - Passenger Journeys by Rail.csv")
    .drop(columns=['STATISTIC Label', 'UNIT'])
)
df

In [None]:
# Assess data: print a summary of the columns, their dtypes and non-null counts
df.info()

In [None]:
# Filter data: make Year numeric (values that cannot be parsed become NaN),
# then keep rows from 2005 onwards that are not the 'All journeys' rows
df['Year'] = pd.to_numeric(df['Year'], errors='coerce')
from_2005 = df['Year'] >= 2005
not_all_journeys = df['Type of Journey'] != 'All journeys'
df_filtered = df[from_2005 & not_all_journeys]
#df_filtered

In [None]:
# Reshape to wide form: one row per Year, one column per Type of Journey
df_pivot = df_filtered.pivot(index='Year', columns='Type of Journey', values='VALUE')

# The pivot function is a very good alternative to more manual data manipulation.

# Plot the line chart, styling it through the Axes object that .plot returns
ax = df_pivot.plot(kind='line', marker='o', figsize=(18, 8))
ax.set_title('Passenger Journeys by Rail', size=16)
ax.set_xlabel('Year', size=12)
ax.set_ylabel('Number of Journeys', size=12)
ax.legend(title='Type of Journey', bbox_to_anchor=(1, 1), loc='upper right')
ax.set_xticks(df_pivot.index[::5])  # tick at every fifth entry of the Year index
ax.grid(True)
plt.show()

We can clearly see the dramatic impact that COVID had on the number of people using the rail network. 

#### Section 1.3: Geographical Visualisation

Source: https://data.gov.ie/en_GB/dataset/rail-network-national-250k-map-of-ireland/resource/b8af1c61-2211-4668-937b-4953ee0c5402

In [None]:
# Read the Rail Network GeoJSON file into a GeoDataFrame
rn_gdf = gpd.read_file("G1) Rail_Network.geojson")
#rn_gdf

In [None]:
# Test Plot
#rn_gdf.plot(color='red', linewidth = 0.75)

Source: https://data.gov.ie/dataset/counties-national-statutory-boundaries-2019/resource/9f1da952-46ee-474a-a005-544b129bc970

In [None]:
# Read the county boundaries GeoJSON file into a GeoDataFrame
c_gdf = gpd.read_file("G2) Counties.geojson")
#c_gdf

Source: https://data.gov.ie/en_GB/dataset/railway-stations-national-250k-map-of-ireland2

In [None]:
# Read the railway stations GeoJSON file into a GeoDataFrame
s_gdf = gpd.read_file("G3) Railway_Stations.geojson")
#s_gdf

In [None]:
# Static rail map of Ireland: county outlines first, then rail lines, then stations
fig, ax = plt.subplots(figsize=(8, 8))

# Each entry pairs a layer with the keyword arguments used to draw it;
# the order of the tuple is the drawing order
map_layers = (
    (c_gdf.boundary, dict(color='black', linewidth=0.5)),
    (rn_gdf, dict(color='red', linewidth=0.75, label='Rail Network')),
    (s_gdf, dict(color='blue', markersize=10, label='Railway Stations')),
)
for layer, style in map_layers:
    layer.plot(ax=ax, **style)

ax.set_title('Irish Rail Network')
ax.legend(loc='upper left')
ax.set_axis_off()
plt.show()

Whilst this map is a good depiction of the Irish rail network, I wanted to take it further by creating an interactive map.

#### Section 1.4: Interactive Map

In [None]:
# Flatten every rail line into one latitude/longitude sequence for Plotly.
# A None entry is added after each line so that consecutive lines are drawn
# as separate segments instead of being joined end to end.
# Plain lists are used rather than np.append, which copies the whole array on
# every call and mixes float and int values in `names`.
lats = []
lons = []
names = []

for feature, name in zip(rn_gdf.geometry, rn_gdf.index):
    if isinstance(feature, shapely.geometry.linestring.LineString):
        linestrings = [feature]
    elif isinstance(feature, shapely.geometry.multilinestring.MultiLineString):
        linestrings = feature.geoms
    else:
        # Any other geometry type is skipped
        continue
    for linestring in linestrings:
        x, y = linestring.xy
        lats.extend(y)
        lons.extend(x)
        names.extend([name] * len(y))
        # Segment break
        lats.append(None)
        lons.append(None)
        names.append(None)

# Set the center for Ireland
center_coordinates = dict(lat=53.5, lon=-7.8)

# Create the light-grey county base layer.
# The counties frame drives the layer (one location per county feature), so
# every county is shaded regardless of how many rows the rail network has.
fig = px.choropleth_mapbox(
    c_gdf,
    geojson=c_gdf,
    locations=c_gdf.index,
    color_discrete_sequence=['lightgrey'],
    opacity=0.5,
    mapbox_style="carto-positron",
    center=center_coordinates,
    zoom=6,
    title='Ireland Rail Network'
)

# Overlay rail network lines
fig.add_trace(go.Scattermapbox(
    lat=lats,
    lon=lons,
    mode='lines',
    text=names,
    hoverinfo='text',
    line=dict(color='red', width=2),  # Adjust line color and width as needed
))

# Station coordinates are read from the point geometries
lats_stations = s_gdf.geometry.y
lons_stations = s_gdf.geometry.x

# Overlay rail stations; hover text comes from the NAMN1 column
stations_trace = go.Scattermapbox(
    lat=lats_stations,
    lon=lons_stations,
    mode='markers',
    marker=dict(size=8, color='blue'),
    text=s_gdf['NAMN1'],
    hoverinfo='text'
)

# Add the trace to the existing figure
fig.add_trace(stations_trace)
fig.update_layout(showlegend=False)

# Set the size of the window
fig.update_layout(
    width=900,
    height=800,
)
fig.show()

#### Section 1.5: Choropleth Map

Source: https://www.cso.ie/en/releasesandpublications/ep/p-lfs/labourforcesurveyquarter12021/labourforce/

In [None]:
# Read in the labour-force data (note: this rebinds `df`, replacing the passenger-journey frame)
df = pd.read_csv("DS2) F7001 - Population in the Labour Force.csv")
#df

In [None]:
# Merge Dublin values: add up the four Dublin areas into a single figure.
# .values[0] takes the first matching row for each area, so this assumes one
# row per area — TODO confirm against the CSV.
dublin_areas = ['Dublin City', 'Dún Laoghaire-Rathdown', 'South Dublin', 'Fingal']
dublin_value = sum(
    df.loc[df['County of Usual Residence'] == area, 'VALUE'].values[0]
    for area in dublin_areas
)

# Create a new row for Dublin
dublin_row = {
    'Statistic Label': 'Population aged 15 Years and Over in the Labour Force',
    'Census Year': 2022,
    'Sex': 'Both sexes',
    'County of Usual Residence': 'Dublin',
    'Age Group': 'All ages',
    'Labour Force': 'All persons',
    'UNIT': 'Number',
    'VALUE': dublin_value,
}

# Append the new row to the DataFrame.
# DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
df = pd.concat([df, pd.DataFrame([dublin_row])], ignore_index=True)

# Drop the original Dublin rows
df = df[~df['County of Usual Residence'].isin(dublin_areas)]

In [None]:
# Merge Galway values: add up Galway City and Galway County into a single figure.
# .values[0] takes the first matching row for each area, so this assumes one
# row per area — TODO confirm against the CSV.
galway_areas = ['Galway City', 'Galway County']
galway_value = sum(
    df.loc[df['County of Usual Residence'] == area, 'VALUE'].values[0]
    for area in galway_areas
)

# Create a new row for Galway
galway_row = {
    'Statistic Label': 'Population aged 15 Years and Over in the Labour Force',
    'Census Year': 2022,
    'Sex': 'Both sexes',
    'County of Usual Residence': 'Galway',
    'Age Group': 'All ages',
    'Labour Force': 'All persons',
    'UNIT': 'Number',
    'VALUE': galway_value,
}

# Append the new row to the DataFrame.
# DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
df = pd.concat([df, pd.DataFrame([galway_row])], ignore_index=True)

# Drop the original Galway rows
df = df[~df['County of Usual Residence'].isin(galway_areas)]

In [None]:
# Shorten the combined "city and county" labels to the plain county name
county_renames = {
    'Cork City and Cork County': 'Cork',
    'Limerick City and County': 'Limerick',
    'Waterford City and County': 'Waterford',
}
for combined_label, county_name in county_renames.items():
    df.loc[df['County of Usual Residence'] == combined_label, 'County of Usual Residence'] = county_name

In [None]:
#df

In [None]:
# Upper-case the county names in both frames so the join keys match regardless of case
c_gdf['COUNTY'] = c_gdf['COUNTY'].str.upper()
df['County of Usual Residence'] = df['County of Usual Residence'].str.upper()

# Left-join the VALUE column onto the county geometries, rename it to
# Population, and discard the join-key column that the merge brings across
county_values = df[['County of Usual Residence', 'VALUE']]
merged_gdf = (
    c_gdf.merge(
        county_values,
        left_on='COUNTY',
        right_on='County of Usual Residence',
        how='left',
    )
    .rename(columns={'VALUE': 'Population'})
    .drop(columns=['County of Usual Residence'])
)

In [None]:
#merged_gdf

In [None]:
# Lower and upper limits of the colour scale
color_scale_range = [0, 200000]

# County choropleth coloured by the 'Population' column of merged_gdf.
# NOTE(review): 'Population' is the labour-force VALUE merged in earlier, which
# looks like a head count rather than a density — confirm the title is intended.
fig_density = px.choropleth_mapbox(
    merged_gdf,
    geojson=merged_gdf.geometry,
    locations=merged_gdf.index,
    color='Population',
    color_continuous_scale="RdBu_r",
    range_color=color_scale_range,
    opacity=0.7,
    mapbox_style="carto-positron",
    center=center_coordinates,
    zoom=6,
    title='Ireland Population Density',
)

# Rail network overlay, reusing the lats / lons / names sequences built for the interactive map
rail_overlay = go.Scattermapbox(
    lat=lats,
    lon=lons,
    mode='lines',
    text=names,
    hoverinfo='text',
    line=dict(color='#FFD700', width=2),
)
fig_density.add_trace(rail_overlay)

# Window size and legend visibility in a single layout update
fig_density.update_layout(width=900, height=800, showlegend=False)
fig_density.show()

I appreciate that this notebook is very large, but I have taken a number of steps to reduce the size and processing power required to display the above visualisations. I also didn't want to remove any of the GeoJSON files above, simply because I thought the visualisations were at a good level of detail and I didn't want to lose any of that detail.

#### The End