# Converting the Raw Datasets to a CSV File With the Data Processed and Sectioned by Station

In [2]:
import csv
import pandas as pd
import numpy as np
import os 
import warnings
warnings.filterwarnings("ignore")

# Target Variable - Station Catchment Area Rail Mode Share

In [32]:
# Cardiff data: read the sheet of the GPS origin-destination workbook used as the target source
cardiff_df = pd.read_excel(
    "Data/GPS Data/GPS_Data_SE_Wales_OD_Matrix.xlsx",
    sheet_name="WkdyTripsToCardiffCentralOnly",
)
# Keep only the rows that have a rail-share value
has_rail_share = cardiff_df["Rail share >100 (Num)"].notna()
target = cardiff_df[has_rail_share]

# Station Proximity (Distance to nearest station from current station)

In [33]:
import geopandas as gpd
from shapely.ops import nearest_points

station_catchment_gpd = gpd.read_file(
    "Data/Station Catchment & Frequency/800m Station Catchment Shapefiles/0_All SE Wales Stations.shp"
)

# Get station name and code.
# MERGE_SRC values look like "800m catchments SE Wales\3_Aberdare":
#   - name = second piece when the whole string is split on "_"
#   - code = integer before the "_" in the part that follows the backslash
merge_src = station_catchment_gpd["MERGE_SRC"]
station_catchment_gpd["station_names"] = [
    pieces[1] for pieces in merge_src.str.split("_")
]
station_catchment_gpd["station_code"] = [
    int(path_parts[1].split("_")[0]) for path_parts in merge_src.str.split("\\")
]


# Station nearest-station point coordinates.
# Store the centroid of every catchment geometry in its own column (used again later in the notebook).
station_catchment_gpd["centroid"] = station_catchment_gpd.centroid
#station_catchment_gpd.insert(5, 'nearest_geometry', None)

# For each row: union the geometries of all OTHER rows and keep the point of that
# union which is closest to this row's geometry (second value returned by nearest_points).
# NOTE(review): at this point `geometry` is still the catchment shape read from the
# shapefile, so this is the nearest point on another catchment rather than another
# station point — confirm that is the intended meaning of this column.
for index, row in station_catchment_gpd.iterrows():
    point = row.geometry
    multipoint = station_catchment_gpd.drop(index, axis=0).geometry.unary_union
    queried_geom, nearest_geom = nearest_points(point, multipoint)
    station_catchment_gpd.loc[index, 'nearest_geometry'] = nearest_geom


# Work on a copy whose active geometry is the centroid of each catchment.
centroids_gpd = station_catchment_gpd.copy()
centroids_gpd["geometry"] = station_catchment_gpd.centroid
#centroids_gpd.insert(4, 'nearest_geometry', None)

# Nearest point coords: same procedure as above, but between centroids. This overwrites
# the 'nearest_geometry' values in the copy only; station_catchment_gpd keeps its own.
for index, row in centroids_gpd.iterrows():
    point = row.geometry
    multipoint = centroids_gpd.drop(index, axis=0).geometry.unary_union
    queried_geom, nearest_geom = nearest_points(point, multipoint)
    centroids_gpd.loc[index, 'nearest_geometry'] = nearest_geom

# Get nearest station name: compare every row's nearest point against every other
# row's centroid and record the MERGE_SRC of the row that matches. Rows with no
# match keep the initial None. If several rows match, the last one scanned wins.
centroids_gpd.insert(4, 'nearest_station_names', None)
for index, row in centroids_gpd.iterrows():
    for i, r in centroids_gpd.iterrows():
        if index != i:
            if row.nearest_geometry == r.geometry:
                centroids_gpd.loc[index, "nearest_station_names"] = r.MERGE_SRC
                
# Get distance between station and nearest station: build a frame whose active
# geometry is the nearest point, then measure row against row (align=False pairs
# rows by position instead of by index label).
nearest_stations = centroids_gpd[["geometry","nearest_geometry"]]
nearest_stations["geometry"] = nearest_stations["nearest_geometry"]
centroids_gpd["nearest_station_distance"] = centroids_gpd.distance(nearest_stations, align=False)

# Get nearest station name and code from the MERGE_SRC string recorded for each row
# (same "<folder>\<code>_<name>" layout as the station's own MERGE_SRC).
nearest_src = centroids_gpd["nearest_station_names"]
centroids_gpd["nearest_station_name"] = [
    pieces[1] for pieces in nearest_src.str.split("_")
]
centroids_gpd["nearest_station_code"] = [
    int(path_parts[1].split("_")[0]) for path_parts in nearest_src.str.split("\\")
]
centroids_gpd = centroids_gpd.drop(columns=["nearest_station_names"])

# Copy the derived columns back onto the catchment frame (aligned on the shared index)
nearest_columns = ["nearest_station_distance", "nearest_station_name", "nearest_station_code"]
station_catchment_gpd[nearest_columns] = centroids_gpd[nearest_columns]

# Car Parking Availability

In [72]:
import math
# Car-parking schedule. The path uses a forward slash: the original "Data\C..." relied on
# an invalid escape sequence (a warning today, an error in future Python versions) and
# only resolved on Windows; every other data path in the notebook already uses "/".
parking_df = pd.read_excel("Data/CONFIDENTIAL StIP_Car Parking Schedule_DRAFT FOR ISSUE 20190723.xlsx", sheet_name="Schedule")
# Discard the last three rows of the sheet (presumably footer/notes — confirm against the workbook)
parking_df = parking_df.drop(parking_df.tail(3).index)

parking_df = parking_df.drop(columns=["Station and Type","Unnamed: 2", "Existing Situation - desktop study", "Unnamed: 4", "Unnamed: 7", "Unnamed: 8"])
# The first remaining row carries the real column headers: promote it, then remove it
columns = list(parking_df.iloc[0])
parking_df = parking_df.drop(parking_df.head(1).index)
parking_df = parking_df.set_axis(columns, axis='columns')
parking_df = parking_df[parking_df["Station"].isin(station_catchment_gpd["station_names"])]

# Build one station -> row-values lookup (first occurrence wins, as in the original
# per-station scan) instead of rebuilding a list and filtering the frame for every station.
first_row_by_station = {}
for station_name, row_values in zip(parking_df["Station"], parking_df.values):
    first_row_by_station.setdefault(station_name, row_values)

parking_spaces = []
parking_occupancy = []
for station_name in station_catchment_gpd["station_names"]:
    row_values = first_row_by_station.get(station_name)
    if row_values is None:
        # Station is not in the parking schedule
        parking_spaces.append(math.nan)
        parking_occupancy.append(math.nan)
    else:
        # Second and third columns of the cleaned sheet hold spaces and occupancy
        parking_spaces.append(row_values[1])
        parking_occupancy.append(row_values[2])


station_catchment_gpd["Parking_Spaces"] = parking_spaces
station_catchment_gpd["Parking_Occupancy"] = parking_occupancy

# Station Journey Time Difference, car vs train (peak, weekday)

In [73]:
import csv

# Journeys: rail journey time to Cardiff Central per station
jny_df = pd.read_excel("Data/Journey Time & Rail Share/Station_Journey_Time_Comparison_to_Cardiff_Central_Sector.xlsx", sheet_name = "AllSEWalesStationsJnyTimes")

# Restrict the catchments to unique stations that have a journey-time record
station_catchment_gpd = station_catchment_gpd.drop_duplicates(subset=["station_names"])
station_catchment_gpd = station_catchment_gpd[station_catchment_gpd['station_names'].isin(jny_df["Station Name"])]
station_catchment_gpd = station_catchment_gpd.sort_values(by="station_names")
station_catchment_gpd = station_catchment_gpd.reset_index()

# Add rail journey times, joined on station NAME. The original discarded the result of
# jny_df.sort_values(...) and then assigned by index label, which only lines up when the
# workbook already lists exactly these stations in alphabetical order.
rail_minutes_by_station = (
    jny_df.drop_duplicates(subset=["Station Name"])
    .set_index("Station Name")["Rail Journey Time to Cardiff Central (mins)"]
)
station_catchment_gpd["Rail_Journey_Time_Cardiff_Central_mins"] = (
    station_catchment_gpd["station_names"].map(rail_minutes_by_station)
)

# Add car journey times: one CSV column per station; the first column is skipped
car_jny_df = pd.read_csv("Data/Journey Time & Rail Share/GoogleMapsPeakWeekdayJourneyTimes.csv", quoting=csv.QUOTE_NONE)
car_jny_df = car_jny_df.iloc[: , 1:]

station = []
means = []
for column in car_jny_df:
    # NOTE(review): the fixed divisor assumes 30 samples per station and /60 assumes the
    # samples are in seconds — confirm against the CSV before changing either.
    mean = sum(car_jny_df[column]) / 30
    station.append(column)
    means.append(mean / 60)

# Add the mean to the main station catchment data, again joined on station name
means_df = pd.DataFrame(data={"station_name": station, "mean": means})
means_df = means_df.drop_duplicates(subset=["station_name"])
means_df = means_df[means_df['station_name'].isin(station_catchment_gpd["station_names"])]
means_df = means_df.sort_values(by = "station_name")
station_catchment_gpd["car_journey_times"] = (
    station_catchment_gpd["station_names"].map(means_df.set_index("station_name")["mean"])
)

# Deliberately unchanged: drop() raises KeyError when either label is missing, in which
# case nothing is dropped. "level_0" only appears when this cell is re-run, so on a
# fresh run the "index" column is kept.
try:
    station_catchment_gpd = station_catchment_gpd.drop(["level_0", "index"],axis=1)
except KeyError:
    pass

# Journey time saving by rail: car minutes minus rail minutes (positive = rail is faster)
station_catchment_gpd["Journey_Time_Savings_By_Rail"] = station_catchment_gpd["car_journey_times"] - station_catchment_gpd["Rail_Journey_Time_Cardiff_Central_mins"]

# Population Age distribution by station catchment

In [74]:
# Load the population counts by age band for each station catchment
age_df = pd.read_excel("Data/Station Catchment & Frequency/SE_Wales_Station_800m_Catchment.xlsx", sheet_name = "Sheet1")
age_df = age_df[age_df['Station name'].isin(station_catchment_gpd["station_names"])]
age_df = age_df.sort_values(by="Station name")

# Share of the catchment population in each age band.
# Fix: Ratio_65_and_over was previously computed from "Sum Aged 25 to 34".
age_band_columns = {
    "Ratio_15_and_under": "Sum Aged 15 and under",
    "Ratio_16_to_24": "Sum Aged 16 to 24",
    "Ratio_25_to_34": "Sum Aged 25 to 34",
    "Ratio_35_to_49": "Sum Aged 35 to 49",
    "Ratio_50_to_64": "Sum Aged 50 to 64",
    "Ratio_65_and_over": "Sum Aged 65 and over",
}
for ratio_column, count_column in age_band_columns.items():
    age_df[ratio_column] = age_df[count_column] / age_df["Sum TOTAL All ages"]

# Attach the age columns with concat().
# NOTE(review): axis=1 with join='inner' pairs rows by index label, not by station name;
# age_df keeps the workbook's original row labels, so this only lines up when the
# workbook lists exactly these stations in alphabetical order — confirm.
station_catchment_gpd = pd.concat([station_catchment_gpd,age_df],axis=1,join='inner')
station_catchment_gpd = station_catchment_gpd.drop(["Station name", "No of centroid points"],axis=1)

# Car ownership levels by station catchment areas

In [75]:
# Car/van availability counts and the ONS local-authority-district boundaries
car_ownership_gpd = pd.read_csv("Data/car_ownership_2021_wales.csv")
# Forward slashes: the original backslash path relied on invalid escape sequences
# ("\O", "\L") and only resolved on Windows.
ONS_shapefiles = gpd.read_file("Data/ONS_local_authority_districts/LAD_MAY_2022_UK_BFE_V3.shp")

In [76]:
# Local-authority codes of interest: fixed prefix + two-digit suffix
str_beginning = "W060000"
SE_Wales_end_code = [13,14,15,16,18,19,20,21,22,24]
SE_wales_LA_codes = [str_beginning + str(code) for code in SE_Wales_end_code]

la_code_column = "Lower tier local authorities Code"
category_column = "Car or van availability (5 categories) Code"

car_ownership_gpd = car_ownership_gpd[car_ownership_gpd[la_code_column].isin(SE_wales_LA_codes)]
# .copy() so the columns added below are written to an independent frame, not a slice
ONS_shapefiles = ONS_shapefiles[ONS_shapefiles["LAD22CD"].isin(SE_wales_LA_codes)].copy()
car_ownership_gpd = car_ownership_gpd.sort_values(by="Lower tier local authorities")
ONS_shapefiles = ONS_shapefiles.sort_values(by="LAD22NM")

# Category code -> column label prefix used in ONS_shapefiles
car_categories = [(0, "No cars"), (1, "1 car"), (2, "2 cars"), (3, "3 or more cars")]

# Totals and per-category counts are keyed by local-authority CODE and joined on LAD22CD.
# The original built plain lists with four iterrows() passes and attached them by
# position, which silently depended on both datasets sorting identically by name.
totals_by_la = car_ownership_gpd.groupby(la_code_column)["Observation"].sum()
ONS_shapefiles["Total Population"] = ONS_shapefiles["LAD22CD"].map(totals_by_la)

for category_code, label in car_categories:
    in_category = car_ownership_gpd[car_ownership_gpd[category_column] == category_code]
    counts_by_la = in_category.groupby(la_code_column)["Observation"].sum()
    ONS_shapefiles[f"{label} population"] = ONS_shapefiles["LAD22CD"].map(counts_by_la)

for _, label in car_categories:
    ONS_shapefiles[f"{label} ratio"] = ONS_shapefiles[f"{label} population"] / ONS_shapefiles["Total Population"]

# Match stations to ONS local authorities using the catchment centroid as the station point
stations_gpd = station_catchment_gpd.copy()
stations_gpd["geometry"] = stations_gpd["centroid"].copy()

# `predicate` replaces the deprecated `op` keyword of sjoin (geopandas >= 0.10)
station_ONS_gpd = gpd.sjoin(stations_gpd, ONS_shapefiles, how='left', predicate='within')
ratio_column_map = [
    ("No_cars_ratio", "No cars ratio"),
    ("1_car_ratio", "1 car ratio"),
    ("2_cars_ratio", "2 cars ratio"),
    ("3_or_more_cars_ratio", "3 or more cars ratio"),
]
for station_column, ons_column in ratio_column_map:
    station_catchment_gpd[station_column] = station_ONS_gpd[ons_column].copy()

# WIMD by station catchments

- Education
- Employment
- Health
- Income
- Overall
~

Education + employment

Runtime: about 20 seconds.

In [77]:
from shapely import wkt

# Education and employment WIMD domains go through identical steps, so run them in one loop
# instead of two copy-pasted stanzas.
for wimd_domain in ("education", "employment"):
    # Load the domain table; its "geom" column holds WKT text that is parsed into geometries
    wimd_gpd = gpd.read_file(f"Data/wimd2019_{wimd_domain}.csv")
    wimd_gpd["geometry"] = wimd_gpd["geom"].apply(wkt.loads)
    wimd_gpd = gpd.GeoDataFrame(wimd_gpd)

    # Intersect the WIMD areas with the station catchments and average the rank per station
    WIMD_station_gpd = gpd.overlay(wimd_gpd, station_catchment_gpd, how='intersection')
    WIMD_station_gpd['rank'] = WIMD_station_gpd['rank'].astype('int')
    mean_ranks = WIMD_station_gpd.groupby("station_code")["rank"].mean()

    # Join on station_code rather than assigning `.values` by position: a station with no
    # intersecting area now gets NaN instead of a length error or shifted values.
    station_catchment_gpd = station_catchment_gpd.sort_values(by="station_code")
    station_catchment_gpd[f"average_catchment_LSOA_{wimd_domain}_rank"] = (
        station_catchment_gpd["station_code"].map(mean_ranks)
    )


Income, health, overall

In [78]:
# Income, health and overall WIMD tables go through identical steps, so run them in one loop
# instead of three copy-pasted stanzas.
for wimd_domain in ("income", "health", "overall"):
    # Load the table; its "geom" column holds WKT text that is parsed into geometries
    wimd_gpd = gpd.read_file(f"Data/wimd2019_{wimd_domain}.csv")
    wimd_gpd["geometry"] = wimd_gpd["geom"].apply(wkt.loads)
    wimd_gpd = gpd.GeoDataFrame(wimd_gpd)

    # Intersect the WIMD areas with the station catchments and average the rank per station
    WIMD_station_gpd = gpd.overlay(wimd_gpd, station_catchment_gpd, how='intersection')
    WIMD_station_gpd['rank'] = WIMD_station_gpd['rank'].astype('int')
    mean_ranks = WIMD_station_gpd.groupby("station_code")["rank"].mean()

    # Join on station_code rather than assigning `.values` by position: a station with no
    # intersecting area now gets NaN instead of a length error or shifted values.
    station_catchment_gpd = station_catchment_gpd.sort_values(by="station_code")
    station_catchment_gpd[f"average_catchment_LSOA_{wimd_domain}_rank"] = (
        station_catchment_gpd["station_code"].map(mean_ranks)
    )

# Fares Data

The fares data was downloaded from the National Rail API.

In [79]:
# Load the fares tables: ticket types, flows (routes), fares per flow, and locations
ticket_types_df = pd.read_csv("Data/Fares data/CSV/ticket_types.csv")
flow_routes_df = pd.read_csv("Data/Fares data/CSV/Flow_Routes.csv")
flow_fares_df = pd.read_csv("Data/Fares data/CSV/Flow_Fares.csv")
locations_df = pd.read_csv("Data/Fares data/CSV/Locations.csv")

# Forward slashes: the original backslash path relied on invalid escape sequences and
# only resolved on Windows (the same shapefile is read with "/" in the save section).
station_df = gpd.read_file("Data/Rail Stations/SE_Wales_Rail_Stations.shp")
station_df = station_df[station_df["StationNm"].isin(station_catchment_gpd["station_names"])]
SE_Wales_station = locations_df[locations_df["CRS_code"].isin(station_df["StationCd"])]
SE_Wales_station = SE_Wales_station.drop_duplicates(subset=['NLC_code'])

# Flows whose destination code is "3899" (presumably Cardiff Central's NLC — confirm),
# restricted to origins that are stations of interest. .copy() avoids writing to a slice.
cardiff_central_bound_routes = flow_routes_df[flow_routes_df["destination_code"] == "3899"].copy()
cardiff_central_bound_routes["origin_code"] = cardiff_central_bound_routes["origin_code"].astype(str)
SE_Wales_to_Central_Routes = cardiff_central_bound_routes[cardiff_central_bound_routes["origin_code"].isin(SE_Wales_station["NLC_code"])]

# Fares for those flows, limited to ticket codes with class 2, max_passengers 1 and type "S"
cardiff_flow_fares = flow_fares_df[flow_fares_df["flow_id"].isin(SE_Wales_to_Central_Routes["flow_id"])]
cardiff_flow_fares_types = cardiff_flow_fares[cardiff_flow_fares["ticket_code"].isin(ticket_types_df["ticket_code"])]
standard_ticket_df = ticket_types_df[ticket_types_df["ticket_class"] == 2]
single_standard_df = standard_ticket_df[standard_ticket_df["max_passengers"] == 1]
single_standard_adult_df = single_standard_df[single_standard_df["ticket_type"]== "S"]
cardiff_flow_fares_adult = cardiff_flow_fares_types[cardiff_flow_fares_types["ticket_code"].isin(single_standard_adult_df["ticket_code"])]

# One row per flow holding the column-wise maximum, i.e. the highest fare_pence of the flow
cardiff_flow_fares_df = cardiff_flow_fares_adult.groupby(["flow_id"]).max()
cardiff_flow_fares_df = cardiff_flow_fares_df.drop(columns=["Unnamed: 0"], errors="ignore")
cardiff_flow_fares_df = cardiff_flow_fares_df.reset_index().sort_values(by="flow_id")

# Attach the fare to each route by flow_id (key-based instead of positional)
SE_Wales_to_Central_Routes = SE_Wales_to_Central_Routes.drop(columns=["Unnamed: 0"], errors="ignore")
SE_Wales_to_Central_Routes = SE_Wales_to_Central_Routes[SE_Wales_to_Central_Routes["flow_id"].isin(cardiff_flow_fares_df["flow_id"])]
SE_Wales_to_Central_Routes = SE_Wales_to_Central_Routes.sort_values(by="flow_id").reset_index(drop=True)
fare_by_flow = cardiff_flow_fares_df.set_index("flow_id")["fare_pence"]
SE_Wales_to_Central_Routes["fare_pence"] = SE_Wales_to_Central_Routes["flow_id"].map(fare_by_flow)

# Translate each route's origin NLC code to a CRS code through a lookup. The original
# sorted routes and locations separately and paired them by position, which shifts every
# later station as soon as one origin has more than one flow.
SE_WALES_stations = locations_df[locations_df["NLC_code"].isin(SE_Wales_to_Central_Routes["origin_code"])]
SE_WALES_stations = SE_WALES_stations.drop_duplicates(subset=['CRS_code'])
SE_WALES_stations = SE_WALES_stations.sort_values(by="NLC_code").reset_index()
nlc_to_crs = SE_WALES_stations.drop_duplicates(subset=["NLC_code"]).set_index("NLC_code")["CRS_code"]
# mergesort is stable, so routes sharing an origin stay in flow_id order
SE_Wales_to_Central_Routes = SE_Wales_to_Central_Routes.sort_values(by="origin_code", kind="mergesort").reset_index(drop=True)
SE_Wales_to_Central_Routes["station_code"] = SE_Wales_to_Central_Routes["origin_code"].map(nlc_to_crs)

# Add the CRS code to the catchments, looked up by station name
station_catchment_gpd = station_catchment_gpd.sort_values(by="station_names")
station_df = station_df.sort_values(by="StationNm")
name_to_crs = station_df.drop_duplicates(subset=["StationNm"]).set_index("StationNm")["StationCd"]
station_catchment_gpd.insert(6, column="CRS_code", value=station_catchment_gpd["station_names"].map(name_to_crs).to_numpy())

# Fare per station: the first route found for the station's CRS code, NaN when there is none.
# A lookup replaces the nested iterrows() scan and picks the same (first) match.
fare_by_crs = (
    SE_Wales_to_Central_Routes.dropna(subset=["station_code"])
    .drop_duplicates(subset=["station_code"])
    .set_index("station_code")["fare_pence"]
)
station_catchment_gpd["fares_pence"] = station_catchment_gpd["CRS_code"].map(fare_by_crs).to_numpy()

# Save to CSV file

In [80]:
# Load the sector shapefile and keep the sectors that appear in the Cardiff trip data
sectors_gpd = gpd.read_file("Data/Sectors/SE_Wales_Sectors.shp")
sectors_gpd = sectors_gpd[sectors_gpd["SectorNumb"].isin(cardiff_df["sectornumb_start"])]
sectors_gpd = sectors_gpd.sort_values(by="SectorNumb").reset_index()

# Attach the rail share by sector NUMBER. The original sorted both frames and assigned by
# position, which misaligns if cardiff_df holds a sector the shapefile lacks or repeats a
# sector. cardiff_df itself is no longer re-indexed here, so the cell can be re-run safely.
rail_share_by_sector = (
    cardiff_df.drop_duplicates(subset=["sectornumb_start"])
    .set_index("sectornumb_start")["Rail share >100 (Num)"]
)
sectors_gpd["Rail_share"] = sectors_gpd["SectorNumb"].map(rail_share_by_sector)

# Load stations shapefile
stations_gpd = gpd.read_file("Data/Rail Stations/SE_Wales_Rail_Stations.shp")

# Match stations to sectors; `predicate` replaces the deprecated `op` keyword (geopandas >= 0.10)
sector_stations_gpd = gpd.sjoin(stations_gpd, sectors_gpd, how='left', predicate='within')
sector_stations_gpd = sector_stations_gpd[sector_stations_gpd["StationCd"].isin(station_catchment_gpd["CRS_code"])]
sector_stations_gpd = sector_stations_gpd.sort_values(by="StationCd").reset_index(drop=True)

# Join the sector rail share onto the catchments by CRS code rather than by row position
rail_share_by_station = (
    sector_stations_gpd.drop_duplicates(subset=["StationCd"])
    .set_index("StationCd")["Rail_share"]
)
station_catchment_gpd = station_catchment_gpd.sort_values(by="CRS_code").reset_index(drop=True)
station_catchment_gpd["Rail_Share"] = station_catchment_gpd["CRS_code"].map(rail_share_by_station)

In [81]:
# Display the assembled per-station table (the notebook renders a truncated view of it below)
station_catchment_gpd

Unnamed: 0,index,CoreId,ContourAre,MERGE_SRC,geometry,station_names,CRS_code,station_code,centroid,nearest_geometry,...,1_car_ratio,2_cars_ratio,3_or_more_cars_ratio,average_catchment_LSOA_education_rank,average_catchment_LSOA_employment_rank,average_catchment_LSOA_income_rank,average_catchment_LSOA_health_rank,average_catchment_LSOA_overall_rank,fares_pence,Rail_Share
0,22,1,800.0,800m catchments SE Wales\3_Aberdare,"POLYGON ((300593.000 202128.276, 300493.000 20...",Aberdare,ABA,3,POINT (300519.617 202713.989),POINT (301987.713471673 201623.1331251229),...,0.418094,0.266282,0.093460,811.000000,510.200000,657.000000,674.400000,618.800000,,23.0
1,0,1,800.0,800m catchments SE Wales\1_Aber,"POLYGON ((314593.000 186622.045, 314493.000 18...",Aber,ABE,1,POINT (314889.799 187055.053),POINT (315394.0738624227 186969.15629471873),...,0.418283,0.276775,0.101656,1095.750000,1010.000000,1028.000000,783.000000,1052.250000,420.0,10.0
2,11,1,800.0,800m catchments SE Wales\2_Abercynon,"POLYGON ((307893.000 194209.788, 307793.000 19...",Abercynon,ACY,2,POINT (308126.203 194758.806),POINT (308493 196490.9224767291),...,0.418094,0.266282,0.093460,708.666667,803.333333,888.000000,685.000000,855.666667,360.0,23.0
3,33,1,800.0,800m catchments SE Wales\4_Abergavenny,"POLYGON ((330093.000 213695.719, 330079.219 21...",Abergavenny,AGV,4,POINT (330089.830 213768.802),POINT (330152.5962757146 213614.54962705105),...,0.382807,0.338921,0.148837,1348.000000,1133.000000,1164.000000,1305.000000,1106.000000,1090.0,32.0
4,92,1,800.0,800m catchments SE Wales\9_Birchgrove,"POLYGON ((316793.000 180322.679, 316734.918 18...",Birchgrove,BCG,9,POINT (316599.002 180921.685),POINT (316737.3413825734 180713),...,0.431383,0.239295,0.069414,1617.111111,1583.111111,1563.666667,1446.111111,1621.555556,,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
82,95,1,800.0,800m catchments SE Wales\92_Wildmill,"POLYGON ((290393.000 180886.721, 290293.000 18...",Wildmill,WMI,92,POINT (290533.572 181357.854),POINT (290593 180478.28077962692),...,0.418679,0.295471,0.103579,776.800000,653.800000,740.600000,574.400000,717.200000,,19.0
83,93,1,800.0,800m catchments SE Wales\90_Waun-gron Park,"POLYGON ((314593.000 176851.520, 314493.000 17...",Waun-gron Park,WNG,90,POINT (314781.499 177375.787),POINT (314224.51621832437 177631.83129942804),...,0.431383,0.239295,0.069414,1349.600000,1098.400000,1162.200000,1214.500000,1168.300000,520.0,0.0
84,96,1,800.0,800m catchments SE Wales\93_Ynyswen,"POLYGON ((295093.000 196928.322, 295012.750 19...",Ynyswen,YNW,93,POINT (294892.232 197459.757),POINT (294289.80102627975 197691.15477032142),...,0.418094,0.266282,0.093460,614.857143,376.714286,506.571429,541.714286,512.285714,300.0,22.0
85,97,1,800.0,800m catchments SE Wales\94_Ystrad Mynach,"POLYGON ((314093.000 193665.461, 313993.000 19...",Ystrad Mynach,YSM,94,POINT (314213.731 194248.005),POINT (314755.64139722753 194649.78645525913),...,0.418283,0.276775,0.101656,1122.250000,972.250000,1115.750000,965.250000,1051.000000,,43.0


In [86]:
# Remove bookkeeping/identifier columns before export. errors="ignore" drops whichever of
# them are present; the previous try/except skipped the whole drop when any one was missing.
station_catchment_gpd = station_catchment_gpd.drop(
    columns=["index", "CoreId", "ContourAre", "MERGE_SRC", "station_code"],
    errors="ignore",
)


station_catchment_gpd["Rail_Share"] = station_catchment_gpd["Rail_Share"].astype(float)
# Make sure the output folder exists so the export does not fail on a fresh checkout
os.makedirs("Model_data", exist_ok=True)
station_catchment_gpd.to_csv("Model_data/station_data.csv")


Index(['index', 'CoreId', 'ContourAre', 'MERGE_SRC', 'geometry',
       'station_names', 'CRS_code', 'station_code', 'centroid',
       'nearest_geometry', 'nearest_station_distance', 'nearest_station_name',
       'nearest_station_code', 'Parking_Spaces', 'Parking_Occupancy',
       'Rail_Journey_Time_Cardiff_Central_mins', 'car_journey_times',
       'Journey_Time_Savings_By_Rail', 'Sum Aged 15 and under',
       'Sum Aged 16 to 24', 'Sum Aged 25 to 34', 'Sum Aged 35 to 49',
       'Sum Aged 50 to 64', 'Sum Aged 65 and over', 'Sum TOTAL All ages',
       'Ratio_15_and_under', 'Ratio_16_to_24', 'Ratio_25_to_34',
       'Ratio_35_to_49', 'Ratio_50_to_64', 'Ratio_65_and_over',
       'No_cars_ratio', '1_car_ratio', '2_cars_ratio', '3_or_more_cars_ratio',
       'average_catchment_LSOA_education_rank',
       'average_catchment_LSOA_employment_rank',
       'average_catchment_LSOA_income_rank',
       'average_catchment_LSOA_health_rank',
       'average_catchment_LSOA_overall_rank', 'fa

In [None]:
# NOTE(review): unexecuted scratch cell. `gpd.read_file` is only referenced, not called,
# and `final_df` is not defined anywhere in the visible notebook, so running this cell
# on a fresh kernel raises NameError — confirm intent or remove the cell.
gpd.read_file
final_df["geometry"]