# Load Data

In [146]:
import pandas as pd

In [147]:
df = pd.read_csv("UK1.csv")

In [148]:
df.head()

Unnamed: 0,id,name,distance_m,duration_s,ascent_m,descent_m,steps,turns,surface,waytype,waycategory,steepness
0,12461,Unnamed route,122479.9,24769.1,[1189.5],[1113.5],201,175,"[[0, 71, 3], [71, 77, 0], [77, 324, 3], [324, ...","[[0, 2, 4], [2, 9, 6], [9, 14, 4], [14, 218, 6...","[[0, 2835, 0]]","[[0, 380, 0], [380, 432, -1], [432, 641, 0], [..."
1,11017,Unnamed route,339371.7,71283.8,[4893.3],[4896.3],291,217,"[[0, 3, 3], [3, 12, 14], [12, 21, 3], [21, 29,...","[[0, 3, 2], [3, 12, 7], [12, 21, 3], [21, 84, ...","[[0, 6490, 0]]","[[0, 110, 0], [110, 148, 1], [148, 193, -1], [..."
2,9540148,EuroVelo 1 - Atlantic Coast Route - part Unite...,325789.5,67774.9,[5023.9],[5021.9],367,300,"[[0, 2, 3], [2, 29, 0], [29, 35, 3], [35, 68, ...","[[0, 5, 3], [5, 32, 2], [32, 35, 3], [35, 68, ...","[[0, 7402, 0]]","[[0, 78, 1], [78, 87, 4], [87, 110, 1], [110, ..."
3,2763769,EuroVelo 1 - Atlantic Coast Route - part Unite...,459941.8,94975.6,[5306.6],[5366.6],580,477,"[[0, 192, 3], [192, 193, 14], [193, 198, 3], [...","[[0, 19, 2], [19, 26, 4], [26, 34, 1], [34, 47...","[[0, 9155, 0]]","[[0, 209, 0], [209, 231, 1], [231, 256, -1], [..."
4,11237,Pennine Cycleway,82482.1,17704.6,[1470.9],[1511.9],169,132,"[[0, 15, 3], [15, 22, 0], [22, 24, 3], [24, 59...","[[0, 24, 2], [24, 30, 3], [30, 169, 2], [169, ...","[[0, 1198, 0], [1198, 1199, 4], [1199, 1493, 0...","[[0, 12, -2], [12, 42, 1], [42, 51, 2], [51, 5..."


# Data Cleaning

## Descent & Ascent

In [149]:
# Clean values for ascent and descent
df['ascent_m'] = df['ascent_m'].astype(str).str.strip("[]").astype(float)
df['descent_m'] = df['descent_m'].astype(str).str.strip("[]").astype(float)

## Null Values

In [150]:
# Check for null values
df.isna().sum()

id              0
name            0
distance_m     50
duration_s     50
ascent_m        0
descent_m       0
steps           0
turns           0
surface         0
waytype         0
waycategory     0
steepness       0
dtype: int64

In [151]:
# Check null rows
df[df.isnull().any(axis=1)].head(10)

Unnamed: 0,id,name,distance_m,duration_s,ascent_m,descent_m,steps,turns,surface,waytype,waycategory,steepness
101,11367233,Unnamed route,,,0.0,0.0,1,0,[],[],[],[]
132,198589,Sean Kelly Tour of Waterford,,,0.0,0.0,1,0,[],[],[],[]
202,17718273,Kelly Legacy,,,0.0,0.0,1,0,[],[],[],[]
222,1689109,Sliabh Beagh Route 1 - McKenna Trail,,,0.0,0.0,1,0,[],[],[],[]
252,1124202,Sperrins Route 2 - The Sawel Cycle Route,,,0.0,0.0,1,0,[],[],[],[]
303,19457243,Unnamed route,,,0.0,0.0,1,0,[],[],[],[]
314,1620344,Norbital,,,0.0,0.0,1,0,[],[],[],[]
333,1213660,Unnamed route,,,0.0,0.0,1,0,[],[],[],[]
351,1180682,Sperrins Route 7 - Banagher Cycle Route,,,0.0,0.0,1,0,[],[],[],[]
385,5472302,Red Squirrel Trail,,,0.0,0.0,1,0,[],[],[],[]


In [152]:
# Drop null values
df = df.dropna()

## Surface Data

In [153]:
# Decode surface values
surface_map = {
    0: "paved",
    1: "asphalt",
    2: "concrete",
    3: "paving_stones",
    4: "cobblestone",
    5: "metal",
    6: "wood",
    7: "compacted",
    8: "fine_gravel",
    9: "gravel",
    10: "dirt",
    11: "ground",
    12: "ice",
    13: "salt",
    14: "unknown"
}

In [154]:
import ast

In [None]:
def calc_surface_percentages(surface_data):
    try:
        # Convert string to list if needed
        if isinstance(surface_data, str):
            surface_data = ast.literal_eval(surface_data)

        # Compute segment lengths
        total_length = 0
        surface_lengths = {}

        for seg in surface_data:
            start, end, surf_code = seg
            length = end - start
            total_length += length
            surface_lengths[surf_code] = surface_lengths.get(surf_code, 0) + length

        # Convert to percentage
        for surf in surface_lengths:
            surface_lengths[surf] = round(surface_lengths[surf] / total_length * 100, 2)

        return surface_lengths

    except Exception as e:
        return {}

In [156]:
df['surface_pct'] = df['surface'].apply(calc_surface_percentages)

In [157]:
surface_df = df['surface_pct'].apply(pd.Series)
surface_df = surface_df.rename(columns=surface_map).fillna(0)

In [158]:
surface_df

Unnamed: 0,paving_stones,paved,asphalt,unknown,compacted,fine_gravel,dirt,ground,concrete,cobblestone,wood,ice,15,17,18
0,49.10,48.85,1.94,0.11,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.0,0.0,0.0
1,50.32,36.59,2.00,0.26,0.14,0.12,10.31,0.25,0.00,0.00,0.00,0.00,0.0,0.0,0.0
2,57.12,34.72,3.88,0.45,0.00,2.12,0.24,0.11,0.93,0.43,0.00,0.00,0.0,0.0,0.0
3,60.07,32.92,2.03,0.88,0.14,2.67,1.10,0.00,0.00,0.17,0.01,0.00,0.0,0.0,0.0
4,45.85,30.09,2.12,3.07,0.05,15.66,2.36,0.14,0.00,0.19,0.00,0.47,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1993,100.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.0,0.0,0.0
1994,36.73,61.22,0.00,2.04,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.0,0.0,0.0
1995,92.08,7.92,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.0,0.0,0.0
1996,0.00,100.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.0,0.0,0.0


In [159]:
df = pd.concat([df, surface_df], axis=1)

In [160]:
df = df.drop(['surface','surface_pct'], axis=1)

## Waytype

In [161]:
waytype_map = {
    0: "unknown",
    1: "state_road",
    2: "rural_road",
    3: "local_road",
    4: "residential",
    5: "service_road",
    6: "track",
    7: "path",
    8: "cycleway",
    9: "bridleway",
    10: "steps",
    11: "ferry",
    12: "construction",
    13: "living_street",
    14: "pedestrian",
    15: "unclassified"
}


In [None]:
import ast

def calc_waytype_percentages(waytype_data):
    try:
        # Convert string repr to list if needed
        if isinstance(waytype_data, str):
            waytype_data = ast.literal_eval(waytype_data)

        total_length = 0
        waytype_lengths = {}

        for seg in waytype_data:
            start, end, code = seg
            length = end - start
            total_length += length
            waytype_lengths[code] = waytype_lengths.get(code, 0) + length

        # Convert to percentages
        for code in waytype_lengths:
            waytype_lengths[code] = round(waytype_lengths[code] / total_length * 100, 2)

        return waytype_lengths

    except Exception as e:
        return {}


In [163]:
df['wtype_pct'] = df['waytype'].apply(calc_waytype_percentages)

In [164]:
wtype_df = df['wtype_pct'].apply(pd.Series)
wtype_df = wtype_df.rename(columns=waytype_map).fillna(0)

In [167]:
wtype_df

Unnamed: 0,residential,track,rural_road,local_road,state_road,path,service_road,unknown,steps,cycleway,bridleway
0,9.49,23.39,57.92,4.69,1.90,2.05,0.07,0.49,0.00,0.00,0.0
1,0.11,33.90,49.11,5.89,4.07,1.34,5.59,0.00,0.00,0.00,0.0
2,3.03,14.75,74.36,3.80,2.30,0.62,1.15,0.00,0.00,0.00,0.0
3,6.28,30.07,47.30,9.33,4.26,1.20,1.32,0.00,0.24,0.00,0.0
4,3.35,33.58,41.46,12.97,2.26,5.28,0.42,0.61,0.00,0.05,0.0
...,...,...,...,...,...,...,...,...,...,...,...
1993,0.00,0.00,20.99,79.01,0.00,0.00,0.00,0.00,0.00,0.00,0.0
1994,0.00,32.65,65.31,2.04,0.00,0.00,0.00,0.00,0.00,0.00,0.0
1995,0.00,36.63,25.74,30.69,0.00,6.93,0.00,0.00,0.00,0.00,0.0
1996,0.00,0.00,0.00,0.00,0.00,100.00,0.00,0.00,0.00,0.00,0.0


In [168]:
df = pd.concat([df, wtype_df], axis=1)

In [169]:
df = df.drop(['waytype','wtype_pct'], axis=1)

KeyError: "['waytype' 'wtype_pct'] not found in axis"

## Waycategory

In [170]:
waycategory_map = {
    0: "unknown",
    1: "motorway",
    2: "trunk",
    3: "primary",
    4: "secondary",
    5: "tertiary",
    6: "residential",
    7: "service",
    8: "track",
    9: "pedestrian",
    10: "path",
    11: "cycleway",
    12: "footway",
    13: "living_street",
    14: "unclassified",
    15: "ferry"
}


In [None]:
import ast

def calc_waycategory_percentages(waycategory_data):
    try:
        # Convert string repr to list if needed
        if isinstance(waycategory_data, str):
            waycategory_data = ast.literal_eval(waycategory_data)

        total_length = 0
        waycat_lengths = {}

        for seg in waycategory_data:
            start, end, code = seg
            length = end - start
            total_length += length
            waycat_lengths[code] = waycat_lengths.get(code, 0) + length

        # Convert to percentages
        for code in waycat_lengths:
            waycat_lengths[code] = round(waycat_lengths[code] / total_length * 100, 2)

        return waycat_lengths

    except Exception as e:
        return {}


In [172]:
df['wcat_pct'] = df['waycategory'].apply(calc_waycategory_percentages)

In [173]:
wcat_df = df['wcat_pct'].apply(pd.Series)
wcat_df = wcat_df.rename(columns=waycategory_map).fillna(0)

In [176]:
wcat_df

Unnamed: 0,unknown,secondary,16,track
0,100.00,0.00,0.00,0.0
1,100.00,0.00,0.00,0.0
2,100.00,0.00,0.00,0.0
3,100.00,0.00,0.00,0.0
4,99.76,0.05,0.19,0.0
...,...,...,...,...
1993,100.00,0.00,0.00,0.0
1994,100.00,0.00,0.00,0.0
1995,100.00,0.00,0.00,0.0
1996,100.00,0.00,0.00,0.0


In [177]:
df = pd.concat([df, wcat_df], axis=1)

In [178]:
df = df.drop(['waycategory','wcat_pct'], axis=1)

KeyError: "['waycategory' 'wcat_pct'] not found in axis"

## Steepness

In [179]:
steepness_map = {
    -5: "downhill_extreme (<-15%)",
    -4: "downhill_very_steep (-15% to -10%)",
    -3: "downhill_steep (-10% to -7%)",
    -2: "downhill_moderate (-7% to -5%)",
    -1: "downhill_gentle (-5% to 0%)",
     0: "flat (0%)",
     1: "uphill_gentle (0% to 3%)",
     2: "uphill_moderate (3% to 5%)",
     3: "uphill_steep (5% to 7%)",
     4: "uphill_very_steep (7% to 10%)",
     5: "uphill_extreme (>10%)"
}


In [None]:
import ast

def calc_steep_percentages(steep_data):
    try:
        # Convert string repr to list if needed
        if isinstance(steep_data, str):
            steep_data = ast.literal_eval(steep_data)

        total_length = 0
        steep_lengths = {}

        for seg in steep_data:
            start, end, code = seg
            length = end - start
            total_length += length
            steep_lengths[code] = steep_lengths.get(code, 0) + length

        # Convert to percentages
        for code in steep_lengths:
            steep_lengths[code] = round(steep_lengths[code] / total_length * 100, 2)

        return steep_lengths

    except Exception as e:
        return {}


In [181]:
df['steep_pct'] = df['steepness'].apply(calc_steep_percentages)

In [182]:
steep_df = df['steep_pct'].apply(pd.Series)
steep_df = steep_df.rename(columns=steepness_map).fillna(0)

In [184]:
steep_df.head()

Unnamed: 0,flat (0%),downhill_gentle (-5% to 0%),uphill_gentle (0% to 3%),uphill_moderate (3% to 5%),uphill_very_steep (7% to 10%),downhill_moderate (-7% to -5%),uphill_extreme (>10%),downhill_steep (-10% to -7%),downhill_very_steep (-15% to -10%),downhill_extreme (<-15%),uphill_steep (5% to 7%)
0,77.43,16.01,5.57,0.99,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,52.73,19.34,16.19,3.94,0.37,4.51,0.11,1.12,0.59,0.08,1.02
2,49.68,18.72,20.1,4.07,0.61,3.67,0.16,1.15,0.74,0.09,1.0
3,70.56,12.69,11.8,1.97,0.08,1.79,0.03,0.37,0.14,0.02,0.55
4,52.69,15.38,15.94,4.67,1.56,4.01,0.52,2.74,0.57,0.52,1.42


In [185]:
df = pd.concat([df, steep_df], axis=1)

In [186]:
df = df.drop(['steepness','steep_pct'], axis=1)

# Drop irrelevant collumns

In [187]:
df.shape

(1948, 64)

In [188]:
df

Unnamed: 0,id,name,distance_m,duration_s,ascent_m,descent_m,steps,turns,paving_stones,paved,asphalt,unknown,compacted,fine_gravel,dirt,ground,concrete,cobblestone,wood,ice,15,17,18,residential,track,rural_road,local_road,state_road,path,service_road,unknown.1,steps.1,cycleway,bridleway,residential.1,track.1,rural_road.1,local_road.1,state_road.1,path.1,service_road.1,unknown.2,steps.2,cycleway.1,bridleway.1,unknown.3,secondary,16,track.2,unknown.4,secondary.1,16.1,track.3,flat (0%),downhill_gentle (-5% to 0%),uphill_gentle (0% to 3%),uphill_moderate (3% to 5%),uphill_very_steep (7% to 10%),downhill_moderate (-7% to -5%),uphill_extreme (>10%),downhill_steep (-10% to -7%),downhill_very_steep (-15% to -10%),downhill_extreme (<-15%),uphill_steep (5% to 7%)
0,12461,Unnamed route,122479.9,24769.1,1189.5,1113.5,201,175,49.10,48.85,1.94,0.11,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.0,0.0,0.0,9.49,23.39,57.92,4.69,1.90,2.05,0.07,0.49,0.00,0.00,0.0,9.49,23.39,57.92,4.69,1.90,2.05,0.07,0.49,0.00,0.00,0.0,100.00,0.00,0.00,0.0,100.00,0.00,0.00,0.0,77.43,16.01,5.57,0.99,0.00,0.00,0.00,0.00,0.00,0.00,0.00
1,11017,Unnamed route,339371.7,71283.8,4893.3,4896.3,291,217,50.32,36.59,2.00,0.26,0.14,0.12,10.31,0.25,0.00,0.00,0.00,0.00,0.0,0.0,0.0,0.11,33.90,49.11,5.89,4.07,1.34,5.59,0.00,0.00,0.00,0.0,0.11,33.90,49.11,5.89,4.07,1.34,5.59,0.00,0.00,0.00,0.0,100.00,0.00,0.00,0.0,100.00,0.00,0.00,0.0,52.73,19.34,16.19,3.94,0.37,4.51,0.11,1.12,0.59,0.08,1.02
2,9540148,EuroVelo 1 - Atlantic Coast Route - part Unite...,325789.5,67774.9,5023.9,5021.9,367,300,57.12,34.72,3.88,0.45,0.00,2.12,0.24,0.11,0.93,0.43,0.00,0.00,0.0,0.0,0.0,3.03,14.75,74.36,3.80,2.30,0.62,1.15,0.00,0.00,0.00,0.0,3.03,14.75,74.36,3.80,2.30,0.62,1.15,0.00,0.00,0.00,0.0,100.00,0.00,0.00,0.0,100.00,0.00,0.00,0.0,49.68,18.72,20.10,4.07,0.61,3.67,0.16,1.15,0.74,0.09,1.00
3,2763769,EuroVelo 1 - Atlantic Coast Route - part Unite...,459941.8,94975.6,5306.6,5366.6,580,477,60.07,32.92,2.03,0.88,0.14,2.67,1.10,0.00,0.00,0.17,0.01,0.00,0.0,0.0,0.0,6.28,30.07,47.30,9.33,4.26,1.20,1.32,0.00,0.24,0.00,0.0,6.28,30.07,47.30,9.33,4.26,1.20,1.32,0.00,0.24,0.00,0.0,100.00,0.00,0.00,0.0,100.00,0.00,0.00,0.0,70.56,12.69,11.80,1.97,0.08,1.79,0.03,0.37,0.14,0.02,0.55
4,11237,Pennine Cycleway,82482.1,17704.6,1470.9,1511.9,169,132,45.85,30.09,2.12,3.07,0.05,15.66,2.36,0.14,0.00,0.19,0.00,0.47,0.0,0.0,0.0,3.35,33.58,41.46,12.97,2.26,5.28,0.42,0.61,0.00,0.05,0.0,3.35,33.58,41.46,12.97,2.26,5.28,0.42,0.61,0.00,0.05,0.0,99.76,0.05,0.19,0.0,99.76,0.05,0.19,0.0,52.69,15.38,15.94,4.67,1.56,4.01,0.52,2.74,0.57,0.52,1.42
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1993,12372003,Unnamed route,1847.9,369.6,16.0,20.0,14,12,100.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.0,0.0,0.0,0.00,0.00,20.99,79.01,0.00,0.00,0.00,0.00,0.00,0.00,0.0,0.00,0.00,20.99,79.01,0.00,0.00,0.00,0.00,0.00,0.00,0.0,100.00,0.00,0.00,0.0,100.00,0.00,0.00,0.0,100.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00
1994,8999288,Unnamed route,2402.4,481.0,16.4,22.4,9,6,36.73,61.22,0.00,2.04,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.0,0.0,0.0,0.00,32.65,65.31,2.04,0.00,0.00,0.00,0.00,0.00,0.00,0.0,0.00,32.65,65.31,2.04,0.00,0.00,0.00,0.00,0.00,0.00,0.0,100.00,0.00,0.00,0.0,100.00,0.00,0.00,0.0,100.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00
1995,10926812,Crab & Winkle Link,2779.4,601.0,70.4,9.4,18,13,92.08,7.92,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.0,0.0,0.0,0.00,36.63,25.74,30.69,0.00,6.93,0.00,0.00,0.00,0.00,0.0,0.00,36.63,25.74,30.69,0.00,6.93,0.00,0.00,0.00,0.00,0.0,100.00,0.00,0.00,0.0,100.00,0.00,0.00,0.0,49.50,0.00,37.62,9.90,0.00,0.00,0.00,0.00,0.00,0.00,2.97
1996,18455157,Redditch Cycle Route 19,1048.9,629.4,12.0,32.0,5,2,0.00,100.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.0,0.0,0.0,0.00,0.00,0.00,0.00,0.00,100.00,0.00,0.00,0.00,0.00,0.0,0.00,0.00,0.00,0.00,0.00,100.00,0.00,0.00,0.00,0.00,0.0,100.00,0.00,0.00,0.0,100.00,0.00,0.00,0.0,0.00,100.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00


In [1]:
import os

cred_path = os.getenv("GOOGLE_APPLICATION_CREDENTIALS")
print(cred_path)
print(os.path.exists(cred_path))


/Users/eugeneleach/code/Eugle3/cycle_more/cycle-more-379922-ecbc43accff.json
False
