# Load Data

In [18]:
import pandas as pd

In [19]:
df = pd.read_csv("UK_All_Routes_Combined.csv")

In [20]:
df.head()

Unnamed: 0,id,name,distance_m,duration_s,ascent_m,descent_m,steps,turns,surface,waytype,waycategory,steepness
0,11367233,Unnamed route,,,[0.0],[0.0],1,0,[],[],[],[]
1,198589,Sean Kelly Tour of Waterford,,,[0.0],[0.0],1,0,[],[],[],[]
2,17718273,Kelly Legacy,,,[0.0],[0.0],1,0,[],[],[],[]
3,1689109,Sliabh Beagh Route 1 - McKenna Trail,,,[0.0],[0.0],1,0,[],[],[],[]
4,1124202,Sperrins Route 2 - The Sawel Cycle Route,,,[0.0],[0.0],1,0,[],[],[],[]


# Data Cleaning

## Descent & Ascent

In [21]:
# Clean values for ascent and descent
df['ascent_m'] = df['ascent_m'].astype(str).str.strip("[]").astype(float)
df['descent_m'] = df['descent_m'].astype(str).str.strip("[]").astype(float)

## Null Values

In [22]:
# Check for null values
df.isna().sum()

id               0
name             0
distance_m     239
duration_s     239
ascent_m         0
descent_m        0
steps            0
turns            0
surface          0
waytype          0
waycategory      0
steepness        0
dtype: int64

In [23]:
# Check null rows
df[df.isnull().any(axis=1)].head(10)

Unnamed: 0,id,name,distance_m,duration_s,ascent_m,descent_m,steps,turns,surface,waytype,waycategory,steepness
0,11367233,Unnamed route,,,0.0,0.0,1,0,[],[],[],[]
1,198589,Sean Kelly Tour of Waterford,,,0.0,0.0,1,0,[],[],[],[]
2,17718273,Kelly Legacy,,,0.0,0.0,1,0,[],[],[],[]
3,1689109,Sliabh Beagh Route 1 - McKenna Trail,,,0.0,0.0,1,0,[],[],[],[]
4,1124202,Sperrins Route 2 - The Sawel Cycle Route,,,0.0,0.0,1,0,[],[],[],[]
5,19457243,Unnamed route,,,0.0,0.0,1,0,[],[],[],[]
6,1620344,Norbital,,,0.0,0.0,1,0,[],[],[],[]
7,1213660,Unnamed route,,,0.0,0.0,1,0,[],[],[],[]
8,1180682,Sperrins Route 7 - Banagher Cycle Route,,,0.0,0.0,1,0,[],[],[],[]
9,5472302,Red Squirrel Trail,,,0.0,0.0,1,0,[],[],[],[]


In [24]:
# Drop null values
df = df.dropna()

## Surface Data

In [25]:
# Decode surface values
surface_map = {
    0: "Unknown",
    1: "Paved",
    2: "Unpaved",
    3: "Asphalt",
    4: "Concrete",
    5: "Cobblestone",
    6: "Metal",
    7: "Wood",
    8: "Compacted Gravel",
    9: "Fine Gravel",
    10: "Gravel",
    11: "Dirt",
    12: "Ground",
    13: "Ice",
    14: "Paving Stones",
    15: "Sand",
    16: "Woodchips",
    17: "Grass",
    18: "Grass Paver"
}

In [26]:
import ast

In [27]:
def calc_surface_percentages(surface_data):
    try:
        # Convert string to list if needed
        if isinstance(surface_data, str):
            surface_data = ast.literal_eval(surface_data)

        # Compute segment lengths
        total_length = 0
        surface_lengths = {}

        for seg in surface_data:
            start, end, surf_code = seg
            length = end - start
            total_length += length
            surface_lengths[surf_code] = surface_lengths.get(surf_code, 0) + length

        # Convert to percentage
        for surf in surface_lengths:
            surface_lengths[surf] = round(surface_lengths[surf] / total_length * 100, 2)

        return surface_lengths

    except Exception as e:
        return {}

In [28]:
df['surface_pct'] = df['surface'].apply(calc_surface_percentages)

In [29]:
surface_df = df['surface_pct'].apply(pd.Series)
surface_df = surface_df.rename(columns=surface_map).fillna(0)

In [30]:
df = pd.concat([df, surface_df], axis=1)

In [31]:
df = df.drop(['surface','surface_pct'], axis=1)

In [32]:
surface_df.head()

Unnamed: 0,Asphalt,Unknown,Paved,Compacted Gravel,Wood,Gravel,Paving Stones,Ground,Concrete,Grass,Metal,Unpaved,Dirt,Grass Paver,Sand
50,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
51,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
52,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
53,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
54,18.18,81.82,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


## Waytype

In [33]:
waytype_map = {
    0: "Unknown",
    1: "State Road",
    2: "Road",
    3: "Street",
    4: "Path",
    5: "Track",
    6: "Cycleway",
    7: "Footway",
    8: "Steps",
    9: "Ferry",
    10: "Construction"
}

In [34]:
import ast

def calc_waytype_percentages(waytype_data):
    try:
        # Convert string repr to list if needed
        if isinstance(waytype_data, str):
            waytype_data = ast.literal_eval(waytype_data)

        total_length = 0
        waytype_lengths = {}

        for seg in waytype_data:
            start, end, code = seg
            length = end - start
            total_length += length
            waytype_lengths[code] = waytype_lengths.get(code, 0) + length

        # Convert to percentages
        for code in waytype_lengths:
            waytype_lengths[code] = round(waytype_lengths[code] / total_length * 100, 2)

        return waytype_lengths

    except Exception as e:
        return {}


In [35]:
df['wtype_pct'] = df['waytype'].apply(calc_waytype_percentages)

In [36]:
wtype_df = df['wtype_pct'].apply(pd.Series)
wtype_df = wtype_df.rename(columns=waytype_map).fillna(0)

In [37]:
df = pd.concat([df, wtype_df], axis=1)

In [38]:
df = df.drop(['waytype','wtype_pct'], axis=1)

In [39]:
wtype_df.head()

Unnamed: 0,Road,Cycleway,State Road,Track,Street,Path,Footway,Unknown,Steps,Construction,Ferry
50,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
51,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
52,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
53,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
54,81.82,18.18,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


## Drop Waycategory

In [40]:
df = df.drop(['waycategory'], axis=1)

## Steepness

In [41]:
steepness_map = {
    -5: "downhill_extreme (<-15%)",
    -4: "downhill_very_steep (-15% to -10%)",
    -3: "downhill_steep (-10% to -7%)",
    -2: "downhill_moderate (-7% to -5%)",
    -1: "downhill_gentle (-5% to 0%)",
     0: "flat (0%)",
     1: "uphill_gentle (0% to 3%)",
     2: "uphill_moderate (3% to 5%)",
     3: "uphill_steep (5% to 7%)",
     4: "uphill_very_steep (7% to 10%)",
     5: "uphill_extreme (>10%)"
}


In [42]:
import ast

def calc_steep_percentages(steep_data):
    try:
        # Convert string repr to list if needed
        if isinstance(steep_data, str):
            steep_data = ast.literal_eval(steep_data)

        total_length = 0
        steep_lengths = {}

        for seg in steep_data:
            start, end, code = seg
            length = end - start
            total_length += length
            steep_lengths[code] = steep_lengths.get(code, 0) + length

        # Convert to percentages
        for code in steep_lengths:
            steep_lengths[code] = round(steep_lengths[code] / total_length * 100, 2)

        return steep_lengths

    except Exception as e:
        return {}


In [43]:
df['steep_pct'] = df['steepness'].apply(calc_steep_percentages)

In [44]:
steep_df = df['steep_pct'].apply(pd.Series)
steep_df = steep_df.rename(columns=steepness_map).fillna(0)

In [45]:
df = pd.concat([df, steep_df], axis=1)

In [46]:
df = df.drop(['steepness','steep_pct'], axis=1)

# Drop irrelevant collumns

In [47]:
df.shape

(7717, 45)

In [48]:
df.to_csv('UK_All_Routes_Combined_Processed.csv')