### This notebook fetches a collection of pre-processed csv files and concatenates them together into one larger csv file, ready for feature engineering.

In [1]:
from google.cloud import bigquery
import pandas as pd

# Initialize client (will use credentials from your environment)
client = bigquery.Client(project='cyclemore')

# Verify it's working
print(f"‚úÖ Connected to project: {client.project}")

# List tables to see what you have
tables = list(client.list_tables("cycling_routes"))
print("\nüìä Available tables:")
for table in tables:
    print(f"  - {table.table_id}")

‚úÖ Connected to project: cyclemore

üìä Available tables:
  - Alps
  - Costa_Brava
  - County_Kerry
  - Dolomites
  - Fjords
  - Mallorca
  - Munich
  - UK1
  - UK2_Data
  - UK3
  - UK4
  - UK_5
  - amsterdam
  - belgium
  - eastcoastus
  - lille
  - netherlands
  - provence
  - tile_esp_a_northwest
  - tile_ita_b_east
  - tile_ita_sicilia
  - tile_ita_west


In [2]:
from google.cloud import bigquery
import pandas as pd

client = bigquery.Client(project='cyclemore')

# List all your UK tables
tables = ['Alps','Costa_Brava','County_Kerry','Dolomites','Fjords','Mallorca','Munich', 'UK1', 'UK2_Data', 'UK3', 'UK4', 'UK_5','amsterdam','belgium','eastcoastus','lille','netherlands','provence']  # Adjust based on what tables you saw above

dataframes = []

for table_name in tables:
    query = f"SELECT * FROM `cyclemore.cycling_routes.{table_name}`"

    try:
        df = client.query(query).to_dataframe()
        df['region'] = table_name  # Add region column
        dataframes.append(df)
        print(f"‚úÖ Fetched {table_name}: {len(df)} routes, {len(df.columns)} columns")
    except Exception as e:
        print(f"‚ùå Error fetching {table_name}: {e}")

# Combine all dataframes
if dataframes:
    all_routes = pd.concat(dataframes, ignore_index=True)

    # Remove duplicates by route ID
    print(f"\nBefore deduplication: {len(all_routes)} routes")
    all_routes = all_routes.drop_duplicates(subset=['id'], keep='first')
    print(f"After deduplication: {len(all_routes)} routes")

    # Save combined file
    all_routes.to_csv('All_Routes_Combined.csv', index=False)
    print(f"\nüíæ Saved to All_Routes_Combined.csv")

    # View
    display(all_routes.head())
    print(f"\nFinal shape: {all_routes.shape}")



‚úÖ Fetched Alps: 1000 routes, 13 columns




‚úÖ Fetched Costa_Brava: 91 routes, 13 columns




‚úÖ Fetched County_Kerry: 149 routes, 13 columns




‚úÖ Fetched Dolomites: 149 routes, 13 columns




‚úÖ Fetched Fjords: 9 routes, 13 columns




‚úÖ Fetched Mallorca: 35 routes, 13 columns




‚úÖ Fetched Munich: 1693 routes, 13 columns




‚úÖ Fetched UK1: 1998 routes, 13 columns




‚úÖ Fetched UK2_Data: 1999 routes, 13 columns




‚úÖ Fetched UK3: 1 routes, 13 columns




‚úÖ Fetched UK4: 1996 routes, 13 columns




‚úÖ Fetched UK_5: 1999 routes, 13 columns




‚úÖ Fetched amsterdam: 2 routes, 12 columns




‚úÖ Fetched belgium: 3875 routes, 12 columns




‚úÖ Fetched eastcoastus: 1997 routes, 12 columns




‚úÖ Fetched lille: 106 routes, 12 columns




‚úÖ Fetched netherlands: 3289 routes, 12 columns




‚úÖ Fetched provence: 523 routes, 13 columns

Before deduplication: 20911 routes
After deduplication: 19151 routes

üíæ Saved to All_Routes_Combined.csv


Unnamed: 0,id,name,distance_m,duration_s,ascent_m,descent_m,steps,turns,surface,waytype,waycategory,steepness,region
0,6101627,Ciclopedonale Lago Ghirla,1885.5,377.1,[51.8],[93.8],2,0,"[[0, 37, 3], [37, 38, 0], [38, 62, 3]]","[[0, 38, 6], [38, 62, 2]]","[[0, 62, 0]]","[[0, 7, -4], [7, 33, 1], [33, 39, -2], [39, 41...",Alps
1,10187640,Tour du L√©man - √âtape 4,4138.4,827.6,[164.5],[173.5],2,0,"[[0, 118, 3]]","[[0, 118, 1]]","[[0, 118, 0]]","[[0, 47, 1], [47, 67, -1], [67, 78, 2], [78, 8...",Alps
2,12509770,La Madeleine Nord,24714.5,4942.9,[2182.8],[670.8],2,0,"[[0, 525, 3], [525, 553, 0], [553, 595, 3], [5...","[[0, 897, 2]]","[[0, 897, 0]]","[[0, 2, 5], [2, 6, -3], [6, 7, 5], [7, 17, 4],...",Alps
3,15361105,Unnamed route,984.9,197.0,[14.5],[21.5],2,0,"[[0, 30, 3]]","[[0, 30, 3]]","[[0, 30, 0]]","[[0, 30, 0]]",Alps
4,15630528,Unnamed route,1662.8,332.5,[31.1],[9.1],2,0,"[[0, 49, 3]]","[[0, 49, 2]]","[[0, 49, 0]]","[[0, 49, 1]]",Alps



Final shape: (19151, 13)
