### This notebook fetches a collection of pre-processed route tables from BigQuery, concatenates them, and saves the result as one larger CSV file, ready for feature engineering.

In [1]:
from google.cloud import bigquery
import pandas as pd

# Initialize client (uses the credentials configured in your environment)
client = bigquery.Client(project='cyclemore')

# Verify it's working
print(f"✅ Connected to project: {client.project}")

# List the tables in the cycling_routes dataset to see what is available
tables = list(client.list_tables("cycling_routes"))
print("\n📊 Available tables:")
for table in tables:
    print(f"  - {table.table_id}")
if not tables:
    # Make an empty dataset visible instead of printing a bare heading.
    print("  (none)")

✅ Connected to project: cyclemore

📊 Available tables:
  - Alps
  - Costa_Brava
  - County_Kerry
  - Dolomites
  - Fjords
  - Mallorca
  - Munich
  - UK1
  - UK2_Data
  - UK3
  - UK4
  - UK_5
  - UK_LONG_LAT


In [2]:
from google.cloud import bigquery
import pandas as pd

# Re-create the client so this cell also runs on its own.
client = bigquery.Client(project='cyclemore')

# BigQuery tables that hold the UK route batches; adjust this list to match
# the table listing printed earlier in this notebook.
uk_tables = ['UK1', 'UK2_Data', 'UK3', 'UK4', 'UK_5']

# Single place to change where the combined file is written.
output_csv = 'UK_All_Routes_Combined.csv'

dataframes = []
failed_tables = []  # tables whose fetch raised; reported again before saving

for table_name in uk_tables:
    query = f"SELECT * FROM `cyclemore.cycling_routes.{table_name}`"

    try:
        df = client.query(query).to_dataframe()
    except Exception as e:
        # Best-effort: keep going so one bad table does not block the others,
        # but remember the failure so it is not lost in the per-table log.
        failed_tables.append(table_name)
        print(f"❌ Error fetching {table_name}: {e}")
    else:
        dataframes.append(df)
        print(f"✅ Fetched {table_name}: {len(df)} routes, {len(df.columns)} columns")

if failed_tables:
    # Spell out that the saved file is incomplete rather than leaving it implicit.
    print(f"\n⚠️ Combined file will be missing {len(failed_tables)} table(s): {', '.join(failed_tables)}")

# Combine all dataframes
if dataframes:
    combined_routes = pd.concat(dataframes, ignore_index=True)

    # Remove duplicates by route ID; the first occurrence (in uk_tables order) wins
    print(f"\nBefore deduplication: {len(combined_routes)} routes")
    all_routes = combined_routes.drop_duplicates(subset=['id'], keep='first')
    print(f"After deduplication: {len(all_routes)} routes")

    # Save combined file (index=False keeps the pandas row index out of the CSV)
    all_routes.to_csv(output_csv, index=False)
    print(f"\n💾 Saved to {output_csv}")

    # View
    display(all_routes.head())
    print(f"\nFinal shape: {all_routes.shape}")
else:
    # Nothing was fetched, so there is nothing to concatenate or write.
    print("\n❌ No tables could be fetched; nothing was combined or saved.")



✅ Fetched UK1: 1998 routes, 12 columns
✅ Fetched UK2_Data: 1999 routes, 12 columns
✅ Fetched UK3: 1 routes, 12 columns
✅ Fetched UK4: 1996 routes, 12 columns
✅ Fetched UK_5: 1999 routes, 12 columns

Before deduplication: 7993 routes
After deduplication: 7956 routes

💾 Saved to UK_All_Routes_Combined.csv


Unnamed: 0,id,name,distance_m,duration_s,ascent_m,descent_m,steps,turns,surface,waytype,waycategory,steepness
0,11367233,Unnamed route,,,[0.0],[0.0],1,0,[],[],[],[]
1,198589,Sean Kelly Tour of Waterford,,,[0.0],[0.0],1,0,[],[],[],[]
2,17718273,Kelly Legacy,,,[0.0],[0.0],1,0,[],[],[],[]
3,1689109,Sliabh Beagh Route 1 - McKenna Trail,,,[0.0],[0.0],1,0,[],[],[],[]
4,1124202,Sperrins Route 2 - The Sawel Cycle Route,,,[0.0],[0.0],1,0,[],[],[],[]



Final shape: (7956, 12)
