# In [None]:
# Rock Climbing Routes: KPI & Feature Enrichment Notebook

#  Setup and Imports 
import pandas as pd
import numpy as np
from google.colab import auth
from google.cloud import bigquery
import pandas_gbq

# Project that the BigQuery read and write calls below are issued against.
project_id = 'rock-finder-project'

# Sign the current Colab user in before any BigQuery call is made.
auth.authenticate_user()

#  SQL Query: Enrich and Add KPIs (from BigQuery)
# Selects every column of routes_silver and appends these derived columns:
#   difficulty_level  - band derived from rating_num cast to FLOAT64
#   length_category   - Long (>= 100), Medium (>= 50), Short (< 50), else Unknown
#   popularity_flag   - Popular when num_votes >= 10, otherwise Niche
#   avg_star_rounded  - avg_stars rounded to one decimal place
#   risk_level        - High Risk for X/R, Moderate Risk for PG13, otherwise Safe
#   vts_pct           - 0.25 for a single vote, the percent rank of num_votes
#                       (2 decimals) for more than one vote, otherwise 0
#
# length_category relies on CASE evaluating its arms in order: the Medium arm
# is written as `>= 50` (not `BETWEEN 50 AND 99`) so that a fractional Length
# such as 99.5 is classified instead of falling through to 'Unknown'.
#
# NOTE(review): the difficulty bands compare grades as floats, so "5.10" casts
# to 5.1 and cannot be told apart from a literal 5.1, and values in [5.0, 5.1)
# land in 'Unknown'. Presumably rating_num holds YDS-style grades - confirm
# against the routes_silver schema before changing these thresholds.
query = """
SELECT *,

  CASE
    WHEN SAFE_CAST(rating_num AS FLOAT64) < 5 THEN 'Beginner'
    WHEN SAFE_CAST(rating_num AS FLOAT64) BETWEEN 5.2 AND 5.9 THEN 'Intermediate'
    WHEN SAFE_CAST(rating_num AS FLOAT64) BETWEEN 5.10 AND 5.11 THEN 'Advanced'
    WHEN SAFE_CAST(rating_num AS FLOAT64) >= 5.12 THEN 'Expert'
    ELSE 'Unknown'
  END AS difficulty_level,

  CASE
    WHEN Length >= 100 THEN 'Long'
    WHEN Length >= 50 THEN 'Medium'
    WHEN Length < 50 THEN 'Short'
    ELSE 'Unknown'
  END AS length_category,

  CASE
    WHEN num_votes >= 10 THEN 'Popular'
    ELSE 'Niche'
  END AS popularity_flag,

  ROUND(avg_stars, 1) AS avg_star_rounded,

  CASE
    WHEN rating_safety IN ('X', 'R') THEN 'High Risk'
    WHEN rating_safety = 'PG13' THEN 'Moderate Risk'
    ELSE 'Safe'
  END AS risk_level,

  CASE
    WHEN num_votes = 1 THEN 0.25
    WHEN num_votes > 1 THEN ROUND(PERCENT_RANK() OVER (ORDER BY num_votes), 2)
    ELSE 0
  END AS vts_pct

FROM `rock-finder-project.routes.routes_silver`
"""

#  Load the enriched data into a dataframe
# pandas.read_gbq is deprecated (pandas >= 2.2) in favour of calling
# pandas_gbq.read_gbq directly; pandas_gbq is already imported in this file.
df = pandas_gbq.read_gbq(query, project_id=project_id)

#  Add KPI: Length Per Pitch
# Zero pitch counts are swapped for NaN before dividing, so those rows get a
# NaN length_per_pitch instead of an infinite value.
df['length_per_pitch'] = df['Length'] / df['Pitches'].replace(0, np.nan)

#  Add KPI: Length Efficiency Category 
def categorize_efficiency(val):
    """Return the length-efficiency label for a length-per-pitch value.

    Missing values (anything ``pd.isna`` reports as NA) map to 'Unknown'.
    Otherwise the label is chosen by the first exclusive upper bound the
    value falls under: < 15 'Very Short', < 30 'Short', < 50 'Moderate',
    < 70 'Long'; values of 70 or more are 'Very Long'.
    """
    if pd.isna(val):
        return 'Unknown'

    # (exclusive upper bound, label) pairs, scanned in ascending order.
    bands = (
        (15, 'Very Short'),
        (30, 'Short'),
        (50, 'Moderate'),
        (70, 'Long'),
    )
    for upper_bound, label in bands:
        if val < upper_bound:
            return label
    return 'Very Long'

# Label each route's length-per-pitch value with its efficiency bucket.
pitch_lengths = df['length_per_pitch']
df['length_efficiency'] = pitch_lengths.apply(categorize_efficiency)

# weighted_score is avg_stars scaled by vts_pct.
df['weighted_score'] = df['avg_stars'].mul(df['vts_pct'])

#  Export to BigQuery as GOLD table
# if_exists='replace' overwrites routes.routes_gold when it already exists.
gold_export_options = {
    'destination_table': 'routes.routes_gold',
    'project_id': project_id,
    'if_exists': 'replace',
}
pandas_gbq.to_gbq(df, **gold_export_options)