<a href="https://colab.research.google.com/github/Adtestdhahri/STRAVA2/blob/main/Activities.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# 1. Install required packages (if not already installed)
!pip install requests pandas openpyxl --quiet

from google.colab import drive
drive.mount('/content/drive')

import requests
import pandas as pd
import os

# 2. Refresh Strava access token
# Credentials are taken from environment variables, falling back to an
# interactive prompt, so that no secret is ever stored in the notebook itself.
# NOTE: any client secret or refresh token that has been committed to version
# control must be considered leaked and rotated in the Strava API settings.
from getpass import getpass

client_id = os.environ.get("STRAVA_CLIENT_ID", "46165")
client_secret = os.environ.get("STRAVA_CLIENT_SECRET") or getpass("Strava client secret: ")
refresh_token = os.environ.get("STRAVA_REFRESH_TOKEN") or getpass("Strava refresh token: ")
refresh_url = "https://www.strava.com/oauth/token"
payload = {
    'client_id': client_id,
    'client_secret': client_secret,
    'grant_type': 'refresh_token',
    'refresh_token': refresh_token,
}
# Bound the request so a stalled connection cannot hang the cell forever.
token_resp = requests.post(refresh_url, data=payload, timeout=30)
token_resp.raise_for_status()
tokens = token_resp.json()
access_token = tokens['access_token']
# Bearer header reused by every subsequent Strava API call in this cell.
headers = {'Authorization': f'Bearer {access_token}'}

# 3. Fetch all athlete activities (paginated)
activities_url = 'https://www.strava.com/api/v3/athlete/activities'
params = {'per_page': 100, 'page': 1}
activities = []
while True:
    # Bound each request so a stalled connection cannot hang the cell forever.
    resp = requests.get(activities_url, headers=headers, params=params, timeout=30)
    resp.raise_for_status()
    data = resp.json()
    # Only an empty page ends the loop; a short page is deliberately not
    # treated as the last one.
    if not data:
        break
    activities.extend(data)
    params['page'] += 1

# 4. Convert to pandas DataFrame
df_athlete = pd.json_normalize(activities)


# 5. Extract lat/lng and convert dates
def _latlng_component(latlng, position):
    """Return ``latlng[position]``, or None when ``latlng`` is not a list that long."""
    if isinstance(latlng, list) and len(latlng) > position:
        return latlng[position]
    return None


for endpoint in ('start', 'end'):
    pair_column = f'{endpoint}_latlng'
    # json_normalize of an empty activity list yields a frame without columns,
    # so skip the extraction instead of failing with a KeyError.
    if pair_column not in df_athlete.columns:
        continue
    df_athlete[f'{endpoint}_lat'] = df_athlete[pair_column].apply(_latlng_component, args=(0,))
    df_athlete[f'{endpoint}_lng'] = df_athlete[pair_column].apply(_latlng_component, args=(1,))

if 'start_date_local' in df_athlete.columns:
    df_athlete['start_date_local'] = pd.to_datetime(df_athlete['start_date_local'])

# 6. Create cleaned DataFrame by dropping unsupported columns
# errors='ignore' keeps this working when some of the listed columns are absent.
df_athlete_clean = df_athlete.drop(columns=[
    'location_city', 'location_state', 'location_country',
    'start_latlng', 'end_latlng'
], errors='ignore')

# 7. Target folder on the mounted Drive; create it when it does not exist yet
drive_folder = "/content/drive/MyDrive/StravaProjectData"
os.makedirs(drive_folder, exist_ok=True)

# 8./9. Write the raw and the cleaned frame to separate CSV files
# (the variables keep their historical "excel_path" names, but the output is CSV).
excel_path_1 = os.path.join(drive_folder, "athlete_activities_raw.csv")
excel_path_2 = os.path.join(drive_folder, "athlete_activities_clean.csv")
for frame, destination in ((df_athlete, excel_path_1), (df_athlete_clean, excel_path_2)):
    # index=False: the row index is not written as a column.
    frame.to_csv(destination, index=False)

print("✅ Files written to Drive (local path):")
print(f"   Raw activities → {excel_path_1}")
print(f"   Cleaned activities → {excel_path_2}")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
✅ Files written to Drive (local path):
   Raw activities → /content/drive/MyDrive/StravaProjectData/athlete_activities_raw.csv
   Cleaned activities → /content/drive/MyDrive/StravaProjectData/athlete_activities_clean.csv


In [None]:
# 1. Install dependencies
!pip install gspread pandas --quiet

# 2. Authenticate your Google user (runs an OAuth flow)
# The order matters: authenticate_user() is called before default() is asked
# for credentials.
from google.colab import auth
auth.authenticate_user()

import gspread
from google.auth import default

# default() is unpacked as a 2-tuple; only the first element (the credentials)
# is used, the second is discarded.
creds, _ = default()
# gspread client built from those credentials.
gc = gspread.authorize(creds)

# 3. Read the existing CSV files into pandas DataFrames
import pandas as pd
import numpy as np

# Absolute paths on the mounted Drive, so the read does not depend on the
# notebook's current working directory.
# Adjust these paths if your CSVs are located elsewhere
raw_csv_path = "/content/drive/MyDrive/StravaProjectData/athlete_activities_raw.csv"
clean_csv_path = "/content/drive/MyDrive/StravaProjectData/athlete_activities_clean.csv"

df_raw = pd.read_csv(raw_csv_path)
df_clean = pd.read_csv(clean_csv_path)

# 4. Replace any NaN or infinite values (Sheets won’t accept them)
def sanitize_df(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of ``df`` with NaN and infinite values replaced by "".

    Args:
        df: Frame to clean. It is not modified.

    Returns:
        A new frame in which every +/-inf and every missing value is the empty
        string, and every column that is still integer- or float-typed after
        that replacement has been converted to ``object`` dtype.
    """
    # Fold infinities into NaN first so that a single fillna handles both.
    cleaned = df.replace([np.inf, -np.inf], np.nan).fillna("")
    # Columns that are still numeric here contained nothing to fill, so a plain
    # dtype conversion is all they need.
    numeric_cols = [
        col for col in cleaned.columns
        if pd.api.types.is_integer_dtype(cleaned[col].dtype)
        or pd.api.types.is_float_dtype(cleaned[col].dtype)
    ]
    for col in numeric_cols:
        cleaned[col] = cleaned[col].astype(object)
    return cleaned

df_raw_safe = sanitize_df(df_raw)
df_clean_safe = sanitize_df(df_clean)

# 5. Create a new Google Sheet and add two worksheets
# NOTE: create() is called unconditionally, so each run of this cell produces
# another spreadsheet with the same title.
spreadsheet = gc.create("Strava Activities from CSV")

# Rename default sheet to "Raw_Activities"
worksheet_raw = spreadsheet.sheet1
worksheet_raw.update_title("Raw_Activities")

# Add a second tab named "Cleaned_Activities"
# rows/cols are passed as integers; this is only the initial grid size, the
# worksheet is resized to fit the data when it is written.
worksheet_clean = spreadsheet.add_worksheet(title="Cleaned_Activities", rows=1000, cols=20)

# 6. Helper to batch‐write a DataFrame into its worksheet
def df_to_worksheet(df: pd.DataFrame, worksheet):
    """Write ``df`` (header row plus all data rows) into ``worksheet`` in one batch.

    The worksheet is first resized to exactly fit the data, then every cell of
    that grid is filled row by row and sent with a single ``update_cells`` call.

    Args:
        df: Frame whose column names become the first row.
        worksheet: Object providing ``resize``, ``range`` and ``update_cells``.
    """
    column_names = df.columns.tolist()
    grid = [column_names, *df.values.tolist()]

    height = len(grid)
    width = len(column_names)
    worksheet.resize(rows=height, cols=width)

    # Cells come back as one flat list covering rows 1..height, cols 1..width.
    cells = worksheet.range(1, 1, height, width)

    position = 0
    for row in grid:
        for value in row:
            cells[position].value = value
            position += 1

    worksheet.update_cells(cells)

# 7. Push each sanitized frame into its matching tab, then report the tab URLs
for frame, tab in ((df_raw_safe, worksheet_raw), (df_clean_safe, worksheet_clean)):
    df_to_worksheet(frame, tab)

print("✅ CSVs have been converted to Google Sheets:")
print(f"  • Raw data tab → {spreadsheet.url}#gid={worksheet_raw.id}")
print(f"  • Cleaned data tab → {spreadsheet.url}#gid={worksheet_clean.id}")

✅ CSVs have been converted to Google Sheets:
  • Raw data tab → https://docs.google.com/spreadsheets/d/1FCK4D3CT31hS2AdYN_9HNeZUF3LRarl0QNe9VLrSttk#gid=0
  • Cleaned data tab → https://docs.google.com/spreadsheets/d/1FCK4D3CT31hS2AdYN_9HNeZUF3LRarl0QNe9VLrSttk#gid=781298523
