In [None]:
# Colab setup: sign in, then create the BigQuery client shared by later cells.
import os
import textwrap

from google.cloud import bigquery
from google.colab import auth

# Project and dataset that the rest of the notebook targets.
PROJECT_ID = "even-blueprint-441418-p2"
DATASET_ID = "media_analytics"
FULL_DATASET = ".".join((PROJECT_ID, DATASET_ID))

# Sign in before the client is constructed.
auth.authenticate_user()
print("Authenticated with your Google account")

# Expose the project through the environment as well as passing it explicitly.
os.environ.update(GOOGLE_CLOUD_PROJECT=PROJECT_ID)
client = bigquery.Client(project=PROJECT_ID)

print("Connected to BigQuery →", FULL_DATASET)
print("Ready!")

Authenticated with your Google account
Connected to BigQuery → even-blueprint-441418-p2.media_analytics
Ready!


In [None]:
# Upload the four source CSVs and verify each one exists at its expected path.
import os
import re

from google.colab import files

# Staging table name -> path the loader cell reads the CSV from.
CSV_FILES = {
    "staging_aggregated_country": "/content/Aggregated_Metrics_By_Country_And_Subscriber_Status.csv",
    "staging_aggregated_video":   "/content/Aggregated_Metrics_By_Video.csv",
    "staging_all_comments":       "/content/All_Comments_Final.csv",
    "staging_video_performance":  "/content/Video_Performance_Over_Time.csv"
}

print("Upload your 4 CSV files:")
uploaded = files.upload()

# Colab does not overwrite on re-upload: a second "X.csv" is saved as "X (1).csv"
# while the older "X.csv" stays at the expected path, so the existence check below
# would pass against stale data. Write the freshly uploaded bytes onto the expected
# path so later cells load this upload rather than a previous one.
expected_paths = {os.path.basename(path): path for path in CSV_FILES.values()}
for uploaded_name, content in uploaded.items():
    # Drop a trailing " (N)" de-duplication suffix, in case the key carries one.
    original_name = re.sub(r" \(\d+\)(?=\.[^.]+$)", "", os.path.basename(uploaded_name))
    target_path = expected_paths.get(original_name)
    if target_path is not None:
        with open(target_path, "wb") as fh:
            fh.write(content)

# Files already present from an earlier upload still count as available.
missing = [path for path in CSV_FILES.values() if not os.path.exists(path)]
if missing:
    print("Missing:", missing)
else:
    print("All CSVs ready!")

Upload your 4 CSV files:


Saving Aggregated_Metrics_By_Country_And_Subscriber_Status.csv to Aggregated_Metrics_By_Country_And_Subscriber_Status (1).csv
Saving Aggregated_Metrics_By_Video.csv to Aggregated_Metrics_By_Video (1).csv
Saving All_Comments_Final.csv to All_Comments_Final (1).csv
Saving Video_Performance_Over_Time.csv to Video_Performance_Over_Time (1).csv
All CSVs ready!


In [None]:
import pandas as pd
from google.cloud import bigquery

def run_query(sql):
    """Run a SQL statement on the shared BigQuery client and wait for it to finish.

    Args:
        sql: The statement to submit through the module-level ``client``.

    Prints a start line and, once the job result is available, a completion
    line with the job's DML affected-row count, or ``N/A`` when the job does
    not report one. Exceptions from ``client.query`` or ``job.result``
    propagate to the caller.
    """
    print("Running...")
    job = client.query(sql)
    job.result()  # wait for the job to finish before reporting
    # The attribute is present but None for statements that are not DML, so a
    # getattr default alone never applies; map None to "N/A" explicitly.
    affected = getattr(job, "num_dml_affected_rows", None)
    print(f"Done! → {'N/A' if affected is None else affected} rows")

# 1. CREATE UDF
# parse_duration converts "H:MM:SS", "MM:SS" or "SS" text into seconds and
# returns 0 for blank input. The statement is delimited with ''' so the JS
# body's own """ delimiters can be written without escaping.
PARSE_DURATION_DDL = f'''
CREATE OR REPLACE FUNCTION `{FULL_DATASET}.parse_duration`(d STRING)
RETURNS INT64
LANGUAGE js AS
"""
  if (!d || d.trim() === '') return 0;
  const parts = d.split(':').map(Number);
  if (parts.length === 3) return parts[0]*3600 + parts[1]*60 + parts[2];
  if (parts.length === 2) return parts[0]*60 + parts[1];
  return parts[0] || 0;
""";
'''

print("Creating parse_duration UDF...")
run_query(PARSE_DURATION_DDL)

# 2. LOAD CSVs TO STAGING
def load_staging(table, path):
    """Read one CSV as all-string columns and overwrite a staging table with it.

    Args:
        table: Staging table name, appended to ``FULL_DATASET``.
        path: Location of the CSV file to read.

    Column headers are trimmed and lower-cased, spaces become underscores and
    any remaining non-word characters are removed. Rows with no value in the
    first column are dropped and that column is whitespace-trimmed before the
    frame is loaded with WRITE_TRUNCATE. CSV lines the parser rejects are
    skipped rather than raising.
    """
    print(f"Loading → {table}")

    frame = pd.read_csv(
        path,
        encoding="utf-8",
        engine="python",
        on_bad_lines="skip",
        dtype=str,  # keep every value as text; typing happens downstream in SQL
    )

    # Normalise the headers one step at a time.
    headers = frame.columns.str.strip().str.lower()
    headers = headers.str.replace(" ", "_", regex=True)
    frame.columns = headers.str.replace(r"[^\w_]", "", regex=True)

    # The first column acts as the row key: require it and trim it.
    key_column = frame.columns[0]
    frame = frame.dropna(subset=[key_column])
    frame.iloc[:, 0] = frame.iloc[:, 0].str.strip()

    destination = f"{FULL_DATASET}.{table}"
    load_config = bigquery.LoadJobConfig(
        write_disposition="WRITE_TRUNCATE",
        autodetect=True,
    )
    load_job = client.load_table_from_dataframe(frame, destination, job_config=load_config)
    load_job.result()

    print(f"Loaded {len(frame):,} rows")

# Load every configured CSV into its staging table, in CSV_FILES order.
for staging_table, csv_path in CSV_FILES.items():
    load_staging(staging_table, csv_path)

# 3. BUILD BRONZE TABLE
# Typed copy of staging_aggregated_video, rebuilt from scratch on every run.
BRONZE_VIDEO_TABLE = f"{FULL_DATASET}.bronze_aggregated_video"
STAGING_VIDEO_TABLE = f"{FULL_DATASET}.staging_aggregated_video"
PARSE_DURATION_FN = f"{FULL_DATASET}.parse_duration"

print("Building bronze_aggregated_video...")

# Drop and create are issued as two separate statements.
run_query(f"DROP TABLE IF EXISTS `{BRONZE_VIDEO_TABLE}`;")

# SAFE.* / SAFE_CAST yield NULL instead of failing on values that do not parse.
bronze_video_ddl = f"""
CREATE TABLE `{BRONZE_VIDEO_TABLE}`
PARTITION BY DATE(video_publish_date)
CLUSTER BY video_id
AS
SELECT
  video AS video_id,
  video_title,
  SAFE.PARSE_TIMESTAMP('%b %d, %Y', video_publish_time) AS video_publish_date,
  SAFE_CAST(views AS INT64) AS views,
  SAFE_CAST(watch_time_hours AS FLOAT64) AS watch_time_hours,
  `{PARSE_DURATION_FN}`(average_view_duration) AS avg_view_duration_seconds,
  SAFE_CAST(average_percentage_viewed_ AS FLOAT64) AS avg_view_percentage,
  SAFE_CAST(likes AS INT64) AS likes,
  SAFE_CAST(comments_added AS INT64) AS comments_added,
  SAFE_CAST(your_estimated_revenue_usd AS FLOAT64) AS estimated_revenue_usd
FROM `{STAGING_VIDEO_TABLE}`
WHERE video IS NOT NULL;
"""
run_query(bronze_video_ddl)

print("\nBRONZE LAYER 100% COMPLETE!")
print("You can now run Silver layer!")

Creating parse_duration UDF...
Running...
Done! → None rows
Loading → staging_aggregated_country
Loaded 55,292 rows
Loading → staging_aggregated_video
Loaded 224 rows
Loading → staging_all_comments
Loaded 10,239 rows
Loading → staging_video_performance
Loaded 111,857 rows
Building bronze_aggregated_video...
Running...
Done! → None rows
Running...
Done! → None rows

BRONZE LAYER 100% COMPLETE!
You can now run Silver layer!
