In [None]:
# Install required packages (Databricks)
%pip install google-auth google-auth-oauthlib google-auth-httplib2 google-api-python-client google-cloud-bigquery

In [None]:
import json
import os
import pickle
from datetime import datetime, timezone

from google.auth.transport.requests import Request
from google.cloud import bigquery
from google.cloud.exceptions import NotFound
from google.oauth2 import service_account
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build

# OAuth scope requested for the Calendar credentials (read-only calendar access).
# It is passed as `scopes=` when the service-account credentials are created.
SCOPES = ['https://www.googleapis.com/auth/calendar.readonly']

# BigQuery destination. The three parts are joined as
# `PROJECT_ID.DATASET_ID.TABLE_ID` to form the target table id.
PROJECT_ID = 'clean-pilot-419017'
DATASET_ID = 'dev'
TABLE_ID = 'calendar_events'

In [None]:
def authenticate_google_calendar():
    """Build an authenticated Google Calendar API client (Databricks).

    Service-account credentials are resolved in this order:

    1. JSON key stored in the Databricks secret ``google/calendar_service_account``.
    2. JSON key file at ``/dbfs/FileStore/credentials/service-account.json``.

    When the secret ``google/calendar_user_email`` can be read, the service
    account impersonates that user (domain-wide delegation), so requests for
    the ``primary`` calendar are made on that user's behalf. If the secret is
    unavailable, a notice is printed and the service account's own identity
    is used.

    Returns:
        The Calendar v3 service object created by ``build('calendar', 'v3', ...)``.

    Raises:
        RuntimeError: if neither the secret nor the DBFS key file is available.
    """
    try:
        # Preferred source: service-account JSON kept in Databricks secrets.
        service_account_info = json.loads(
            dbutils.secrets.get(scope="google", key="calendar_service_account")
        )
        creds = service_account.Credentials.from_service_account_info(
            service_account_info,
            scopes=SCOPES
        )
    except Exception as secret_error:
        # `except Exception` (not a bare except) so Ctrl-C / SystemExit still
        # propagate while any secret/JSON problem triggers the file fallback.
        service_account_path = '/dbfs/FileStore/credentials/service-account.json'
        if not os.path.exists(service_account_path):
            raise RuntimeError(
                "Please set up authentication:\n"
                "1. Store the service account JSON in the Databricks secret scope "
                "'google' under key 'calendar_service_account' (secrets are written "
                "with the Databricks CLI or Secrets REST API)\n"
                "OR\n"
                "2. Upload service-account.json to /dbfs/FileStore/credentials/"
            ) from secret_error
        creds = service_account.Credentials.from_service_account_file(
            service_account_path,
            scopes=SCOPES
        )

    # Domain-wide delegation: test for the method that is actually called.
    # Service-account credentials expose `with_subject`, not a public
    # `subject` attribute, so an attribute-name check would skip this step.
    if hasattr(creds, 'with_subject'):
        try:
            user_email = dbutils.secrets.get(scope="google", key="calendar_user_email")
        except Exception as email_error:
            user_email = None
            print(
                "No 'calendar_user_email' secret available "
                f"({type(email_error).__name__}); continuing without user impersonation"
            )
        if user_email and user_email.strip():
            creds = creds.with_subject(user_email.strip())

    return build('calendar', 'v3', credentials=creds)

# Build the Calendar API client once at cell execution; `calendar_service`
# is the handle the event-fetching cell passes to `fetch_all_calendar_events`.
# NOTE(review): this message only confirms that the client object was built —
# presumably no Calendar request has been issued yet; confirm before relying on it.
calendar_service = authenticate_google_calendar()
print("Successfully authenticated with Google Calendar API")

In [None]:
def fetch_all_calendar_events(service, calendar_id='primary', page_size=2500):
    """Fetch every event from a calendar, following pagination to the end.

    Recurring events are expanded into single instances (``singleEvents=True``)
    and results are requested ordered by start time.

    Args:
        service: Calendar API client exposing ``events().list(...).execute()``.
        calendar_id: Calendar to read. Defaults to ``'primary'``.
        page_size: Events requested per page (sent as ``maxResults``). The
            default asks for the largest page the Calendar API documents
            (2500) to minimise the number of round-trips.

    Returns:
        list: Event dicts in the order the API returned them. If a request
        fails part-way, the events collected so far are returned and the
        printed summary states that the result is incomplete.
    """
    events = []
    page_token = None
    incomplete = False

    while True:
        # Only the network call is guarded; everything else is plain dict work.
        try:
            response = service.events().list(
                calendarId=calendar_id,
                pageToken=page_token,
                maxResults=page_size,
                singleEvents=True,
                orderBy='startTime'
            ).execute()
        except Exception as e:
            # Best-effort: keep what was fetched, but make the gap visible.
            print(f"Error fetching events: {e}")
            incomplete = True
            break

        events.extend(response.get('items', []))
        page_token = response.get('nextPageToken')
        if not page_token:
            break

    if incomplete:
        print(f"Fetched {len(events)} calendar events (INCOMPLETE: stopped after an error)")
    else:
        print(f"Fetched {len(events)} calendar events")
    return events

# Fetch the events with the client built in the authentication cell;
# `calendar_events` is the input of the BigQuery preparation cell.
calendar_events = fetch_all_calendar_events(calendar_service)

# Preview the first few events as a quick sanity check of the fetch.
if calendar_events:
    print("\nFirst 3 events:")
    for event in calendar_events[:3]:
        # Prefer `start.dateTime`; fall back to `start.date` when it is absent
        # (presumably all-day events carry only `date` — confirm against the API).
        start = event.get('start', {}).get('dateTime', event.get('start', {}).get('date'))
        summary = event.get('summary', 'No title')
        print(f"- {start}: {summary}")

In [None]:
def prepare_events_for_bigquery(events):
    """Flatten Calendar event dicts into JSON-serialisable BigQuery rows.

    Args:
        events: Iterable of event dicts as returned by the Calendar API.

    Returns:
        list[dict]: One row per event. Missing fields become ``None``;
        ``attendees`` and ``recurrence`` are stored as JSON strings
        (``'[]'`` when absent). Every row of one call carries the same
        timezone-aware UTC ``extracted_at`` ISO-8601 timestamp, so a single
        extraction can be identified as one batch.
    """
    # One timestamp per call: computing it inside the loop would give each
    # row a slightly different value. `datetime.now(timezone.utc)` replaces
    # the deprecated, timezone-naive `datetime.utcnow()`.
    extracted_at = datetime.now(timezone.utc).isoformat()

    rows = []
    for event in events:
        # `or {}` also covers keys that are present but explicitly None.
        start = event.get('start') or {}
        end = event.get('end') or {}
        creator = event.get('creator') or {}
        organizer = event.get('organizer') or {}

        rows.append({
            'event_id': event.get('id'),
            'summary': event.get('summary'),
            'description': event.get('description'),
            'location': event.get('location'),
            'creator_email': creator.get('email'),
            'organizer_email': organizer.get('email'),
            'start_datetime': start.get('dateTime'),
            'start_date': start.get('date'),
            'start_timezone': start.get('timeZone'),
            'end_datetime': end.get('dateTime'),
            'end_date': end.get('date'),
            'end_timezone': end.get('timeZone'),
            'status': event.get('status'),
            'html_link': event.get('htmlLink'),
            'created': event.get('created'),
            'updated': event.get('updated'),
            'event_type': event.get('eventType'),
            'visibility': event.get('visibility'),
            # Nested lists are serialised to JSON text.
            'attendees': json.dumps(event.get('attendees') or []),
            'recurrence': json.dumps(event.get('recurrence') or []),
            'extracted_at': extracted_at,
        })

    return rows

# Flatten the fetched events into row dicts; `bq_rows` is what the insert
# cell streams into BigQuery.
bq_rows = prepare_events_for_bigquery(calendar_events)
print(f"Prepared {len(bq_rows)} rows for BigQuery")

In [None]:
# Initialize BigQuery client (Databricks)
# Preferred: explicit service-account credentials read from the Databricks
# secret `google/bigquery_service_account`.
try:
    service_account_info = json.loads(
        dbutils.secrets.get(scope="google", key="bigquery_service_account")
    )
    credentials = service_account.Credentials.from_service_account_info(service_account_info)
    bq_client = bigquery.Client(project=PROJECT_ID, credentials=credentials)
except Exception as secret_error:
    # Fallback: default credentials (if configured in cluster). Only the
    # exception type is printed so no secret content can reach the output.
    # `except Exception` keeps Ctrl-C / SystemExit from being swallowed.
    print(
        "BigQuery service account secret unavailable "
        f"({type(secret_error).__name__}); falling back to default credentials"
    )
    bq_client = bigquery.Client(project=PROJECT_ID)

# Column layout of the destination table as (name, BigQuery type, mode).
# `event_id` is the only REQUIRED column; the rest are NULLABLE. The
# `attendees` and `recurrence` columns are typed STRING.
_SCHEMA_COLUMNS = (
    ("event_id", "STRING", "REQUIRED"),
    ("summary", "STRING", "NULLABLE"),
    ("description", "STRING", "NULLABLE"),
    ("location", "STRING", "NULLABLE"),
    ("creator_email", "STRING", "NULLABLE"),
    ("organizer_email", "STRING", "NULLABLE"),
    ("start_datetime", "TIMESTAMP", "NULLABLE"),
    ("start_date", "DATE", "NULLABLE"),
    ("start_timezone", "STRING", "NULLABLE"),
    ("end_datetime", "TIMESTAMP", "NULLABLE"),
    ("end_date", "DATE", "NULLABLE"),
    ("end_timezone", "STRING", "NULLABLE"),
    ("status", "STRING", "NULLABLE"),
    ("html_link", "STRING", "NULLABLE"),
    ("created", "TIMESTAMP", "NULLABLE"),
    ("updated", "TIMESTAMP", "NULLABLE"),
    ("event_type", "STRING", "NULLABLE"),
    ("visibility", "STRING", "NULLABLE"),
    ("attendees", "STRING", "NULLABLE"),
    ("recurrence", "STRING", "NULLABLE"),
    ("extracted_at", "TIMESTAMP", "NULLABLE"),
)

# Materialise the BigQuery schema objects from the column table above.
schema = [
    bigquery.SchemaField(column_name, column_type, mode=column_mode)
    for column_name, column_type, column_mode in _SCHEMA_COLUMNS
]

# Fully-qualified destination table id: project.dataset.table
table_ref = f"{PROJECT_ID}.{DATASET_ID}.{TABLE_ID}"

# Create or get table.
# Only a missing table triggers creation; any other failure (permissions,
# network, interruption) propagates instead of being mistaken for "table
# does not exist" and masked by a second failing call.
try:
    table = bq_client.get_table(table_ref)
    print(f"Table {table_ref} already exists")
except NotFound:
    table = bigquery.Table(table_ref, schema=schema)
    table = bq_client.create_table(table)
    print(f"Created table {table_ref}")

print("BigQuery client initialized")

In [None]:
# Insert events into BigQuery
# Rows are streamed in fixed-size batches so that a large calendar does not
# exceed the per-request limits of the streaming insert API.
# Note: batches are independent requests; rows from batches that succeeded
# stay inserted even if a later batch reports errors.
INSERT_BATCH_SIZE = 500

if bq_rows:
    errors = []
    for batch_start in range(0, len(bq_rows), INSERT_BATCH_SIZE):
        batch = bq_rows[batch_start:batch_start + INSERT_BATCH_SIZE]
        for row_error in bq_client.insert_rows_json(table_ref, batch):
            # Reported indexes are relative to the batch; shift them so they
            # point at the row's position in `bq_rows`.
            shifted_error = dict(row_error)
            if 'index' in shifted_error:
                shifted_error['index'] += batch_start
            errors.append(shifted_error)

    if errors:
        print(f"Encountered errors while inserting rows: {errors}")
    else:
        print(f"Successfully inserted {len(bq_rows)} events into {table_ref}")

        # Query to verify: row count and the range of event start times
        # across the whole table (not only the rows inserted just now).
        query = f"""
        SELECT 
            COUNT(*) as total_events,
            MIN(start_datetime) as earliest_event,
            MAX(start_datetime) as latest_event
        FROM `{table_ref}`
        """

        results = bq_client.query(query).result()
        for row in results:
            print("\nTable Summary:")
            print(f"- Total events: {row.total_events}")
            print(f"- Earliest event: {row.earliest_event}")
            print(f"- Latest event: {row.latest_event}")
else:
    print("No events to insert")

## Setup Instructions for Databricks

Before running this notebook, you need to set up authentication:

### 1. Enable Google Calendar API
   - Go to [Google Cloud Console](https://console.cloud.google.com/)
   - Select project `clean-pilot-419017`
   - Navigate to "APIs & Services" > "Library"
   - Search for "Google Calendar API" and enable it

### 2. Create Service Account
   - Go to "APIs & Services" > "Credentials"
   - Click "Create Credentials" > "Service Account"
   - Open the new service account, go to "Keys" > "Add Key" > "Create new key", choose JSON, and download the key file
   
### 3. Set up Domain-Wide Delegation (for accessing user calendars)
   - In the service account details, click "Show Domain-Wide Delegation"
   - Enable it and note the Client ID
   - In Google Workspace Admin Console, add the Client ID with scope:
     - `https://www.googleapis.com/auth/calendar.readonly`

### 4. Store Credentials in Databricks Secrets
   
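   Secrets cannot be written from inside a notebook (`dbutils.secrets` only reads them). Create them with the Databricks CLI (syntax shown for CLI v0.205+; the legacy CLI uses `--scope`/`--key` flags) or the Secrets REST API:

   ```bash
   # Create secret scope (run once)
   databricks secrets create-scope google

   # Store service account JSON (the entire content of the key file)
   databricks secrets put-secret google calendar_service_account --string-value "$(cat service-account.json)"
   databricks secrets put-secret google bigquery_service_account --string-value "$(cat service-account.json)"

   # Store the email of the user whose calendar you want to access
   databricks secrets put-secret google calendar_user_email --string-value 'your.email@domain.com'
   ```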

### 5. Alternative: Upload to DBFS
   - Upload `service-account.json` to `/dbfs/FileStore/credentials/`
   - The code will automatically use it as fallback

### 6. Run the notebook's code cells in order to:
   - Install required packages
   - Authenticate with service account
   - Fetch all calendar events
   - Create BigQuery table (if it doesn't exist)
   - Insert events into `clean-pilot-419017.dev.calendar_events`