In [None]:
# Install required packages for Google Calendar API
# NOTE(review): versions are unpinned, so different runs may resolve different
# releases — consider pinning (pkg==x.y.z) for reproducible runs.
%pip install google-auth google-auth-oauthlib google-auth-httplib2 google-api-python-client

In [None]:
import json
from datetime import date, datetime, timezone

from google.oauth2 import service_account
from googleapiclient.discovery import build
from pyspark.sql import SparkSession
from pyspark.sql.types import StructType, StructField, StringType, TimestampType, DateType

# Google Calendar API scope
# Read-only access; this is the scope passed to the service-account credentials
# and the one the setup notes ask to authorize for domain-wide delegation.
SCOPES = ['https://www.googleapis.com/auth/calendar.readonly']

# Databricks table settings
# The destination table is addressed as "<DATABASE_NAME>.<TABLE_NAME>".
DATABASE_NAME = 'develop'
TABLE_NAME = 'google_calendar'

In [None]:
def authenticate_google_calendar():
    """Build an authenticated Google Calendar API client from Databricks secrets.

    Reads the service-account JSON from the secret ``google/calendar_service_account``
    and creates credentials limited to ``SCOPES``. If the optional secret
    ``google/calendar_user_email`` can be read and is non-empty, the credentials
    impersonate that user (domain-wide delegation); otherwise the service
    account's own identity is used.

    Returns:
        The Calendar v3 service object produced by ``googleapiclient.discovery.build``.

    Raises:
        RuntimeError: if the required secret cannot be read or parsed, if
            delegation to the configured user fails, or if the client cannot be
            built. The underlying error is chained as ``__cause__``.
    """
    try:
        # Get service account credentials from Databricks secrets
        service_account_json = dbutils.secrets.get(scope="google", key="calendar_service_account")
        service_account_info = json.loads(service_account_json)

        # Create credentials
        creds = service_account.Credentials.from_service_account_info(
            service_account_info,
            scopes=SCOPES,
        )

        # Domain-wide delegation is optional: only the *lookup* of the user email
        # is allowed to fail. A failure while applying the subject must surface,
        # otherwise we would silently read the wrong (service account) calendar.
        try:
            user_email = dbutils.secrets.get(scope="google", key="calendar_user_email")
        except Exception:
            user_email = None

        if user_email:
            creds = creds.with_subject(user_email)
            print(f"Using domain-wide delegation for user: {user_email}")
        else:
            print("Using service account's own calendar")

        return build('calendar', 'v3', credentials=creds)

    except Exception as e:
        # dbutils.secrets is read-only, so point users at the tools that can
        # actually create the secrets instead of a non-existent put() call.
        raise RuntimeError(
            f"Authentication failed: {e}\n\n"
            "Please ensure the Databricks secrets exist (dbutils.secrets is read-only; "
            "create them with the Databricks CLI or the Secrets REST API):\n"
            "1. scope='google', key='calendar_service_account' -> <service_account_json>\n"
            "2. (Optional) scope='google', key='calendar_user_email' -> user@domain.com"
        ) from e

# Authenticate
# Runs when the cell executes; the resulting client is kept in the notebook-global
# `calendar_service`, which the event-fetching cell passes to the API calls.
calendar_service = authenticate_google_calendar()
print("✓ Successfully authenticated with Google Calendar API")

In [None]:
def fetch_all_calendar_events(service, calendar_id='primary'):
    """Fetch every event of one calendar, following pagination to the end.

    Recurring events are expanded into single instances (``singleEvents=True``)
    and results are ordered by start time.

    Args:
        service: Calendar API client exposing ``events().list(...).execute()``.
        calendar_id: Calendar to read; defaults to ``'primary'``.

    Returns:
        list: the event resources (dicts) from all pages, in API order.

    Raises:
        RuntimeError: if any page request fails. The original error is chained.
            Failing loudly is deliberate: returning a partial list would let a
            later full-refresh write replace the table with truncated data.
    """
    events = []
    page_token = None

    while True:
        try:
            events_result = service.events().list(
                calendarId=calendar_id,
                pageToken=page_token,
                singleEvents=True,
                orderBy='startTime',
                maxResults=2500,  # API maximum page size; fewer round trips
            ).execute()
        except Exception as e:
            raise RuntimeError(
                f"Error fetching events (after {len(events)} events already fetched): {e}"
            ) from e

        events.extend(events_result.get('items', []))
        page_token = events_result.get('nextPageToken')

        # No token means this was the last page
        if not page_token:
            break

    print(f"✓ Fetched {len(events)} calendar events")
    return events

# Pull every event through the authenticated client
calendar_events = fetch_all_calendar_events(calendar_service)

# Preview: print start and title for up to three events
if not calendar_events:
    print("No events found")
else:
    print("\nFirst 3 events:")
    for event in calendar_events[:3]:
        start_info = event.get('start', {})
        # Prefer the 'dateTime' entry and fall back to 'date' when it is absent
        start = start_info.get('dateTime', start_info.get('date'))
        summary = event.get('summary', 'No title')
        print(f"  - {start}: {summary}")

In [None]:
def _parse_event_timestamp(value):
    """Turn an ISO 8601 / RFC 3339 timestamp string into a ``datetime`` (``None`` if empty)."""
    if not value:
        return None
    # datetime.fromisoformat() accepts a trailing 'Z' only from Python 3.11 on,
    # so normalise it to an explicit UTC offset first.
    if value[-1] in ('Z', 'z'):
        value = value[:-1] + '+00:00'
    return datetime.fromisoformat(value)


def _parse_event_date(value):
    """Turn a ``YYYY-MM-DD`` string into a ``date`` (``None`` if empty)."""
    return date.fromisoformat(value) if value else None


def prepare_events_for_databricks(events):
    """Transform calendar events into flat row dicts for a Spark DataFrame.

    Timestamp fields (``start_datetime``, ``end_datetime``, ``created``,
    ``updated``, ``extracted_at``) are returned as ``datetime`` objects and date
    fields (``start_date``, ``end_date``) as ``date`` objects, because Spark's
    ``TimestampType``/``DateType`` columns do not accept plain strings.
    ``attendees`` and ``recurrence`` are serialised to JSON strings; missing or
    empty values become ``None``.

    Args:
        events: iterable of event dicts as returned by the Calendar API.

    Returns:
        list[dict]: one row per event, keys in table column order. All rows of
        one call share the same timezone-aware UTC ``extracted_at`` value.

    Raises:
        ValueError: if a timestamp or date string cannot be parsed.
    """
    # One extraction timestamp per batch (loop-invariant), timezone-aware so it
    # is not reinterpreted in the driver's local time zone.
    extracted_at = datetime.now(timezone.utc)
    rows = []

    for event in events:
        # `or {}` also covers keys that are present but explicitly None
        start = event.get('start') or {}
        end = event.get('end') or {}
        creator = event.get('creator') or {}
        organizer = event.get('organizer') or {}
        attendees = event.get('attendees')
        recurrence = event.get('recurrence')

        rows.append({
            'event_id': event.get('id'),
            'summary': event.get('summary'),
            'description': event.get('description'),
            'location': event.get('location'),
            'creator_email': creator.get('email'),
            'organizer_email': organizer.get('email'),
            'start_datetime': _parse_event_timestamp(start.get('dateTime')),
            'start_date': _parse_event_date(start.get('date')),
            'start_timezone': start.get('timeZone'),
            'end_datetime': _parse_event_timestamp(end.get('dateTime')),
            'end_date': _parse_event_date(end.get('date')),
            'end_timezone': end.get('timeZone'),
            'status': event.get('status'),
            'html_link': event.get('htmlLink'),
            'created': _parse_event_timestamp(event.get('created')),
            'updated': _parse_event_timestamp(event.get('updated')),
            'event_type': event.get('eventType'),
            'visibility': event.get('visibility'),
            'attendees': json.dumps(attendees) if attendees else None,
            'recurrence': json.dumps(recurrence) if recurrence else None,
            'extracted_at': extracted_at,
        })

    return rows

# Prepare data
# `prepared_rows` is the notebook-global list of row dicts that the DataFrame
# cell passes to spark.createDataFrame.
prepared_rows = prepare_events_for_databricks(calendar_events)
print(f"✓ Prepared {len(prepared_rows)} events for Databricks")

In [None]:
# Obtain (or reuse) the active Spark session
spark = SparkSession.builder.getOrCreate()

# Target table layout as (column name, Spark type, nullable), in column order.
# NOTE(review): TimestampType/DateType columns expect datetime/date objects in
# `prepared_rows`, not strings — confirm the preparation step provides them.
column_specs = [
    ("event_id", StringType(), False),
    ("summary", StringType(), True),
    ("description", StringType(), True),
    ("location", StringType(), True),
    ("creator_email", StringType(), True),
    ("organizer_email", StringType(), True),
    ("start_datetime", TimestampType(), True),
    ("start_date", DateType(), True),
    ("start_timezone", StringType(), True),
    ("end_datetime", TimestampType(), True),
    ("end_date", DateType(), True),
    ("end_timezone", StringType(), True),
    ("status", StringType(), True),
    ("html_link", StringType(), True),
    ("created", TimestampType(), True),
    ("updated", TimestampType(), True),
    ("event_type", StringType(), True),
    ("visibility", StringType(), True),
    ("attendees", StringType(), True),
    ("recurrence", StringType(), True),
    ("extracted_at", TimestampType(), True),
]
schema = StructType([StructField(*spec) for spec in column_specs])

# Build the DataFrame from the prepared rows using the explicit schema
df = spark.createDataFrame(prepared_rows, schema=schema)
row_count = df.count()
print(f"✓ Created DataFrame with {row_count} rows")

# Peek at a handful of rows
print("\nSample data:")
preview_columns = ["event_id", "summary", "start_datetime", "status"]
df.select(*preview_columns).show(5, truncate=False)

In [None]:
# Make sure the target database exists before writing
spark.sql(f"CREATE DATABASE IF NOT EXISTS {DATABASE_NAME}")
print(f"✓ Database '{DATABASE_NAME}' is ready")

# Fully-qualified name of the destination Delta table
table_path = f"{DATABASE_NAME}.{TABLE_NAME}"

# Full refresh: mode("overwrite") replaces the table contents on every run.
# This is NOT a merge/upsert — rows from previous runs are not kept.
delta_writer = (
    df.write
    .format("delta")
    .mode("overwrite")
    .option("mergeSchema", "true")
)
delta_writer.saveAsTable(table_path)

print(f"✓ Successfully wrote {df.count()} events to {table_path}")

# Read the table back and summarise it as a sanity check
summary_query = f"""
    SELECT 
        COUNT(*) as total_events,
        MIN(start_datetime) as earliest_event,
        MAX(start_datetime) as latest_event,
        COUNT(DISTINCT status) as unique_statuses
    FROM {table_path}
"""
result_df = spark.sql(summary_query)

print("\nTable Summary:")
result_df.show(truncate=False)

## Setup Instructions for Databricks

### Prerequisites
Before running this notebook, set up Google Calendar API access:

### 1. Enable Google Calendar API
   - Go to [Google Cloud Console](https://console.cloud.google.com/)
   - Create or select your project
   - Navigate to "APIs & Services" > "Library"
   - Search for "Google Calendar API" and enable it

### 2. Create Service Account
   - Go to "APIs & Services" > "Credentials"
   - Click "Create Credentials" > "Service Account"
   - Give it a name (e.g., "databricks-calendar-reader")
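   - (Optional) Grant project roles — none are needed just to read calendars; calendar access comes from sharing the calendar with the service account or from domain-wide delegation (step 4)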
   - Click "Done"

### 3. Download Service Account Key
   - Click on the created service account
   - Go to "Keys" tab
   - Click "Add Key" > "Create new key"
   - Choose JSON format
   - Download the key file

### 4. (Optional) Set up Domain-Wide Delegation
   If you need to access another user's calendar:
   - In the service account details, enable "Domain-Wide Delegation"
   - Note the Client ID
   - In Google Workspace Admin Console:
     - Go to Security > API Controls > Domain-wide Delegation
     - Add the Client ID with scope: `https://www.googleapis.com/auth/calendar.readonly`
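   - Alternatively, instead of delegation, share the target calendar with the service account email. Note that the notebook reads `calendarId='primary'`, which without delegation is the service account's own calendar rather than the shared one.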

### 5. Store Credentials in Databricks Secrets
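   Store the service account JSON in the `google` secret scope under the key `calendar_service_account` (and, for delegation, the user email under `calendar_user_email`). Note that `dbutils.secrets` can only read secrets from a notebook; create them with the Databricks CLI commands shown further below.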

In [None]:
# SETUP CHECK: verify that the secrets this notebook reads are in place.
#
# dbutils.secrets only offers read operations (get, getBytes, list, listScopes);
# it has no put() or createScope(), so secrets cannot be written from a notebook.
# Create them with the Databricks CLI or the Secrets REST API instead. That also
# keeps the service-account private key out of the notebook source, and makes
# this cell safe under "Run All" (it never overwrites stored credentials).

SECRET_SCOPE = "google"
REQUIRED_SECRET_KEY = "calendar_service_account"
OPTIONAL_SECRET_KEY = "calendar_user_email"  # only used for domain-wide delegation

# Step 1: Check that the secret scope exists
try:
    scopes = [s.name for s in dbutils.secrets.listScopes()]
except Exception as e:
    scopes = []
    print(f"! Could not list secret scopes: {e}")

if SECRET_SCOPE in scopes:
    print(f"✓ Secret scope '{SECRET_SCOPE}' already exists")
    stored_keys = {secret.key for secret in dbutils.secrets.list(SECRET_SCOPE)}
else:
    stored_keys = set()
    print(f"! Secret scope '{SECRET_SCOPE}' not found. Create it with:")
    print(f"    databricks secrets create-scope --scope {SECRET_SCOPE}")

# Step 2: Check the service account JSON (required)
if REQUIRED_SECRET_KEY in stored_keys:
    print(f"✓ Found {REQUIRED_SECRET_KEY} secret")
else:
    print(f"! Missing required secret '{REQUIRED_SECRET_KEY}'. Store it with:")
    print(
        f"    databricks secrets put --scope {SECRET_SCOPE} --key {REQUIRED_SECRET_KEY} "
        '--string-value "$(cat service-account.json)"'
    )

# Step 3: Check the delegated user email (optional)
if OPTIONAL_SECRET_KEY in stored_keys:
    print(f"✓ Found {OPTIONAL_SECRET_KEY} secret (domain-wide delegation will be used)")
else:
    print(f"- Optional secret '{OPTIONAL_SECRET_KEY}' not set (service account's own calendar will be used)")

if REQUIRED_SECRET_KEY in stored_keys:
    print("\n✅ Setup complete! You can now run the cells above to fetch calendar data.")
else:
    print("\n! Setup incomplete - create the missing secrets and re-run this cell.")

### Alternative: Using Databricks CLI to set secrets

If you prefer using the Databricks CLI instead of the notebook:

```bash
# Install Databricks CLI
pip install databricks-cli

# Configure authentication
databricks configure --token

# Create secret scope
databricks secrets create-scope --scope google

# Store service account (from a file)
databricks secrets put --scope google --key calendar_service_account --string-value "$(cat service-account.json)"

# Store user email (if needed)
databricks secrets put --scope google --key calendar_user_email --string-value "your.email@domain.com"
```

### Quick Start
1. Store your credentials in the `google` secret scope (see step 5 and the CLI commands above)
2. Run cells 1-7 in order to:
   - Install packages
   - Authenticate with Google Calendar
   - Fetch all calendar events
   - Transform data
   - Write to Delta table `develop.google_calendar`