# Part 1: Get Google Form Responses

This notebook connects to the Google Forms API to fetch form responses and saves them for further processing.

## Output
- `raw_form_responses.pickle`: Raw form data for preprocessing
- `form_metadata.json`: Form structure and metadata

## Prerequisites
- Google Cloud Project with Forms API enabled
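- OAuth client secrets file `credentials.json` in the notebook's working directory (a `token.json` credential cache is written to the same directory after the first login)
- Form ID of the Google Form you want to analyze, assigned to `FORM_ID` in the configuration cell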

In [None]:
# Install required packages for Google API
!pip install google-auth google-auth-oauthlib google-api-python-client pandas

import pandas as pd
import json
import pickle
import os
from google.auth.transport.requests import Request
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError

print("Libraries imported successfully!")

In [None]:
# Configuration
# OAuth scopes requested during login: read-only access to the submitted
# responses and to the form body.
SCOPES = [
    'https://www.googleapis.com/auth/forms.responses.readonly',
    'https://www.googleapis.com/auth/forms.body.readonly',
]

# Placeholder value -- replace it with the ID of the form to fetch.
FORM_ID = 'YOUR_FORM_ID_HERE'

def authenticate_google_api():
    """Return OAuth2 credentials for the Google API, logging in when needed.

    Cached credentials are read from ``token.json`` if that file exists and
    are returned as-is when they are valid. Otherwise they are refreshed
    (when expired and a refresh token is present) or obtained through the
    local-server login flow configured by ``credentials.json``; the resulting
    credentials are written back to ``token.json`` before being returned.
    """
    token_path = 'token.json'

    cached = None
    if os.path.exists(token_path):
        cached = Credentials.from_authorized_user_file(token_path, SCOPES)

    # Nothing to do when the cached credentials are still usable.
    if cached and cached.valid:
        return cached

    if cached and cached.expired and cached.refresh_token:
        cached.refresh(Request())
        fresh = cached
    else:
        login_flow = InstalledAppFlow.from_client_secrets_file('credentials.json', SCOPES)
        fresh = login_flow.run_local_server(port=0)

    # Persist the new or refreshed credentials for the next run.
    with open(token_path, 'w') as token_file:
        token_file.write(fresh.to_json())

    return fresh

# Authenticate, then build the Forms API client used by the cells below.
try:
    creds = authenticate_google_api()
    service = build('forms', 'v1', credentials=creds)
except Exception as auth_error:
    # Report the failure and a hint instead of a traceback.
    print(f"❌ Authentication error: {auth_error}")
    print("Please ensure you have credentials.json file in the current directory")
else:
    print("✅ Successfully authenticated with Google Forms API!")

In [None]:
def get_form_structure(service, form_id):
    """Fetch the definition of the form identified by *form_id*.

    Returns whatever the ``forms().get(...)`` request yields on success.
    When the request raises ``HttpError`` the error is printed and ``None``
    is returned instead.
    """
    try:
        request = service.forms().get(formId=form_id)
        structure = request.execute()
    except HttpError as err:
        print(f"Error getting form structure: {err}")
        return None
    return structure

def get_form_responses(service, form_id):
    """Fetch every response submitted to the Google Form.

    The responses endpoint is paginated: a page that has a successor carries
    a ``nextPageToken``, which is sent back as ``pageToken`` on the next
    request. Pages are requested until no token is returned, so forms with
    more responses than fit on one page are not truncated.

    Args:
        service: Forms API client (as built with ``build('forms', 'v1', ...)``).
        form_id: ID of the form whose responses are requested.

    Returns:
        A list holding the response dicts of all pages, in the order the API
        returned them. An empty list when the form has no responses, or when
        any request raises ``HttpError`` (the error is printed); pages that
        were already fetched are discarded in that case so a partial result
        is never mistaken for the complete set.
    """
    collected = []
    page_token = None
    try:
        while True:
            # Send pageToken only once the API has handed one out.
            paging = {'pageToken': page_token} if page_token else {}
            page = service.forms().responses().list(formId=form_id, **paging).execute()
            collected.extend(page.get('responses', []))
            page_token = page.get('nextPageToken')
            if not page_token:
                break
    except HttpError as e:
        print(f"Error fetching responses: {e}")
        return []
    return collected

def _question_columns(form_structure):
    """Map each questionId found in the form items to a unique column name."""
    columns = {}
    # Reserve the metadata column names so a question with the same title
    # cannot overwrite them.
    taken = {'response_id', 'create_time', 'last_submitted_time'}
    for item in (form_structure or {}).get('items', []):
        question = item.get('questionItem', {}).get('question', {})
        question_id = question.get('questionId')
        if question_id is None:
            # Not a question item (or no usable ID): nothing to map.
            continue
        # Items may carry no title; use the same ID-based name that unknown
        # question IDs receive.
        name = item.get('title') or f'Question_{question_id}'
        if name in taken:
            # Same title used twice: keep both columns instead of letting the
            # later question overwrite the earlier one.
            name = f'{name} ({question_id})'
        taken.add(name)
        columns[question_id] = name
    return columns


def _answer_to_text(answer):
    """Flatten one answer payload into the string stored in the DataFrame."""
    if 'textAnswers' in answer:
        entries = answer['textAnswers'].get('answers', [])
        return '; '.join(entry.get('value', '') for entry in entries)
    if 'fileUploadAnswers' in answer:
        return 'File uploaded'
    return str(answer)


def process_responses_to_dataframe(form_structure, responses):
    """Convert form responses to a pandas DataFrame with one row per response.

    Args:
        form_structure: Form definition dict whose ``items`` supply the
            question titles. ``None`` or a dict without ``items`` is accepted;
            answer columns are then named ``Question_<questionId>``.
        responses: List of response dicts, each optionally holding an
            ``answers`` mapping of questionId to answer payload.

    Returns:
        A DataFrame with the columns ``response_id``, ``create_time`` and
        ``last_submitted_time`` plus one column per answered question, named
        after the question title. Untitled questions are named
        ``Question_<questionId>``; a title that is already in use gets
        `` (<questionId>)`` appended. Text answers are joined with ``'; '``,
        file uploads are stored as ``'File uploaded'`` and any other payload
        as ``str(answer)``. When *responses* is empty a notice is printed and
        an empty DataFrame is returned.
    """
    if not responses:
        print("No responses found")
        return pd.DataFrame()

    columns = _question_columns(form_structure)

    rows = []
    for response in responses:
        row = {
            'response_id': response.get('responseId', ''),
            'create_time': response.get('createTime', ''),
            'last_submitted_time': response.get('lastSubmittedTime', ''),
        }
        for question_id, answer in (response.get('answers') or {}).items():
            column = columns.get(question_id, f'Question_{question_id}')
            row[column] = _answer_to_text(answer)
        rows.append(row)

    return pd.DataFrame(rows)

print("Form processing functions defined!")

In [None]:
# Fetch the form definition and its responses, then persist both for the
# next notebook.
print("📥 Fetching form structure...")
form_structure = get_form_structure(service, FORM_ID)

if not form_structure:
    print("❌ Could not fetch form structure. Please check your FORM_ID and permissions.")
else:
    form_info = form_structure.get('info', {})
    form_title = form_info.get('title', 'Unknown')
    print(f"📋 Form Title: {form_title}")
    print(f"📝 Form Description: {form_info.get('description', 'No description')}")

    print("\n📊 Fetching responses...")
    responses = get_form_responses(service, FORM_ID)
    response_count = len(responses)

    print(f"🔢 Found {response_count} responses")

    if not responses:
        print("⚠️ No responses found in the form")
    else:
        # Tabulate the responses.
        df = process_responses_to_dataframe(form_structure, responses)
        print(f"📊 DataFrame shape: {df.shape}")

        # Raw bundle consumed by the preprocessing notebook.
        raw_bundle = {
            'dataframe': df,
            'form_structure': form_structure,
            'raw_responses': responses
        }
        with open('raw_form_responses.pickle', 'wb') as pickle_file:
            pickle.dump(raw_bundle, pickle_file)

        # Human-readable summary of what was fetched.
        metadata = {
            'form_id': FORM_ID,
            'form_title': form_title,
            'form_description': form_info.get('description', ''),
            'total_responses': response_count,
            'dataframe_shape': df.shape,
            'columns': df.columns.tolist()
        }
        with open('form_metadata.json', 'w') as json_file:
            json.dump(metadata, json_file, indent=2)

        for line in (
            "\n✅ Data successfully saved!",
            "📁 Files created:",
            "  - raw_form_responses.pickle (for preprocessing)",
            "  - form_metadata.json (metadata)",
        ):
            print(line)

        # Show the first rows as a quick sanity check.
        print("\n👀 Preview of first 3 responses:")
        display(df.head(3))