# Economic Data Collection & Exploration
## Multi-Agent Economic Forecasting System - Notebook 1

**Objective**: Learn to collect economic data from the U.S. Bureau of Economic Analysis (BEA) API and perform initial exploration using our multi-agent system.

### What You'll Learn:
- BEA API integration and data retrieval
- Economic data preprocessing and cleaning
- Initial exploratory data analysis
- Data quality assessment using statistical tools
- Multi-agent system integration for data collection

## 1. Setup and Installation

In [None]:
# Install required packages
# The leading `!` is notebook shell-escape syntax (not plain Python); `-q` keeps pip's output quiet.
!pip install -q google-adk pandas numpy matplotlib seaborn plotly requests python-dotenv statsmodels scikit-learn

In [None]:
# Import libraries
import os
import sys
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from dotenv import load_dotenv
import asyncio

# Add src to Python path for our custom modules.
# Guarded so that re-running this cell does not keep appending duplicate
# entries to sys.path.
if '../src' not in sys.path:
    sys.path.append('../src')

# Load environment variables (the API keys are read later with os.getenv)
load_dotenv()

# Set up plotting style
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")

# Confirm the imports worked and record the library versions in the output
print("‚úÖ Libraries imported successfully")
print(f"üìä Pandas version: {pd.__version__}")
print(f"üî¢ NumPy version: {np.__version__}")

## 2. Initialize Multi-Agent System

In [None]:
# Import our multi-agent components
from google.adk.models.google_llm import Gemini
from google.genai import types
from agents.data_collector import DataCollectorAgent
from tools.bea_client import BEAClient, process_bea_response

# Pull both API keys from the process environment
bea_api_key = os.getenv('BEA_API_KEY')
google_api_key = os.getenv('GOOGLE_API_KEY')

# Report whether both keys are present; the cell keeps running either way
if bea_api_key and google_api_key:
    print("‚úÖ API keys loaded successfully")
else:
    print("‚ùå Please set BEA_API_KEY and GOOGLE_API_KEY in your .env file")
    print("üí° Get BEA API key from: https://www.bea.gov/API/signup/")
    print("üí° Get Google API key from: https://aistudio.google.com/")

# Retry policy handed to the Gemini model below
retry_config = types.HttpRetryOptions(
    http_status_codes=[429, 500, 503, 504],
    initial_delay=1,
    exp_base=7,
    attempts=5,
)

# Build the Gemini model object with that retry policy attached
model = Gemini(model="gemini-2.0-flash-exp", retry_options=retry_config)

print("ü§ñ Multi-agent system components initialized")

## 3. BEA API Data Collection

In [None]:
# Initialize BEA client directly for manual data exploration
bea_client = BEAClient(bea_api_key)

# Fetch different types of economic data
print("üì• Collecting economic data from BEA API...")

async def collect_economic_data():
    """Collect various economic datasets from BEA"""
    datasets = {}

    # GDP Data (Table 1.1.5 - Gross Domestic Product)
    print("üî∏ Fetching GDP data...")
    datasets['gdp'] = await bea_client.get_gdp_data()

    # Unemployment and Labor Data
    print("üî∏ Fetching unemployment data...")
    datasets['unemployment'] = await bea_client.get_unemployment_data()

    # Inflation Data (Price Indexes)
    print("üî∏ Fetching inflation data...")
    datasets['inflation'] = await bea_client.get_inflation_data()

    return datasets

# Execute data collection
economic_datasets = await collect_economic_data()
print("‚úÖ Economic data collection completed")

In [None]:
# Summarize how many records each collected dataset contains
print("üìã Dataset Overview:")
for dataset_name, data in economic_datasets.items():
    # Walk BEAAPI -> Results -> Data, tolerating missing levels
    records = data.get('BEAAPI', {}).get('Results', {}).get('Data')
    if not records:
        print(f"   {dataset_name.upper():<15}: No data retrieved")
        continue
    data_points = len(records)
    print(f"   {dataset_name.upper():<15}: {data_points} data points")

## 4. Data Processing and Cleaning

In [None]:
# Process GDP data into DataFrame
# NOTE(review): process_bea_response is imported from tools.bea_client; the code
# below relies on its result exposing .shape, .columns and .head (DataFrame-like).
gdp_df = process_bea_response(economic_datasets['gdp'])

# Report the dimensions and column names of the processed result
print("üìä GDP Data Structure:")
print(f"Shape: {gdp_df.shape}")
print(f"Columns: {gdp_df.columns.tolist()}")

# Display first few rows
print("\nüîç First 10 rows of GDP data:")
# Bare expression at the end of the cell, so the notebook renders it as output
gdp_df.head(10)

In [None]:
# Data cleaning and transformation
def clean_economic_data(df):
    """Clean and transform economic data for analysis.

    The input is copied first, so ``df`` itself is never modified. Each step
    is applied only when the column it needs is present:

    * ``DataValue``: string values have thousands separators (``,``) and
      surrounding whitespace removed, then the column is converted to
      numeric. Values that still cannot be parsed become NaN and their rows
      are dropped.
    * ``TimePeriod``: parsed with ``pd.to_datetime`` and used to sort the
      rows in ascending order.

    Args:
        df: pandas DataFrame, optionally containing ``DataValue`` and/or
            ``TimePeriod`` columns.

    Returns:
        A new, cleaned DataFrame.

    Raises:
        Whatever ``pd.to_datetime`` raises when ``TimePeriod`` holds values
        it cannot parse.
    """
    # Create a copy to avoid modifying original
    clean_df = df.copy()

    has_value = 'DataValue' in clean_df.columns
    has_period = 'TimePeriod' in clean_df.columns

    if has_value:
        values = clean_df['DataValue']
        if not pd.api.types.is_numeric_dtype(values):
            # Without this, "1,234.5" is coerced to NaN below and the row is
            # silently dropped.
            values = values.map(
                lambda v: v.replace(',', '').strip() if isinstance(v, str) else v
            )
        clean_df['DataValue'] = pd.to_numeric(values, errors='coerce')

    if has_period:
        clean_df['TimePeriod'] = pd.to_datetime(clean_df['TimePeriod'])
        clean_df = clean_df.sort_values('TimePeriod')

    # Only drop on DataValue when it exists; dropna(subset=...) raises
    # KeyError for a missing column.
    if has_value:
        clean_df = clean_df.dropna(subset=['DataValue'])

    return clean_df

# Clean the GDP data
gdp_clean = clean_economic_data(gdp_df)
# Report how many rows remain after cleaning
print(f"‚úÖ Data cleaning completed. Remaining rows: {len(gdp_clean)}")
# Earliest and latest TimePeriod values; this indexes the column directly, so it
# assumes gdp_clean has a 'TimePeriod' column.
print(f"üìÖ Date range: {gdp_clean['TimePeriod'].min()} to {gdp_clean['TimePeriod'].max()}")

## 5. Exploratory Data Analysis

In [None]:
# Descriptive statistics for the value column, when it is present
print("üìà GDP Data Statistical Summary:")
if 'DataValue' in gdp_clean.columns:
    value_summary = gdp_clean['DataValue'].describe()
    print(value_summary)

# Per-column null counts; only columns with at least one null are printed
print("\nüîç Missing Values Analysis:")
missing_data = gdp_clean.isna().sum()
print(missing_data.loc[missing_data.gt(0)])

In [None]:
# Time series visualization of GDP data
if 'TimePeriod' in gdp_clean.columns and 'DataValue' in gdp_clean.columns:
    # Create the figure only once we know there is something to draw, so the
    # error branch below does not leave an empty figure open.
    plt.figure(figsize=(12, 6))

    # One line with point markers: DataValue against TimePeriod
    plt.plot(gdp_clean['TimePeriod'], gdp_clean['DataValue'],
             marker='o', linewidth=2, markersize=4, label='GDP')

    plt.title('GDP Over Time', fontsize=16, fontweight='bold')
    plt.xlabel('Time Period', fontsize=12)
    plt.ylabel('GDP Value', fontsize=12)
    plt.grid(True, alpha=0.3)
    plt.legend()
    # Rotate the tick labels so date labels do not overlap
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.show()
else:
    print("‚ùå Required columns not found for visualization")

## 6. Using Data Collector Agent

In [None]:
# Initialize the Data Collector Agent
data_collector_agent = DataCollectorAgent(bea_api_key, model)

print("ü§ñ Data Collector Agent initialized")
print("üõ†Ô∏è Available tools:")
for tool in data_collector_agent.agent.tools:
    # Tool objects are expected to expose `.name`; a tool registered as a plain
    # callable has only `__name__`, so fall back to that (and finally to repr)
    # instead of raising AttributeError.
    # NOTE(review): which form DataCollectorAgent registers is not visible here.
    tool_name = getattr(tool, 'name', None) or getattr(tool, '__name__', repr(tool))
    print(f"   - {tool_name}")

In [None]:
# Test the agent's data collection capabilities
async def test_data_collector():
    """Test the data collector agent's functionality"""

    print("üß™ Testing Data Collector Agent...")

    # Test GDP data collection
    gdp_result = await data_collector_agent.get_gdp_data()
    print(f"üìä GDP Data Collection: {gdp_result['status']}")
    if gdp_result['status'] == 'success':
        print(f"   Data points: {len(gdp_result['data'])}")
        print(f"   Message: {gdp_result['message']}")

    # Test unemployment data collection
    unemployment_result = await data_collector_agent.get_unemployment_data()
    print(f"üìä Unemployment Data Collection: {unemployment_result['status']}")
    if unemployment_result['status'] == 'success':
        print(f"   Data points: {len(unemployment_result['data'])}")
        print(f"   Message: {unemployment_result['message']}")

# Run the test
await test_data_collector()

## 7. Data Quality Assessment

In [None]:
from tools.statistical_tools import StatisticalTools

# Initialize statistical tools
stat_tools = StatisticalTools()


def _format_indicator(indicators, key, suffix=''):
    """Return ``indicators[key]`` formatted to two decimals plus ``suffix``.

    Returns 'N/A' when the key is missing or its value is None, and the plain
    ``str()`` of the value when it does not support the ``.2f`` format.
    Applying ``:.2f`` directly to the 'N/A' fallback string raises ValueError.
    """
    value = indicators.get(key)
    if value is None:
        return 'N/A'
    try:
        return f"{value:.2f}{suffix}"
    except (TypeError, ValueError):
        return str(value)


# Assess data quality using statistical tools
if 'DataValue' in gdp_clean.columns:
    quality_assessment = stat_tools.calculate_indicators(gdp_clean)

    if quality_assessment['status'] == 'success':
        print("‚úÖ Data Quality Assessment:")
        indicators = quality_assessment['indicators']

        # Numeric indicators go through the helper so an absent value prints
        # as N/A instead of aborting the cell.
        print(f"   Current Value: {_format_indicator(indicators, 'current_value')}")
        print(f"   Mean: {_format_indicator(indicators, 'mean')}")
        print(f"   Standard Deviation: {_format_indicator(indicators, 'std_dev')}")
        print(f"   Recent Growth: {_format_indicator(indicators, 'recent_growth', '%')}")
        print(f"   Volatility: {_format_indicator(indicators, 'volatility', '%')}")
        print(f"   Trend Strength: {_format_indicator(indicators, 'trend_strength')}")
        # Printed without a numeric format, so it needs no special handling
        print(f"   Business Cycle: {indicators.get('business_cycle_position', 'N/A')}")
    else:
        print("‚ùå Data quality assessment failed")
else:
    print("‚ùå No DataValue column found for assessment")

## 8. Advanced Visualizations

In [None]:
# Interactive Plotly chart: the GDP series plus a 4-row rolling mean
if 'TimePeriod' not in gdp_clean.columns or 'DataValue' not in gdp_clean.columns:
    print("‚ùå Required columns not found for interactive visualization")
else:
    # Rolling mean over a window of 4 rows, computed on time-sorted data
    gdp_clean_sorted = gdp_clean.sort_values('TimePeriod')
    rolling_avg = gdp_clean_sorted['DataValue'].rolling(window=4).mean()

    # Main GDP line with point markers
    gdp_trace = go.Scatter(
        x=gdp_clean['TimePeriod'],
        y=gdp_clean['DataValue'],
        mode='lines+markers',
        name='GDP',
        line={'color': '#1f77b4', 'width': 3},
        marker={'size': 6},
    )

    # Dashed trend line built from the rolling mean
    trend_trace = go.Scatter(
        x=gdp_clean_sorted['TimePeriod'],
        y=rolling_avg,
        mode='lines',
        name='4-Quarter Moving Average',
        line={'color': '#ff7f0e', 'width': 2, 'dash': 'dash'},
    )

    # Assemble the figure: GDP first, then the trend overlay
    fig = go.Figure()
    fig.add_trace(gdp_trace)
    fig.add_trace(trend_trace)

    fig.update_layout(
        title='GDP Time Series with Trend Analysis',
        xaxis_title='Time Period',
        yaxis_title='GDP Value',
        template='plotly_white',
        height=500,
    )

    fig.show()

## 9. Summary and Next Steps

In [None]:
# Banner for the closing summary
print("üéØ NOTEBOOK 1 SUMMARY")
print("=" * 50)

# Recap the GDP data only if the cleaning cell ran and left at least one row
if 'gdp_clean' in locals() and not gdp_clean.empty:
    print(f"‚úÖ Successfully collected and processed {len(gdp_clean)} GDP data points")

    # First and last periods, rendered as year-month
    period_start = gdp_clean['TimePeriod'].min().strftime('%Y-%m')
    period_end = gdp_clean['TimePeriod'].max().strftime('%Y-%m')
    print(f"üìÖ Data covers period: {period_start} to {period_end}")

    if 'DataValue' in gdp_clean.columns:
        gdp_values = gdp_clean['DataValue']
        # Last row's value and its percent change from the previous row
        latest_gdp = gdp_values.iloc[-1]
        growth_rate = gdp_values.pct_change().iloc[-1] * 100
        print(f"üìà Latest GDP: {latest_gdp:.2f}")
        print(f"üìà Latest quarterly growth: {growth_rate:.2f}%")

# Static guidance, printed one line at a time
for closing_line in (
    "\nüîú Next Steps:",
    "   1. Proceed to Notebook 2: Economic Indicator Analysis",
    "   2. Explore other economic datasets (unemployment, inflation)",
    "   3. Use the Data Collector Agent for automated data retrieval",
    "\nüí° Tips for Production Use:",
    "   - Set up scheduled data collection using the Data Collector Agent",
    "   - Implement error handling for API rate limits",
    "   - Cache data to reduce API calls",
    "   - Monitor data quality with statistical tools",
):
    print(closing_line)