In [36]:
import os
os.chdir("/Users/naveenkumar/Desktop/formula-1-bot")
%pwd

'/Users/naveenkumar/Desktop/formula-1-bot'

In [37]:
import requests
import pandas as pd
import json
import time
import logging
from pathlib import Path
from typing import Dict, List, Any, Optional
from urllib.parse import urlencode
from dataclasses import dataclass

In [38]:
# Update Cell 3: Data classes
@dataclass
class DataIngestionConfig:
    """Configuration for data ingestion from OpenF1 API"""
    source_url: str = "https://api.openf1.org/v1"
    api_timeout: int = 30
    max_retries: int = 3
    batch_size: int = 1000
    target_meetings: List[Dict[str, str]] = None
    sessions_to_ingest: List[str] = None
    endpoints: List[str] = None
    
    def __post_init__(self):
        if self.target_meetings is None:
            self.target_meetings = [{"meeting_key": "latest"}]
        if self.sessions_to_ingest is None:
            self.sessions_to_ingest = [
                "Practice 1", "Practice 2", "Practice 3", 
                "Qualifying", "Sprint", "Race"
            ]
        if self.endpoints is None:
            self.endpoints = [
                "meetings", "sessions", "drivers", "laps", 
                "pit", "stints", "position", 
                "intervals", "race_control", 
                "weather"
            ]

In [39]:
@dataclass
class DatabaseConfig:
    """Configuration for database"""
    host: str = "localhost"
    port: int = 5432
    database: str = "f1_bot"
    user: str = "f1_user"
    password: str = "f1_password"
    
    @property
    def connection_string(self) -> str:
        return f"postgresql://{self.user}:{self.password}@{self.host}:{self.port}/{self.database}"

In [40]:
from src.formula_one.constants import *
from src.formula_one.utils.common import read_yaml, create_directories

In [41]:
# Update Cell 6: Configuration Manager
class ConfigurationManager:
    """Manages configuration loading from YAML files"""
    
    def __init__(self, config_file_path: str = "config/config.yaml"):
        self.config_file_path = Path(config_file_path)
        self.config = read_yaml(self.config_file_path)
    
    def get_data_ingestion_config(self) -> DataIngestionConfig:
        """Get data ingestion configuration"""
        config_data = self.config.get('data_ingestion', {})
        
        return DataIngestionConfig(
            # API configuration only
            source_url=config_data.get('api', {}).get('base_url', 'https://api.openf1.org/v1'),
            api_timeout=config_data.get('api', {}).get('timeout', 30),
            max_retries=config_data.get('api', {}).get('max_retries', 3),
            batch_size=config_data.get('api', {}).get('batch_size', 1000),
            target_meetings=config_data.get('target_meetings', [{"meeting_key": "1219"}]),
            sessions_to_ingest=config_data.get('sessions_to_ingest', [
                "Practice 1", "Practice 2", "Practice 3", "Qualifying", "Race"
            ]),
            endpoints=config_data.get('endpoints', [
                "meetings", "sessions", "drivers", "laps", "pit", "stints", 
                "position", "intervals", "team_radio", "race_control", "weather"
            ])
        )

In [42]:
# Cell 6: API Client
import time

class OpenF1APIClient:
    def __init__(self, config: DataIngestionConfig):
        self.config = config
        self.session = requests.Session()
        self.session.timeout = config.api_timeout
        self.logger = logging.getLogger(__name__)
        self.last_request_time = 0
        self.min_request_interval = 0.5  # 500ms between requests
    
    def _make_request(self, endpoint: str, params: Optional[Dict[str, Any]] = None) -> List[Dict[str, Any]]:
        """Make API request with retry logic and rate limiting"""
        # Rate limiting
        current_time = time.time()
        time_since_last = current_time - self.last_request_time
        if time_since_last < self.min_request_interval:
            time.sleep(self.min_request_interval - time_since_last)
        
        url = f"{self.config.source_url}/{endpoint}"
        
        if params:
            url += f"?{urlencode(params)}"
        
        for attempt in range(self.config.max_retries):
            try:
                self.logger.info(f"Making request to: {url}")
                response = self.session.get(url)
                self.last_request_time = time.time()
                
                response.raise_for_status()
                
                data = response.json()
                self.logger.info(f"Successfully retrieved {len(data)} records from {endpoint}")
                return data
                
            except requests.exceptions.RequestException as e:
                self.logger.warning(f"Attempt {attempt + 1} failed for {endpoint}: {e}")
                if attempt == self.config.max_retries - 1:
                    raise
                time.sleep(2 ** attempt)  # Exponential backoff
    
    def get_meetings(self, year: Optional[int] = None) -> List[Dict[str, Any]]:
        """Get meetings/race weekends"""
        params = {"year": year} if year else {}
        return self._make_request("meetings", params)
    
    def get_sessions(self, meeting_key: str) -> List[Dict[str, Any]]:
        """Get sessions for a specific meeting"""
        params = {"meeting_key": meeting_key}
        return self._make_request("sessions", params)
    
    def get_drivers(self, session_key: str) -> List[Dict[str, Any]]:
        """Get drivers for a specific session"""
        params = {"session_key": session_key}
        return self._make_request("drivers", params)
    
    def get_laps(self, session_key: str) -> List[Dict[str, Any]]:
        """Get lap data for a specific session"""
        params = {"session_key": session_key}
        return self._make_request("laps", params)
    
    # def get_car_data(self, session_key: str, driver_number: Optional[int] = None) -> List[Dict[str, Any]]:
    #     """Get car telemetry data"""
    #     params = {"session_key": session_key}
    #     if driver_number:
    #         params["driver_number"] = driver_number
    #     return self._make_request("car_data", params)
    
    def get_pit_stops(self, session_key: str) -> List[Dict[str, Any]]:
        """Get pit stop data"""
        params = {"session_key": session_key}
        return self._make_request("pit", params)
    
    def get_stints(self, session_key: str) -> List[Dict[str, Any]]:
        """Get stint data"""
        params = {"session_key": session_key}
        return self._make_request("stints", params)
    
    def get_positions(self, session_key: str) -> List[Dict[str, Any]]:
        """Get position data"""
        params = {"session_key": session_key}
        return self._make_request("position", params)
    
    def get_intervals(self, session_key: str) -> List[Dict[str, Any]]:
        """Get interval data"""
        params = {"session_key": session_key}
        return self._make_request("intervals", params)
    
    # def get_team_radio(self, session_key: str) -> List[Dict[str, Any]]:
    #     """Get team radio data"""
    #     params = {"session_key": session_key}
    #     return self._make_request("team_radio", params)
    
    def get_race_control(self, session_key: str) -> List[Dict[str, Any]]:
        """Get race control data"""
        params = {"session_key": session_key}
        return self._make_request("race_control", params)
    
    def get_weather(self, session_key: str) -> List[Dict[str, Any]]:
        """Get weather data"""
        params = {"session_key": session_key}
        return self._make_request("weather", params)

In [43]:
# Update the DataIngestion class methods
class DataIngestion:
    """Main data ingestion pipeline"""
    
    def __init__(self, data_ingestion_config: DataIngestionConfig, api_client: OpenF1APIClient):
        self.data_ingestion_config = data_ingestion_config
        self.api_client = api_client
        self.logger = logging.getLogger(__name__)
    
    # Add this method to your DataIngestion class (around line 150)
    def _fetch_endpoint_data(self, endpoint: str, session_key: str) -> List[Dict[str, Any]]:
        """Fetch data for a specific endpoint"""
        endpoint_methods = {
            "sessions": lambda: self.api_client.get_sessions(session_key),
            "drivers": lambda: self.api_client.get_drivers(session_key),
            "laps": lambda: self.api_client.get_laps(session_key),
            #"car_data": lambda: self._safe_get_car_data(session_key),  # Use safe method
            "pit": lambda: self.api_client.get_pit_stops(session_key),
            "stints": lambda: self.api_client.get_stints(session_key),
            "position": lambda: self.api_client.get_positions(session_key),
            "intervals": lambda: self.api_client.get_intervals(session_key),
            #"team_radio": lambda: self.api_client.get_team_radio(session_key),
            "race_control": lambda: self.api_client.get_race_control(session_key),
            "weather": lambda: self.api_client.get_weather(session_key)
        }
        
        if endpoint in endpoint_methods:
            return endpoint_methods[endpoint]()
        else:
            self.logger.warning(f"Unknown endpoint: {endpoint}")
            return []

    # def _safe_get_car_data(self, session_key: str) -> List[Dict[str, Any]]:
    #     """Safely get car data, return empty list if not available"""
    #     try:
    #         return self.api_client.get_car_data(session_key)
    #     except Exception as e:
    #         self.logger.warning(f"Car data not available for session {session_key}: {e}")
    #         return []
    
    def ingest_meeting_data(self, meeting_key: str) -> Dict[str, Any]:
        """Ingest all data for a specific meeting"""
        self.logger.info(f"Starting data ingestion for meeting: {meeting_key}")
        
        # Get meeting info
        meetings = self.api_client.get_meetings()
        meeting_info = next((m for m in meetings if str(m.get('meeting_key')) == meeting_key), None)
        
        if not meeting_info:
            self.logger.error(f"Meeting {meeting_key} not found")
            return {}
        
        # Get sessions for this meeting
        sessions = self.api_client.get_sessions(meeting_key)
        self.logger.info(f"Found {len(sessions)} sessions for meeting {meeting_key}")
        
        meeting_data = {
            "meeting_info": meeting_info,
            "sessions": {},
        }
        
        # Ingest data for each session
        for session in sessions:
            session_name = session.get('session_name', 'Unknown')
            session_key = session.get('session_key')
            
            if session_name not in self.data_ingestion_config.sessions_to_ingest:
                self.logger.info(f"Skipping session: {session_name}")
                continue
            
            self.logger.info(f"Ingesting data for session: {session_name}")
            session_data = self._ingest_session_data(session_key, session_name, meeting_key)
            
            # Add session data to meeting_data
            meeting_data["sessions"][session_name] = session_data
        
        return meeting_data
    
    def _ingest_session_data(self, session_key: str, session_name: str, meeting_key: str) -> Dict[str, Any]:
        """Ingest data for a specific session"""
        session_data = {}
        
        for endpoint in self.data_ingestion_config.endpoints:
            try:
                if endpoint == "meetings":
                    continue
                
                data = self._fetch_endpoint_data(endpoint, session_key)
                session_data[endpoint] = data
                
                # Remove file saving - data will go to database
                self.logger.info(f"Retrieved {len(data)} records from {endpoint} for session {session_key}")
                
            except Exception as e:
                self.logger.error(f"Error ingesting {endpoint} for session {session_key}: {e}")
                session_data[endpoint] = []
        
        return session_data
    
    # Remove _save_raw_data and _save_meeting_data methods - no longer needed
    
    def run_ingestion(self):
        """Run the complete data ingestion pipeline"""
        self.logger.info("Starting data ingestion pipeline")
        
        for meeting_config in self.config.target_meetings:
            meeting_key = meeting_config["meeting_key"]
            try:
                self.ingest_meeting_data(meeting_key)
            except Exception as e:
                self.logger.error(f"Error ingesting meeting {meeting_key}: {e}")
        
        self.logger.info("Data ingestion pipeline completed")

In [44]:
# Add this new cell with the complete DatabaseIngestion class
import psycopg2
from psycopg2.extras import RealDictCursor
import pandas as pd

class DatabaseIngestion:
    """Handles database operations for F1 data"""
    
    def __init__(self, data_config: DataIngestionConfig, db_config: DatabaseConfig, api_client: OpenF1APIClient):
        self.data_config = data_config
        self.db_config = db_config
        self.api_client = api_client  # Add this line
        self.logger = logging.getLogger(__name__)
    
    def connect_to_db(self):
        """Create database connection"""
        try:
            conn = psycopg2.connect(
                host=self.db_config.host,
                port=self.db_config.port,
                database=self.db_config.database,
                user=self.db_config.user,
                password=self.db_config.password
            )
            self.logger.info("Successfully connected to PostgreSQL database")
            return conn
        except Exception as e:
            self.logger.error(f"Failed to connect to database: {e}")
            raise
    
    # Update the create_tables method with optimized ML-focused schema
    def create_tables(self):
        """Create database tables optimized for ML models"""
        conn = self.connect_to_db()
        cursor = conn.cursor()
        
        # Drop existing tables to recreate with optimized schema
        cursor.execute("DROP TABLE IF EXISTS weather CASCADE")
        cursor.execute("DROP TABLE IF EXISTS race_control CASCADE")
        #cursor.execute("DROP TABLE IF EXISTS team_radio CASCADE")
        cursor.execute("DROP TABLE IF EXISTS intervals CASCADE")
        cursor.execute("DROP TABLE IF EXISTS stints CASCADE")
        cursor.execute("DROP TABLE IF EXISTS pit_stops CASCADE")
        cursor.execute("DROP TABLE IF EXISTS positions CASCADE")
        cursor.execute("DROP TABLE IF EXISTS laps CASCADE")
        cursor.execute("DROP TABLE IF EXISTS drivers CASCADE")
        cursor.execute("DROP TABLE IF EXISTS sessions CASCADE")
        cursor.execute("DROP TABLE IF EXISTS meetings CASCADE")
        
        # Core tables with essential fields only
        cursor.execute("""
            CREATE TABLE meetings (
                meeting_key INTEGER PRIMARY KEY,
                meeting_name VARCHAR(255),
                country_name VARCHAR(100),
                circuit_short_name VARCHAR(100),
                date_start TIMESTAMP,
                year INTEGER,
                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
            )
        """)
        
        cursor.execute("""
            CREATE TABLE sessions (
                session_key INTEGER PRIMARY KEY,
                meeting_key INTEGER REFERENCES meetings(meeting_key),
                session_name VARCHAR(100),
                session_type VARCHAR(50),
                date_start TIMESTAMP,
                date_end TIMESTAMP,
                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
            )
        """)
        
        cursor.execute("""
            CREATE TABLE drivers (
                id SERIAL PRIMARY KEY,
                session_key INTEGER REFERENCES sessions(session_key),
                meeting_key INTEGER REFERENCES meetings(meeting_key),
                driver_number INTEGER,
                full_name VARCHAR(100),
                team_name VARCHAR(100),
                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
            )
        """)
        
        # ML-focused laps table
        cursor.execute("""
            CREATE TABLE laps (
                id SERIAL PRIMARY KEY,
                session_key INTEGER REFERENCES sessions(session_key),
                meeting_key INTEGER REFERENCES meetings(meeting_key),
                driver_number INTEGER,
                lap_number INTEGER,
                lap_duration FLOAT,
                duration_sector_1 FLOAT,
                duration_sector_2 FLOAT,
                duration_sector_3 FLOAT,
                is_pit_out_lap BOOLEAN,
                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
            )
        """)
        
        # ML-focused pit stops
        cursor.execute("""
            CREATE TABLE pit_stops (
                id SERIAL PRIMARY KEY,
                session_key INTEGER REFERENCES sessions(session_key),
                meeting_key INTEGER REFERENCES meetings(meeting_key),
                driver_number INTEGER,
                lap_number INTEGER,
                pit_duration FLOAT,
                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
            )
        """)
        
        # ML-focused stints
        cursor.execute("""
            CREATE TABLE stints (
                id SERIAL PRIMARY KEY,
                session_key INTEGER REFERENCES sessions(session_key),
                meeting_key INTEGER REFERENCES meetings(meeting_key),
                driver_number INTEGER,
                compound VARCHAR(20),
                lap_start INTEGER,
                lap_end INTEGER,
                tyre_age_at_start INTEGER,
                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
            )
        """)
        
        # ML-focused positions
        cursor.execute("""
            CREATE TABLE positions (
                id SERIAL PRIMARY KEY,
                session_key INTEGER REFERENCES sessions(session_key),
                meeting_key INTEGER REFERENCES meetings(meeting_key),
                driver_number INTEGER,
                position INTEGER,
                date TIMESTAMP,
                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
            )
        """)
        
        # ML-focused weather
        cursor.execute("""
            CREATE TABLE weather (
                id SERIAL PRIMARY KEY,
                session_key INTEGER REFERENCES sessions(session_key),
                meeting_key INTEGER REFERENCES meetings(meeting_key),
                air_temperature FLOAT,
                track_temperature FLOAT,
                humidity FLOAT,
                rainfall BOOLEAN,
                date TIMESTAMP,
                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
            )
        """)

        cursor.execute("""
            CREATE TABLE intervals (
                id SERIAL PRIMARY KEY,
                session_key INTEGER REFERENCES sessions(session_key),
                meeting_key INTEGER REFERENCES meetings(meeting_key),
                driver_number INTEGER,
                gap_to_leader FLOAT,  -- Time gap to race leader in seconds
                interval FLOAT,       -- Time gap to car ahead in seconds
                date TIMESTAMP,       -- UTC date and time
                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
            )
        """)
        
        conn.commit()
        cursor.close()
        conn.close()
        self.logger.info("Optimized database tables created successfully")
    
    def insert_meeting_data(self, meeting_data: Dict[str, Any]):
        """Insert meeting data into database with optimized schema"""
        conn = self.connect_to_db()
        cursor = conn.cursor()
        
        try:
            # Insert meeting info - OPTIMIZED FIELDS ONLY
            meeting_info = meeting_data["meeting_info"]
            cursor.execute("""
                INSERT INTO meetings (
                    meeting_key, meeting_name, country_name, 
                    circuit_short_name, date_start, year
                ) VALUES (%s, %s, %s, %s, %s, %s)
                ON CONFLICT (meeting_key) DO NOTHING
            """, (
                meeting_info.get('meeting_key'),
                meeting_info.get('meeting_name'),
                meeting_info.get('country_name'),
                meeting_info.get('circuit_short_name'),
                meeting_info.get('date_start'),
                meeting_info.get('year')
            ))
            
            # Get the original sessions list
            sessions_list = self.api_client.get_sessions(str(meeting_info.get('meeting_key')))
            
            # Insert sessions - OPTIMIZED FIELDS ONLY
            for session_name, session_data in meeting_data["sessions"].items():
                # Find the session info from the original sessions list
                session_info = next((s for s in sessions_list if s.get('session_name') == session_name), None)
                
                if session_info:
                    cursor.execute("""
                        INSERT INTO sessions (
                            session_key, meeting_key, session_name, session_type,
                            date_start, date_end
                        ) VALUES (%s, %s, %s, %s, %s, %s)
                        ON CONFLICT (session_key) DO NOTHING
                    """, (
                        session_info.get('session_key'),
                        meeting_info.get('meeting_key'),
                        session_info.get('session_name'),
                        session_info.get('session_type'),
                        session_info.get('date_start'),
                        session_info.get('date_end')
                    ))
                    
                    # Insert drivers for this session - OPTIMIZED FIELDS ONLY
                    if "drivers" in session_data and session_data["drivers"]:
                        for driver in session_data["drivers"]:
                            cursor.execute("""
                                INSERT INTO drivers (
                                    session_key, meeting_key, driver_number,
                                    full_name, team_name
                                ) VALUES (%s, %s, %s, %s, %s)
                                ON CONFLICT DO NOTHING
                            """, (
                                session_info.get('session_key'),
                                meeting_info.get('meeting_key'),
                                driver.get('driver_number'),
                                driver.get('full_name'),
                                driver.get('team_name')
                            ))
                    
                    # Insert laps for this session - OPTIMIZED FIELDS ONLY
                    if "laps" in session_data and session_data["laps"]:
                        for lap in session_data["laps"]:
                            cursor.execute("""
                                INSERT INTO laps (
                                    session_key, meeting_key, driver_number, lap_number,
                                    lap_duration, duration_sector_1, duration_sector_2, 
                                    duration_sector_3, is_pit_out_lap
                                ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)
                            """, (
                                session_info.get('session_key'),
                                meeting_info.get('meeting_key'),
                                lap.get('driver_number'),
                                lap.get('lap_number'),
                                lap.get('lap_duration'),
                                lap.get('duration_sector_1'),
                                lap.get('duration_sector_2'),
                                lap.get('duration_sector_3'),
                                lap.get('is_pit_out_lap')
                            ))
                    
                    # Insert pit stops for this session - OPTIMIZED FIELDS ONLY
                    if "pit" in session_data and session_data["pit"]:
                        for pit in session_data["pit"]:
                            cursor.execute("""
                                INSERT INTO pit_stops (
                                    session_key, meeting_key, driver_number, 
                                    lap_number, pit_duration
                                ) VALUES (%s, %s, %s, %s, %s)
                            """, (
                                session_info.get('session_key'),
                                meeting_info.get('meeting_key'),
                                pit.get('driver_number'),
                                pit.get('lap_number'),
                                pit.get('pit_duration')
                            ))
                    
                    # Insert stints for this session - OPTIMIZED FIELDS ONLY
                    if "stints" in session_data and session_data["stints"]:
                        for stint in session_data["stints"]:
                            cursor.execute("""
                                INSERT INTO stints (
                                    session_key, meeting_key, driver_number,
                                    compound, lap_start, lap_end, tyre_age_at_start
                                ) VALUES (%s, %s, %s, %s, %s, %s, %s)
                            """, (
                                session_info.get('session_key'),
                                meeting_info.get('meeting_key'),
                                stint.get('driver_number'),
                                stint.get('compound'),
                                stint.get('lap_start'),
                                stint.get('lap_end'),
                                stint.get('tyre_age_at_start')
                            ))
                    
                    # Insert positions for this session - OPTIMIZED FIELDS ONLY
                    if "position" in session_data and session_data["position"]:
                        for pos in session_data["position"]:
                            cursor.execute("""
                                INSERT INTO positions (
                                    session_key, meeting_key, driver_number, 
                                    position, date
                                ) VALUES (%s, %s, %s, %s, %s)
                            """, (
                                session_info.get('session_key'),
                                meeting_info.get('meeting_key'),
                                pos.get('driver_number'),
                                pos.get('position'),
                                pos.get('date')
                            ))
                    
                    # Insert weather for this session - OPTIMIZED FIELDS ONLY
                    if "weather" in session_data and session_data["weather"]:
                        for weather in session_data["weather"]:
                            cursor.execute("""
                                INSERT INTO weather (
                                    session_key, meeting_key, air_temperature,
                                    track_temperature, humidity, rainfall, date
                                ) VALUES (%s, %s, %s, %s, %s, %s, %s)
                            """, (
                                session_info.get('session_key'),
                                meeting_info.get('meeting_key'),
                                weather.get('air_temperature'),
                                weather.get('track_temperature'),
                                weather.get('humidity'),
                                bool(weather.get('rainfall', 0)),  # Convert int to bool
                                weather.get('date')
                            ))
                    
                    if "intervals" in session_data and session_data["intervals"]:
                        for interval in session_data["intervals"]:
                            cursor.execute("""
                                INSERT INTO intervals (
                                    session_key, meeting_key, driver_number, 
                                    gap_to_leader, interval, date
                                ) VALUES (%s, %s, %s, %s, %s, %s)
                            """, (
                                session_info.get('session_key'),
                                meeting_info.get('meeting_key'),
                                interval.get('driver_number'),
                                interval.get('gap_to_leader'),
                                interval.get('interval'),
                                interval.get('date')
                            ))
            
            # IMPORTANT: Explicitly commit the transaction
            conn.commit()
            self.logger.info(f"Successfully inserted meeting {meeting_info.get('meeting_key')} data with optimized schema")
            
        except Exception as e:
            conn.rollback()
            self.logger.error(f"Error inserting meeting data: {e}")
            raise
        finally:
            cursor.close()
            conn.close()

In [45]:
# Simple discovery - just run this cell
config_manager = ConfigurationManager()
data_ingestion_config = config_manager.get_data_ingestion_config()
api_client = OpenF1APIClient(data_ingestion_config)

# Get all meetings
meetings = api_client.get_meetings()

# Filter for 2025
meetings_2025 = [m for m in meetings if m.get('year') == 2025]
meetings_2025.sort(key=lambda x: x.get('date_start', ''))

print(f"Found {len(meetings_2025)} meetings in 2025:")
for meeting in meetings_2025:
    print(f"  - {meeting.get('meeting_name')} (Key: {meeting.get('meeting_key')})")

[2025-07-02 18:25:40,526: INFO: common: yaml file: config/config.yaml loaded successfully]
[2025-07-02 18:25:40,527: INFO: 1829873344: Making request to: https://api.openf1.org/v1/meetings]
[2025-07-02 18:25:40,841: INFO: 1829873344: Successfully retrieved 60 records from meetings]
Found 12 meetings in 2025:
  - Pre-Season Testing (Key: 1253)
  - Australian Grand Prix (Key: 1254)
  - Chinese Grand Prix (Key: 1255)
  - Japanese Grand Prix (Key: 1256)
  - Bahrain Grand Prix (Key: 1257)
  - Saudi Arabian Grand Prix (Key: 1258)
  - Miami Grand Prix (Key: 1259)
  - Emilia Romagna Grand Prix (Key: 1260)
  - Monaco Grand Prix (Key: 1261)
  - Spanish Grand Prix (Key: 1262)
  - Canadian Grand Prix (Key: 1263)
  - Austrian Grand Prix (Key: 1264)


In [46]:
# Load configuration
config_manager = ConfigurationManager()
data_ingestion_config = config_manager.get_data_ingestion_config()
db_config = DatabaseConfig()

# Create API client and database ingestion
api_client = OpenF1APIClient(data_ingestion_config)
db_ingestion = DatabaseIngestion(data_ingestion_config, db_config, api_client)

# Create tables first
db_ingestion.create_tables()

# Discover available 2025 meetings
available_meetings = discover_2025_meetings()

# Ingest data for each meeting
for meeting in available_meetings:
    meeting_key = str(meeting.get('meeting_key'))
    meeting_name = meeting.get('meeting_name')
    
    print(f"\n=== Ingesting {meeting_name} (Key: {meeting_key}) ===")
    
    # Create data ingestion instance for this meeting
    data_ingestion = DataIngestion(data_ingestion_config, api_client)
    
    # Ingest data for this meeting
    meeting_data = data_ingestion.ingest_meeting_data(meeting_key)
    
    # Insert into database
    if meeting_data:
        db_ingestion.insert_meeting_data(meeting_data)
        print(f"✅ {meeting_name} data successfully ingested!")
    else:
        print(f"❌ Failed to ingest {meeting_name}")

print("\n🎉 All 2025 data ingestion completed!")

[2025-07-02 18:25:40,847: INFO: common: yaml file: config/config.yaml loaded successfully]
[2025-07-02 18:25:40,923: INFO: 1957311327: Successfully connected to PostgreSQL database]
[2025-07-02 18:25:41,000: INFO: 1957311327: Optimized database tables created successfully]
[2025-07-02 18:25:41,000: INFO: 1829873344: Making request to: https://api.openf1.org/v1/meetings]
[2025-07-02 18:25:41,342: INFO: 1829873344: Successfully retrieved 60 records from meetings]
Found 12 meetings in 2025:
  - Pre-Season Testing (Key: 1253)
  - Australian Grand Prix (Key: 1254)
  - Chinese Grand Prix (Key: 1255)
  - Japanese Grand Prix (Key: 1256)
  - Bahrain Grand Prix (Key: 1257)
  - Saudi Arabian Grand Prix (Key: 1258)
  - Miami Grand Prix (Key: 1259)
  - Emilia Romagna Grand Prix (Key: 1260)
  - Monaco Grand Prix (Key: 1261)
  - Spanish Grand Prix (Key: 1262)
  - Canadian Grand Prix (Key: 1263)
  - Austrian Grand Prix (Key: 1264)

=== Ingesting Pre-Season Testing (Key: 1253) ===
[2025-07-02 18:25:41,

In [47]:
# Add this helper function for progress tracking
def check_ingestion_progress():
    """Check what data has been ingested so far"""
    db_ingestion = DatabaseIngestion(data_ingestion_config, db_config, api_client)
    conn = db_ingestion.connect_to_db()
    cursor = conn.cursor()
    
    # Check meetings by year
    cursor.execute("""
        SELECT year, COUNT(*) as meeting_count 
        FROM meetings 
        GROUP BY year 
        ORDER BY year DESC
    """)
    
    results = cursor.fetchall()
    print("=== INGESTION PROGRESS ===")
    for year, count in results:
        print(f"  {year}: {count} meetings")
    
    cursor.close()
    conn.close()

# Run progress check
check_ingestion_progress()

[2025-07-02 18:33:28,699: INFO: 1957311327: Successfully connected to PostgreSQL database]
=== INGESTION PROGRESS ===
  2025: 12 meetings


In [52]:
# Add this debugging cell (Updated)
def check_database_data():
    """Check what data is actually in the database"""
    # Create API client for the DatabaseIngestion
    api_client = OpenF1APIClient(data_ingestion_config)
    db_ingestion = DatabaseIngestion(data_ingestion_config, db_config, api_client)
    
    conn = db_ingestion.connect_to_db()
    cursor = conn.cursor()
    
    # Check meetings table
    cursor.execute("SELECT COUNT(*) FROM meetings")
    meetings_count = cursor.fetchone()[0]
    print(f"Meetings in database: {meetings_count}")
    
    if meetings_count > 0:
        cursor.execute("SELECT meeting_key, meeting_name, year FROM meetings")
        meetings = cursor.fetchall()
        print("Meetings found:")
        for meeting in meetings:
            print(f"  - {meeting[1]} ({meeting[2]}) - Key: {meeting[0]}")
    
    # Check sessions table
    cursor.execute("SELECT COUNT(*) FROM sessions")
    sessions_count = cursor.fetchone()[0]
    print(f"\nSessions in database: {sessions_count}")
    
    if sessions_count > 0:
        cursor.execute("SELECT session_key, session_name, session_type FROM sessions LIMIT 5")
        sessions = cursor.fetchall()
        print("Sessions found:")
        for session in sessions:
            print(f"  - {session[1]} ({session[2]}) - Key: {session[0]}")
    
    # Check other tables
    tables = ['drivers', 'laps', 'pit_stops', 'stints', 'positions','intervals', 'race_control']
    for table in tables:
        cursor.execute(f"SELECT COUNT(*) FROM {table}")
        count = cursor.fetchone()[0]
        print(f"{table.capitalize()} in database: {count}")
    
    cursor.close()
    conn.close()

# Run the check
check_database_data()

[2025-07-03 11:05:54,195: INFO: 1957311327: Successfully connected to PostgreSQL database]


Meetings in database: 12
Meetings found:
  - Pre-Season Testing (2025) - Key: 1253
  - Australian Grand Prix (2025) - Key: 1254
  - Chinese Grand Prix (2025) - Key: 1255
  - Japanese Grand Prix (2025) - Key: 1256
  - Bahrain Grand Prix (2025) - Key: 1257
  - Saudi Arabian Grand Prix (2025) - Key: 1258
  - Miami Grand Prix (2025) - Key: 1259
  - Emilia Romagna Grand Prix (2025) - Key: 1260
  - Monaco Grand Prix (2025) - Key: 1261
  - Spanish Grand Prix (2025) - Key: 1262
  - Canadian Grand Prix (2025) - Key: 1263
  - Austrian Grand Prix (2025) - Key: 1264

Sessions in database: 53
Sessions found:
  - Practice 1 (Practice) - Key: 9686
  - Practice 2 (Practice) - Key: 9687
  - Practice 3 (Practice) - Key: 9688
  - Qualifying (Qualifying) - Key: 9689
  - Race (Race) - Key: 9693
Drivers in database: 1059
Laps in database: 29981
Pit_stops in database: 4206
Stints in database: 4522
Positions in database: 36834
Intervals in database: 288343


UndefinedTable: relation "race_control" does not exist
LINE 1: SELECT COUNT(*) FROM race_control
                             ^
