In [None]:
import requests
import pandas as pd
import numpy as np
from tqdm import tqdm
import time
import os

In [None]:
class F1LapTimeCollector:
    """
    Collect per-lap timing records from an Ergast-style F1 API and export them to CSV.

    Each record combines values read from the API (year, round, circuit,
    driver, lap number, lap time) with randomly generated placeholder
    features (horsepower, weight, temperature, ...). The random features are
    simulated and are not measurements.
    """

    # NOTE(review): the Ergast service has been announced as deprecated --
    # confirm the endpoint is still reachable, or pass a compatible mirror
    # through the ``base_url`` argument.
    DEFAULT_BASE_URL = "http://ergast.com/api/f1"
    REQUEST_TIMEOUT = 30  # seconds; without a timeout a stalled request hangs forever
    REQUEST_DELAY = 0.5   # seconds to pause after every lap-time request

    def __init__(self, start_year=2010, end_year=2023, base_url=None):
        """
        Initialize the F1 Lap Time Collector

        Parameters:
        - start_year: Beginning of data collection year (inclusive)
        - end_year: End of data collection year (inclusive)
        - base_url: Optional API root; defaults to the Ergast API base URL
        """
        self.base_url = (base_url or self.DEFAULT_BASE_URL).rstrip('/')
        self.start_year = start_year
        self.end_year = end_year
        self.lap_time_data = []  # Store collected lap time data

    def _get_json(self, url, params=None):
        """
        GET a URL and decode the JSON body

        Parameters:
        - url: Address to request
        - params: Optional query-string parameters

        Returns:
        - Decoded JSON, or None on a network error, a non-200 status or an
          undecodable body
        """
        try:
            response = requests.get(url, params=params, timeout=self.REQUEST_TIMEOUT)
        except requests.RequestException:
            return None
        if response.status_code != 200:
            return None
        try:
            return response.json()
        except ValueError:
            # Body was not valid JSON (e.g. an HTML error page served with status 200)
            return None

    def fetch_lap_times(self, year, round_num, limit=10000, offset=0):
        """
        Fetch one page of lap times for a specific race

        Parameters:
        - year: Race year
        - round_num: Race round number
        - limit: Maximum number of timing rows requested for the page
        - offset: Number of timing rows to skip (used for paging)

        Returns:
        - JSON data of lap times or None if fetch fails
        """
        url = f"{self.base_url}/{year}/{round_num}/laps.json"
        return self._get_json(url, params={'limit': limit, 'offset': offset})

    def _fetch_total_rounds(self, year):
        """
        Look up how many rounds a season has

        Parameters:
        - year: Season year

        Returns:
        - Number of rounds, or 0 when the season cannot be fetched or parsed
        """
        season = self._get_json(f"{self.base_url}/{year}.json")
        try:
            return int(season['MRData']['total'])
        except (TypeError, KeyError, ValueError):
            tqdm.write(f"Could not determine rounds for {year}; skipping season")
            return 0

    def _build_lap_entries(self, year, round_num, lap_times_data):
        """
        Turn one lap-times JSON page into a list of flat lap records

        Parameters:
        - year: Race year
        - round_num: Race round number
        - lap_times_data: Decoded JSON page as returned by fetch_lap_times

        Returns:
        - List of lap entry dicts; empty when the page contains no race
        """
        try:
            races = lap_times_data['MRData']['RaceTable']['Races']
        except (KeyError, TypeError):
            return []
        if not races:
            # A successful response can still carry no race (no lap data published)
            return []

        # Extract race and circuit information
        race = races[0]
        circuit = race['Circuit']
        circuit_name = circuit['circuitName']
        circuit_country = circuit['Location']['country']

        entries = []
        for lap_set in race.get('Laps', []):
            for timing in lap_set['Timings']:
                entries.append({
                    # Track Features
                    'year': year,
                    'round': round_num,
                    'circuit_name': circuit_name,
                    'circuit_country': circuit_country,

                    # Lap Time Data
                    'driver_id': timing['driverId'],
                    'lap_number': lap_set['number'],
                    'lap_time_ms': self._convert_lap_time_to_ms(timing.get('time')),

                    # Simulated Performance Factors (random, not measured)
                    'estimated_horsepower': np.random.randint(750, 1000),
                    'car_weight': np.random.randint(700, 800),
                    'track_temperature': np.random.randint(20, 45),
                    'driving_aggression': np.random.uniform(0.5, 1.0),

                    # Additional Predictive Features (random, not measured)
                    'tire_compound': np.random.choice(['soft', 'medium', 'hard']),
                    'fuel_load_percent': np.random.uniform(10, 100)
                })
        return entries

    def _collect_race(self, year, round_num):
        """
        Fetch every page of lap times for one race

        Parameters:
        - year: Race year
        - round_num: Race round number

        Returns:
        - List of lap entry dicts for the race (possibly partial if a page fails)
        """
        entries = []
        offset = 0
        while True:
            page = self.fetch_lap_times(year, round_num, offset=offset)
            time.sleep(self.REQUEST_DELAY)  # Respect API rate limits to avoid blocking

            if not page:
                tqdm.write(f"Lap times unavailable for {year} round {round_num} (offset {offset})")
                break

            entries.extend(self._build_lap_entries(year, round_num, page))

            # The server reports the page size it actually served, which may be
            # smaller than requested; keep paging until 'total' rows are covered.
            try:
                paging = page['MRData']
                served = int(paging['limit'])
                total = int(paging['total'])
            except (KeyError, TypeError, ValueError):
                break  # No paging information: treat the response as complete
            offset += served
            if served <= 0 or offset >= total:
                break
        return entries

    def collect_lap_times(self):
        """
        Collect comprehensive lap time data across multiple years and races

        Process:
        - Iterate through years and race rounds
        - Fetch lap time data for each race (all pages)
        - Simulate additional performance factors

        Seasons or races that cannot be fetched are reported and skipped, so
        data already collected is never lost to a single failed request.
        """
        for year in tqdm(range(self.start_year, self.end_year + 1), desc="Years"):
            total_rounds = self._fetch_total_rounds(year)

            # Iterate through each round in the year
            for round_num in tqdm(range(1, total_rounds + 1), desc=f"Rounds in {year}", leave=False):
                self.lap_time_data.extend(self._collect_race(year, round_num))

    def _convert_lap_time_to_ms(self, time_str):
        """
        Convert lap time string to milliseconds

        Parameters:
        - time_str: Lap time in 'MM:SS.mmm' format ('H:MM:SS.mmm' and plain
          'SS.mmm' are accepted as well)

        Returns:
        - Lap time in milliseconds (int), or NaN if the value cannot be parsed
        """
        try:
            total_seconds = 0.0
            # Fold colon-separated fields left to right, each worth 60x the next
            for part in time_str.split(':'):
                total_seconds = total_seconds * 60 + float(part)
            # round() rather than int(): float products such as 1.001 * 1000
            # land just below the true value and would truncate to 1000
            return round(total_seconds * 1000)
        except (AttributeError, TypeError, ValueError, OverflowError):
            return np.nan

    def save_dataset(self, filename='f1_lap_time_dataset.csv'):
        """
        Save collected lap time data to CSV

        The file is written to the user's Desktop folder; when that folder
        does not exist the current working directory is used instead, so a
        finished collection run is not lost.

        Parameters:
        - filename: Output CSV filename

        Returns:
        - Pandas DataFrame of collected data
        """
        df = pd.DataFrame(self.lap_time_data)
        desktop_dir = os.path.join(os.path.expanduser('~'), 'Desktop')
        target_dir = desktop_dir if os.path.isdir(desktop_dir) else os.getcwd()
        output_path = os.path.join(target_dir, filename)
        df.to_csv(output_path, index=False)
        print(f"\nDataset saved to {output_path}")
        print(f"Total lap time records: {len(df)}")
        return df

In [None]:
def main():
    """
    Entry point: gather lap times for the 2010-2023 seasons, write them to
    CSV and print the first rows of the resulting table
    """
    lap_collector = F1LapTimeCollector(start_year=2010, end_year=2023)
    lap_collector.collect_lap_times()

    # save_dataset writes the CSV and hands back the DataFrame it wrote
    frame = lap_collector.save_dataset()
    preview = frame.head()
    print("\nDataset Preview:\n", preview)

In [None]:
# Run the collection only when this module is executed directly, not when it is imported
if __name__ == "__main__":
    main()