# F1 Insights Pro Python Script

Authors: 
    
    María Isabel Rivera Portillo (23200796)
    Diego Aymerich Pañero (23205295)
    Chetan Damodar Nandanwar (2320097)
    Alfred  John (23201995)

Welcome to the F1 Insights Pro Python script.
This code computes the proposed F1 Insights Pro ranking and retrieves the underlying F1 data through the Python library 'fastf1'.

# 1. Environmental Setup

In [1]:
# If any of the following third-party libraries is not installed yet, remove the leading # and execute the cell.
# Note: 'os' and 'collections' are part of the Python standard library and must not be installed with pip.

#!pip install fastf1
#!pip install pandas
#!pip install numpy

In [2]:
# Libraries Import
import os
import pandas as pd
import numpy as np
import fastf1
from collections import defaultdict

In [3]:
# Create a cache directory in the Colab file system.
# The path is hard-coded; exist_ok=True makes re-running this cell harmless
# when the directory already exists.
cache_dir = '/content/cache_directory'
os.makedirs(cache_dir, exist_ok=True)

In [4]:
# Enable fastf1 caching in the directory created above, so that repeated
# session loads can reuse stored data (the log output further down shows
# "Using cached data for ..." lines).
fastf1.Cache.enable_cache(cache_dir)

# 2. Retrieve Data from fastf1 library

In [5]:
 # Function to collect race results and calculate scores
def _count_position_gains(laps):
    """Return how many laps ended with a lower position number than the lap before."""
    # A negative lap-to-lap difference means the driver moved up the order.
    # Comparisons against NaN are False, so laps without a position are ignored.
    return int((laps['Position'].diff() < 0).sum())


def collect_race_results(year, race):
    """Score and rank every driver listed in the results of one race.

    Loads the race session through fastf1 and combines three components into
    a single 'Score' per driver: a track-complexity score, a weather score
    (both identical for all drivers of the race) and a driver-performance
    score (start/finish position, points, fastest-lap gap, overtakes).

    The function reads two module-level dictionaries that must be defined
    before it is called: ``track_complexity`` (indexed by ``race``) and
    ``weights``.

    Args:
        year: Season, forwarded to ``fastf1.get_session``.
        race: Event identifier, forwarded to ``fastf1.get_session`` and used
            as the key into ``track_complexity``.

    Returns:
        A list with one dict per entry of ``session.results``, sorted by
        'Score' in descending order (rows whose score is NaN come last), each
        carrying a 'Rank' starting at 1.

    Raises:
        KeyError: If ``race`` is missing from ``track_complexity`` or a
            required key is missing from ``weights``.
    """
    # Look up the static track data first so that an unknown race fails
    # before the (slow) session download.
    track_info = track_complexity[race]

    session = fastf1.get_session(year, race, 'R')
    session.load()
    all_laps = session.laps
    results = session.results

    ### TRACK COMPLEXITY ###
    track_score = (
        (track_info['Turns'] / 30) +  # Assuming 30 as the max number of turns
        (track_info['DRSZones'] / 4) +  # Assuming 4 as the max number of DRSZones
        (track_info['ElevationChange'] / 5)  # Assuming 5 meters as a significant elevation change
    ) / 3 * weights['TrackComplexity']

    ### WEATHER CONDITIONS ###
    weather_data = session.weather_data
    avg_track_temp = weather_data['TrackTemp'].mean()
    avg_air_temp = weather_data['AirTemp'].mean()
    avg_humidity = weather_data['Humidity'].mean()
    avg_wind_speed = weather_data['WindSpeed'].mean()
    # 'Rainfall' is a per-sample boolean flag in fastf1, so its mean is the
    # fraction of samples with rain, in the range [0, 1].
    rain_probability = weather_data['Rainfall'].mean()

    # Normalisation ceilings (more extreme conditions = higher score)
    max_track_temp = 50  # Assuming 50°C as an extreme track temperature
    max_air_temp = 35    # Assuming 35°C as an extreme air temperature
    max_humidity = 100   # Assuming 100% as an extreme humidity
    # NOTE(review): fastf1 documents WindSpeed in m/s, not km/h - confirm
    # whether this ceiling should be converted.
    max_wind_speed = 50  # Assuming 50 km/h as an extreme wind speed
    # The rain value above is a fraction, not a percentage; dividing it by
    # 100 would shrink the rain component to at most 0.01.
    max_rain_probability = 1.0

    weather_score = (
        avg_track_temp / max_track_temp +
        avg_air_temp / max_air_temp +
        avg_humidity / max_humidity +
        avg_wind_speed / max_wind_speed +
        rain_probability / max_rain_probability
    ) / 5 * weights['WeatherConditions']

    ### OVERALL FASTEST LAP ###
    # pick_fastest() may return None when there is no timed lap at all.
    overall_fastest_lap = all_laps.pick_fastest()
    overall_best_time = overall_fastest_lap['LapTime'] if overall_fastest_lap is not None else pd.NaT
    # Computed once here instead of once per driver.
    overall_fastest_lap_time = (
        overall_best_time.total_seconds() if pd.notnull(overall_best_time) else np.nan
    )

    ### OVERTAKES ###
    # Count position gains per driver and sum them for the race total.
    # Diffing the concatenated laps of all drivers would also count the jumps
    # at the boundary between two drivers' laps.
    laps_by_driver = {abbr: all_laps.pick_driver(abbr) for abbr in results['Abbreviation']}
    gains_by_driver = {abbr: _count_position_gains(laps) for abbr, laps in laps_by_driver.items()}
    total_overtakes = sum(gains_by_driver.values())

    ### DRIVER PERFORMANCE ###
    driver_data = []

    for driver in results['Abbreviation']:
        laps = laps_by_driver[driver]
        overtakes = gains_by_driver[driver]

        # A driver without a timed lap has no fastest lap.
        fastest_lap = laps.pick_fastest()
        if fastest_lap is None:
            fastest_lap_time = pd.NaT
            fastest_lap_speed = np.nan
        else:
            fastest_lap_time = fastest_lap['LapTime']
            fastest_lap_speed = fastest_lap['SpeedST'] if 'SpeedST' in fastest_lap else np.nan

        result = results.loc[results['Abbreviation'] == driver].iloc[0]
        start_position = result['GridPosition']
        finish_position = result['Position']
        points_scored = result['Points']

        # Gap to the overall fastest lap; infinite when the driver set no time
        if pd.notnull(fastest_lap_time):
            fastest_lap_seconds = fastest_lap_time.total_seconds()
            fastest_lap_gap = fastest_lap_seconds - overall_fastest_lap_time
        else:
            fastest_lap_seconds = np.nan
            fastest_lap_gap = float('inf')

        # Flag the driver who set the overall fastest lap (even outside the top 10)
        holds_fastest_lap = pd.notnull(fastest_lap_time) and fastest_lap_time == overall_best_time
        fastest_lap_flag = 1 if holds_fastest_lap else 0

        # Add point for fastest lap if driver has the fastest lap and finishes in the top 10
        # NOTE(review): 'Points' in the results may already include the
        # fastest-lap bonus point - confirm to avoid counting it twice.
        if holds_fastest_lap and finish_position <= 10:
            points_scored += 1

        # Individual scores; the constant 20 assumes a 20-car grid.
        # NOTE(review): a GridPosition of 0 (presumably a pit-lane start -
        # confirm) yields the maximum start score here.
        start_score = (20 - start_position) * weights['StartPosition']
        finish_score = (20 - finish_position) * weights['FinishPosition']
        points_score = points_scored * weights['PointsScored']
        fastest_lap_score = (1 / (1 + fastest_lap_gap)) * weights['FastestLapGap'] if pd.notnull(fastest_lap_gap) else 0
        # Avoid dividing by zero when nobody gained a position.
        overtake_share = overtakes / total_overtakes if total_overtakes else 0.0
        overtake_score = overtake_share * weights['Overtakes']

        driver_performance_score = start_score + finish_score + points_score + fastest_lap_score + overtake_score

        total_score = driver_performance_score + track_score + weather_score

        driver_data.append({
            'Driver': result['FullName'],
            'Team': result['TeamName'],
            'Track': race,
            'TrackTurns': track_info['Turns'],
            'TrackDRSZones': track_info['DRSZones'],
            'ElevationChange': track_info['ElevationChange'],
            'AvgTrackTemp': avg_track_temp,
            'AvgAirTemp': avg_air_temp,
            'AvgHumidity': avg_humidity,
            'AvgWindSpeed': avg_wind_speed,
            'RainProbability': rain_probability,
            'StartPosition': start_position,
            'FinishPosition': finish_position,
            'PointsScored': points_scored,
            'FastestLapFlag': fastest_lap_flag,
            'OverallFastestLap': overall_fastest_lap_time,
            'FastestLapTime': fastest_lap_seconds,
            'FastestLapGap': fastest_lap_gap,
            'FastestLapSpeed': fastest_lap_speed,
            'Overtakes': overtakes,
            'TotalOvertakes': total_overtakes,
            'TrackComplexity': track_score,
            'WeatherConditions': weather_score,
            'DriverPerformanceScore': driver_performance_score,
            'Score': total_score
        })

    # Sort by score (highest first) and assign rank. NaN compares False with
    # everything and would make the order unpredictable, so rows without a
    # valid score are placed after all scored rows.
    def _score_key(row):
        score = row['Score']
        return (1, score) if pd.notnull(score) else (0, 0.0)

    driver_data.sort(key=_score_key, reverse=True)
    for rank, driver in enumerate(driver_data, start=1):
        driver['Rank'] = rank

    return driver_data

In [6]:
# Definition of weights for each ranking component.
# The five driver-level weights add up to the 0.8 stored under
# 'DriverPerformance'; collect_race_results reads the five individual
# entries, not the 'DriverPerformance' total.
_driver_weights = {
    'StartPosition': 0.15,
    'FinishPosition': 0.20,
    'PointsScored': 0.10,
    'FastestLapGap': 0.20,
    'Overtakes': 0.15,
}

weights = {
    'TrackComplexity': 0.1,
    'WeatherConditions': 0.1,
    'DriverPerformance': 0.8,
    **_driver_weights,
}

# 3. 2022 F1 Insights Pro Ranking 

In [7]:
# Track Complexity Data for 2022 Season
# One row per race: (race, turns, DRS zones, elevation change).
_TRACK_ROWS_2022 = [
    ('Bahrain', 15, 3, 3.5),
    ('Saudi Arabia', 27, 3, 2.5),
    ('Australia', 14, 4, 2.0),
    ('Emilia Romagna', 19, 1, 4.5),
    ('Miami', 19, 3, 1.5),
    ('Spain', 16, 2, 3.5),
    ('Monaco', 19, 1, 3.0),
    ('Azerbaijan', 20, 2, 3.5),
    ('Canada', 14, 2, 1.0),
    ('Great Britain', 18, 2, 2.0),
    ('Austria', 10, 3, 3.5),
    ('France', 15, 2, 2.0),
    ('Hungary', 14, 1, 2.5),
    ('Belgium', 19, 2, 4.5),
    ('Netherlands', 14, 2, 2.0),
    ('Italy', 11, 2, 1.5),
    ('Singapore', 23, 2, 2.5),
    ('Japan', 18, 1, 3.0),
    ('United States', 20, 2, 2.5),
    ('Mexico', 17, 2, 1.0),
    ('Brazil', 15, 2, 4.0),
    ('Abu Dhabi', 16, 2, 1.0),
]

# Expand the rows into the {race: {'Turns', 'DRSZones', 'ElevationChange'}}
# mapping that collect_race_results indexes by race.
track_complexity = {
    name: {'Turns': turns, 'DRSZones': drs_zones, 'ElevationChange': elevation}
    for name, turns, drs_zones, elevation in _TRACK_ROWS_2022
}

In [8]:
# 2022 races, in the order in which they are processed below
races = [
    'Bahrain',
    'Saudi Arabia',
    'Australia',
    'Emilia Romagna',
    'Miami',
    'Spain',
    'Monaco',
    'Azerbaijan',
    'Canada',
    'Great Britain',
    'Austria',
    'France',
    'Hungary',
    'Belgium',
    'Netherlands',
    'Italy',
    'Singapore',
    'Japan',
    'United States',
    'Mexico',
    'Brazil',
    'Abu Dhabi',
]

In [9]:
# Collect results and calculate scores for the 2022 season.
# collect_race_results reads the module-level `track_complexity` and `weights`
# dictionaries, so the 2022 versions of those cells must have been run first.
# Each call returns one list of per-driver dicts for that race; the lists are
# concatenated into a single season-wide list.
all_driver_data = []
for race in races:
    race_data = collect_race_results(2022, race)
    all_driver_data.extend(race_data)

core           INFO 	Loading data for Bahrain Grand Prix - Race [v3.3.8]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['16', '55', '44', '63', '20', '77', '31', '22', '14', '24', '47', '18', '23', '3', '4', '6', '27', '11', '1', '10']
core           INFO 	Loading data for Saudi Arabian Grand Prix

req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '11', '63', '44', '10', '5', '14', '3', '4', '31', '77', '23', '22', '47', '6', '18', '20', '24', '16', '55']
core           INFO 	Loading data for Canadian Grand Prix - Race [v3.3.8]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data

req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '63', '16', '44', '11', '14', '4', '55', '31', '18', '10', '23', '47', '5', '20', '24', '3', '6', '77', '22']
core           INFO 	Loading data for Emilia Romagna Grand Prix - Race [v3.3.8]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_

core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '16', '11', '55', '63', '4', '31', '18', '3', '5', '22', '24', '23', '10', '77', '47', '20', '44', '6', '14']


In [10]:
# Convert to DataFrame: one row per driver per race, one column per key of
# the dicts returned by collect_race_results
df2022 = pd.DataFrame(all_driver_data)

In [11]:
# Save the DataFrame to a CSV file (relative path, so it is written to the
# current working directory); index=False leaves out the row index column
df2022.to_csv('F1_Insights_Pro_Ranking_2022.csv', index=False)

# 4. 2023 F1 Insights Pro Ranking 

In [12]:
# Track Complexity Data for 2023 Season
# One row per race: (race, turns, DRS zones, elevation change).
# Rebinding `track_complexity` replaces the 2022 table used earlier.
_TRACK_ROWS_2023 = [
    ('Bahrain', 15, 3, 3.5),
    ('Saudi Arabia', 27, 3, 2.5),
    ('Australia', 14, 4, 2.0),
    ('Azerbaijan', 20, 2, 3.5),
    ('Miami', 19, 3, 1.5),
    ('Emilia Romagna', 19, 1, 4.5),
    ('Monaco', 19, 1, 3.0),
    ('Spain', 16, 2, 3.5),
    ('Canada', 14, 2, 1.0),
    ('Austria', 10, 3, 3.5),
    ('Great Britain', 18, 2, 2.0),
    ('Hungary', 14, 1, 2.5),
    ('Belgium', 19, 2, 4.5),
    ('Netherlands', 14, 2, 2.0),
    ('Italy', 11, 2, 1.5),
    ('Singapore', 23, 2, 2.5),
    ('Japan', 18, 1, 3.0),
    ('Qatar', 16, 2, 1.0),
    ('United States', 20, 2, 2.5),
    ('Mexico', 17, 2, 1.0),
    ('Brazil', 15, 2, 4.0),
    ('Las Vegas', 17, 2, 1.5),
    ('Abu Dhabi', 16, 2, 1.0),
]

# Expand the rows into the {race: {'Turns', 'DRSZones', 'ElevationChange'}}
# mapping that collect_race_results indexes by race.
track_complexity = {
    name: {'Turns': turns, 'DRSZones': drs_zones, 'ElevationChange': elevation}
    for name, turns, drs_zones, elevation in _TRACK_ROWS_2023
}

In [13]:
# 2023 races, in calendar order.
# The 2023 Emilia Romagna Grand Prix was cancelled, so it is not listed:
# there is no race session to load for it, and asking fastf1 for it by name
# could resolve to a different event and add that event's results a second
# time under the wrong track label.
races = ['Bahrain', 'Saudi Arabia', 'Australia', 'Azerbaijan', 'Miami',
         'Monaco', 'Spain', 'Canada', 'Austria',
         'Great Britain', 'Hungary', 'Belgium', 'Netherlands', 'Italy',
         'Singapore', 'Japan', 'Qatar', 'United States', 'Mexico',
         'Brazil', 'Las Vegas', 'Abu Dhabi']

In [None]:
# Collect results and calculate scores for the 2023 season.
# `all_driver_data` is reset here, so the 2022 rows must already have been
# converted/saved. collect_race_results reads the module-level
# `track_complexity` and `weights`, so the 2023 track table must be the one
# currently bound to `track_complexity`.
all_driver_data = []
for race in races:
    race_data = collect_race_results(2023, race)
    all_driver_data.extend(race_data)

core           INFO 	Loading data for Bahrain Grand Prix - Race [v3.3.8]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '11', '14', '55', '44', '18', '63', '77', '10', '23', '22', '2', '20', '21', '27', '24', '4', '31', '16', '81']
core           INFO 	Loading data for Saudi Arabian Grand Pri

In [None]:
# Convert to DataFrame: one row per driver per race, one column per key of
# the dicts returned by collect_race_results
df2023 = pd.DataFrame(all_driver_data)

In [None]:
# Save the DataFrame to a CSV file (relative path, so it is written to the
# current working directory); index=False leaves out the row index column
df2023.to_csv('F1_Insights_Pro_Ranking_2023.csv', index=False)