In [1]:
from sqlalchemy import create_engine, func
from sqlalchemy.orm import sessionmaker
import sys
import os
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "../")))
from DB.models import init_db, Circuit, Season, RacingWeekend, Driver, Session, SessionResult, Lap, TyreRaceData, Team, DriverTeamSession, TeamCircuitStats

import gym
import numpy as np
import pandas as pd
from sqlalchemy import create_engine, text
from gym import spaces
from stable_baselines3 import PPO

class F1RaceEnv(gym.Env):
    def __init__(self):
        super(F1RaceEnv, self).__init__()
        
        # Connect to the database
        self.engine = create_engine('sqlite:////home/ben/Individual_Project/DB/f1_data.db')

        # Action space: [Pit or not, Tire choice: 0-3]
        self.action_space = spaces.MultiDiscrete([2, 3])  # 2 options (Pit: Yes/No), 3 tire compounds (Soft, Medium, Hard)
        
        # Observation space: [Lap Number, Tire Wear, Position, Gap to Car Ahead, Weather]
        self.observation_space = spaces.Box(low=0, high=100, shape=(5,), dtype=np.float32)

        # Load race data
        self.load_race_data()
        
        self.current_lap = 0
        self.driver_tires = []  # Stores used tire compounds
        self.total_time = 0

    def load_race_data(self):
        query = text("""
            SELECT lap_num, lap_time, tyre, tyre_laps, pit, rainfall 
            FROM laps 
            WHERE driver_id = :driver_id
        """)
        with self.engine.connect() as conn:
            self.race_data = pd.read_sql(query, conn, params={'driver_id': 1})  # Example driver_id = 1

    def step(self, action):
        pit, tyre_choice = action

        lap_time = self.race_data.loc[self.current_lap, 'lap_time']
        pit_time = 25 if pit else 0  # Assume pit stops take ~25 seconds

        # Apply tire degradation penalty
        if len(self.driver_tires) > 0 and self.driver_tires[-1] == tyre_choice:
            lap_time += 0.5  # Slight penalty for worn tires

        # Update state
        self.current_lap += 1
        self.total_time += lap_time + pit_time
        if pit:
            self.driver_tires.append(tyre_choice)

        # Enforce F1 rule: At least two different tire compounds must be used
        if self.current_lap == len(self.race_data) - 1:
            if len(set(self.driver_tires)) < 2:
                reward = -50  # Large penalty for breaking the rule
            else:
                reward = -self.total_time  # Goal: Minimize total time
        else:
            reward = -lap_time  # Lower time = better reward

        done = self.current_lap >= len(self.race_data) - 1
        obs = np.array([self.current_lap, self.race_data.loc[self.current_lap, 'tyre_laps'], 
                        10, 5, self.race_data.loc[self.current_lap, 'rainfall']], dtype=np.float32)

        return obs, reward, done, {}

    def reset(self):
        self.current_lap = 0
        self.total_time = 0
        self.driver_tires = []
        return np.array([0, 0, 10, 5, 0], dtype=np.float32)

    def render(self, mode="human"):
        print(f"Lap: {self.current_lap}, Total Time: {self.total_time:.2f}s")



2025-01-29 11:12:40.314698: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1738149160.350634   55728 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1738149160.360873   55728 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-01-29 11:12:40.424547: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
env = F1RaceEnv()
model = PPO("MlpPolicy", env, verbose=1)
model.learn(total_timesteps=10000)
model.save("f1_rl_model")


Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.




-----------------------------
| time/              |      |
|    fps             | 107  |
|    iterations      | 1    |
|    time_elapsed    | 19   |
|    total_timesteps | 2048 |
-----------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 93           |
|    iterations           | 2            |
|    time_elapsed         | 44           |
|    total_timesteps      | 4096         |
| train/                  |              |
|    approx_kl            | 0.0019572799 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.79        |
|    explained_variance   | 0.000166     |
|    learning_rate        | 0.0003       |
|    loss                 | 1.23e+06     |
|    n_updates            | 10           |
|    policy_gradient_loss | -0.000372    |
|    value_loss           | 2.57e+06     |
------------------------------------------


ValueError: Expected parameter logits (Tensor of shape (1, 2)) of distribution Categorical(logits: torch.Size([1, 2])) to satisfy the constraint IndependentConstraint(Real(), 1), but found invalid values:
tensor([[nan, nan]], device='cuda:0')

In [None]:
model = PPO.load("f1_rl_model")
obs = env.reset()
done = False

while not done:
    action, _ = model.predict(obs)
    obs, reward, done, _ = env.step(action)
    env.render()
