In [4]:
import pandas as pd

# Replace 'your_file.xlsx' with the path to your Excel file
df = pd.read_excel('driver_data_base_4_25.xlsx')

Empirical answer with the following assumptions:

No driver who is impossible to be the answer is picked.

"Best" is defined by lowest average number of guesses while always winning (any starting driver can always win we find)

In [5]:
import itertools

# Function to group by criteria
def group_by_criteria(row):
    driver, team, flag, birth_year, start_year, wins, car_number = row['driver'], row['team'], row['flag'], row['birth year'], row['start year'], row['wins'], row['car_number']
    
    # Define the conditions
    conditions = {
        'team': [
            (df['team'] == team), 
            (df['former_team'].str.contains(team) & (df['team'] != team)), 
            (~df['former_team'].str.contains(team) & (df['team'] != team))
        ],
        'flag': [(df['flag'] == flag), (df['flag'] != flag)],
        'birth_year': [(df['birth year'] == birth_year), (df['birth year'] < birth_year), (df['birth year'] > birth_year)],
        'start_year': [(df['start year'] == start_year), (df['start year'] < start_year), (df['start year'] > start_year)],
        'wins': [(df['wins'] == wins), (df['wins'] < wins), (df['wins'] > wins)],
        'car_number': [(df['car_number'] == car_number), (df['car_number'] > car_number), (df['car_number'] < car_number)],
    }
    
    # Generate all possible combinations of conditions
    all_conditions = [conditions[key] for key in conditions]
    all_combinations = list(itertools.product(*all_conditions))
    
    # Calculate max group size for all possible combinations
    max_group_size = 0
    for combination in all_combinations:
        # Reduce the combination to a single boolean mask using functools.reduce and logical AND
        combined_mask = pd.Series([True] * len(df))  # Start with a mask that is all True
        for condition in combination:
            combined_mask &= condition  # Apply logical AND to combine conditions
        
        # Exclude the current row (driver) from the group size calculation
        combined_mask &= (df['driver'] != driver)
        
        # Update max_group_size with the size of the current group
        current_group_size = combined_mask.sum()
        max_group_size = max(max_group_size, current_group_size)
        
    return max_group_size


In [6]:
# Apply function and create new column for largest group size
df['largest_group_size'] = df.apply(group_by_criteria, axis=1)

# Print the smallest value in the 'largest_group_size' column
print(df['largest_group_size'].min())


7


In [7]:
from collections import defaultdict
def criteria_function(driver, df):
    # Get the specific driver details for comparison
    driver_details = df[df['driver'] == driver].iloc[0]

    groups = defaultdict(list)
    for _, row in df.iterrows():
        groups[
            (
                'same team' if row['team'] == driver_details['team'] else 
                'previous team' if driver_details['team'] in row['former_team'].split(', ') else 
                'different team',
                'same flag' if row['flag'] == driver_details['flag'] else 'different flag',
                'same birth year' if row['birth year'] == driver_details['birth year'] else 'later birth year' if row['birth year'] > driver_details['birth year'] else 'earlier birth year',
                'same start year' if row['start year'] == driver_details['start year'] else 'later start year' if row['start year'] > driver_details['start year'] else 'earlier start year',
                'same wins' if row['wins'] == driver_details['wins'] else 'more wins' if row['wins'] > driver_details['wins'] else 'fewer wins',
                'same car number' if row['car_number'] == driver_details['car_number'] else 'higher car number' if row['car_number'] > driver_details['car_number'] else 'lower car number'
            )
        ].append(row['driver'])

    # Convert the defaultdict to a standard dict and remove the key for the driver in question
    groups = dict(groups)
    group_key_for_driver = next((k for k, v in groups.items() if driver in v), None)
    if group_key_for_driver is not None:
        groups.pop(group_key_for_driver)

    answer = []
    group_size_sum = 0
    num_groups = 0
    for key in groups:
        answer.append(groups[key])
        group_size_sum += len(groups[key])
        num_groups += 1
    #ave_group_size = group_size_sum/num_groups
    return answer #, ave_group_size


In [8]:
def longest_path_in_shortest_tree(drivers, df, criteria_function, path_length=1):
    long_term_sum = 50000
    long_term_num = 1
    # If only one driver is left, we have reached a leaf node
    if len(drivers) <= 3:
        # we'll avoid recurssion because these cases are deterministic.
        if len(drivers) == 1:
            return path_length, path_length, 1
        elif len(drivers) == 2:
            longest_path = path_length+1
            long_term_sum = path_length + path_length+1
            long_term_num = 2
            return longest_path, long_term_sum, long_term_num
        elif len(drivers) == 3:
            # pick the middle car_number or either car_number that is in a tie and it will force the below situation, so we shortcut
            longest_path = path_length+1
            long_term_sum = path_length + path_length+1 + path_length+1
            long_term_num = 3
            return longest_path, long_term_sum, long_term_num
        else:
            raise ValueError('A value error occurred')
    longest_path = float('inf')
    best_driver = 'me'
    for driver in drivers:
        sum_lengths = path_length # These account for getting the driver correct
        num_lengths = 1
        # Generate groups using the criteria_function
        groups = criteria_function(driver, df)
        # If criteria_function results in a group of size 1,
        # it means we have identified a unique driver
        temp_longest_path = 1
        for subgroup in groups:
            if len(subgroup) > 3:
                subgroup_df = df[df['driver'].isin(subgroup)]
                path, temp_sum, temp_num = longest_path_in_shortest_tree(subgroup, subgroup_df, criteria_function, path_length + 1)
                if path > temp_longest_path:
                    sum_lengths += temp_sum
                    num_lengths += temp_num
                    temp_longest_path = path
            else:
                # Our answers can be determined without further recurssion.
                if len(subgroup) == 3:
                    temp_longest_path = max(temp_longest_path, path_length + 3)
                    sum_lengths += 3*path_length + 5 # one will be right, two will be wrong, so one will be path_length +1; the other 2, path_length +2
                    num_lengths += 3
                elif len(subgroup) == 2:
                    temp_longest_path = max(temp_longest_path, path_length + 2)
                    sum_lengths += 2*path_length + 3 # one will be right, one will be wrong, so one will be path_length +1; the other path_length +2
                    num_lengths += 2
                elif len(subgroup) == 1:
                    temp_longest_path = max(temp_longest_path, path_length + 1)
                    sum_lengths += path_length + 1
                    num_lengths += 1
                else:
                    raise ValueError('A value error occurred')
        if longest_path > temp_longest_path:
            longest_path = temp_longest_path
            # I am forcing us to use average for the shortest tree here.
            long_term_sum = sum_lengths
            long_term_num = num_lengths
        elif longest_path == temp_longest_path:
            # This will only replace the average if it's the best of the shortest trees.
            if sum_lengths/num_lengths < long_term_sum/long_term_num:
                long_term_sum = sum_lengths
                long_term_num = num_lengths
    return longest_path, long_term_sum, long_term_num

In [9]:
drivers = ['Romain Grosjean',
  'Marcus Ericsson',
  'Nikita Mazepin',
  'Kamui Kobayashi',
  'Pierre Gasly',
  'Sergio Perez',
  'Felipe Nasr',
  'Pastor Maldonado',
  'Jules Bianchi',
  'Lance Stroll',
  'Felipe Massa',
  'Kevin Magnussen',
  'Esteban Gutierrez',
  'Nyck de Vries',
  'Yuki Tsunoda',
  'Alexander Albon',
  'Jean-Eric Vergne',
  'Daniil Kvyat',
  'Nico Hulkenberg',
  'Will Stevens',
  'Brendon Hartley',
  'Jolyon Palmer',
  'Esteban Ocon',
  'Sergey Sirotkin',
  'Oliver Bearman',
  'Liam Lawson',
  'Paul di Resta',
  'Andre Lotterer',
  'Mick Schumacher',
  'Pietro Fittipaldi',
  'Alexander Rossi',
  'Carlos Sainz',
  'George Russell',
  'Oscar Piastri',
  'Rio Haryanto',
  'Jack Aitken',
  'Pascal Wehrlein',
  'Roberto Merhi',
  'Adrian Sutil']
sub_df = df[df['driver'].isin(drivers)]
a, b, c = longest_path_in_shortest_tree(drivers, sub_df, criteria_function, path_length=2)
print(a, b/c)

5 3.125


In [10]:
criteria_function('Kimi Raikkonen', df)

[['Stoffel Vandoorne',
  'Logan Sargeant',
  'Daniel Ricciardo',
  'Max Chilton',
  'Lando Norris',
  'Nicholas Latifi'],
 ['Sebastian Vettel', 'Nico Rosberg'],
 ['Romain Grosjean',
  'Marcus Ericsson',
  'Nikita Mazepin',
  'Kamui Kobayashi',
  'Pierre Gasly',
  'Sergio Perez',
  'Felipe Nasr',
  'Pastor Maldonado',
  'Jules Bianchi',
  'Lance Stroll',
  'Felipe Massa',
  'Kevin Magnussen',
  'Esteban Gutierrez',
  'Nyck de Vries',
  'Yuki Tsunoda',
  'Alexander Albon',
  'Jean-Eric Vergne',
  'Daniil Kvyat',
  'Nico Hulkenberg',
  'Will Stevens',
  'Brendon Hartley',
  'Jolyon Palmer',
  'Esteban Ocon',
  'Sergey Sirotkin',
  'Oliver Bearman',
  'Liam Lawson',
  'Paul di Resta',
  'Andre Lotterer',
  'Mick Schumacher',
  'Pietro Fittipaldi',
  'Alexander Rossi',
  'Carlos Sainz',
  'George Russell',
  'Oscar Piastri',
  'Rio Haryanto',
  'Jack Aitken',
  'Pascal Wehrlein',
  'Roberto Merhi',
  'Adrian Sutil'],
 ['Fernando Alonso'],
 ['Charles Leclerc', 'Zhou Guanyu'],
 ['Jenson Butto

In [11]:
final_ave = float('inf')
winning_driver = []
final_answer = float('inf')
winning_ave_driver = []
for i in range(8, 9):
    answer = 1
    driver_sum = 1 # to handle the case we got it right initially as this isn't covered right now
    driver_num = 1
    for drivers in criteria_function(df['driver'].iloc[i], df):
        print(f"Drivers {drivers}")
        df['driver'].isin(drivers)
        sub_df = df[df['driver'].isin(drivers)]
        temp, temp_sum, temp_num = longest_path_in_shortest_tree(drivers, sub_df, criteria_function, path_length=2)
        driver_sum += temp_sum
        driver_num += temp_num
        if temp > answer:
            answer = temp
    print(f"answer {answer, driver_sum/driver_num, df['driver'].iloc[i]}")
    if answer == final_answer:
        winning_driver.append(df['driver'].iloc[i])
    if answer < final_answer:
        final_answer = answer
        winning_driver = [df['driver'].iloc[i]]
    if driver_sum/driver_num == final_ave:
        print('sum')
        print(df['driver'].iloc[i])
        winning_ave_driver.append(df['driver'].iloc[i])
    if driver_sum/driver_num < final_ave:
        print('sum')
        print(df['driver'].iloc[i])
        final_ave = driver_sum/driver_num
        winning_ave_driver = [df['driver'].iloc[i]]
print(final_ave, winning_ave_driver, final_answer, winning_driver)

Drivers ['Stoffel Vandoorne', 'Logan Sargeant', 'Daniel Ricciardo', 'Max Chilton', 'Lando Norris', 'Nicholas Latifi']
Drivers ['Sebastian Vettel', 'Nico Rosberg']
Drivers ['Romain Grosjean', 'Marcus Ericsson', 'Nikita Mazepin', 'Kamui Kobayashi', 'Pierre Gasly', 'Sergio Perez', 'Felipe Nasr', 'Pastor Maldonado', 'Jules Bianchi', 'Lance Stroll', 'Felipe Massa', 'Kevin Magnussen', 'Esteban Gutierrez', 'Nyck de Vries', 'Yuki Tsunoda', 'Alexander Albon', 'Jean-Eric Vergne', 'Daniil Kvyat', 'Nico Hulkenberg', 'Will Stevens', 'Brendon Hartley', 'Jolyon Palmer', 'Esteban Ocon', 'Sergey Sirotkin', 'Oliver Bearman', 'Liam Lawson', 'Paul di Resta', 'Andre Lotterer', 'Mick Schumacher', 'Pietro Fittipaldi', 'Alexander Rossi', 'Carlos Sainz', 'George Russell', 'Oscar Piastri', 'Rio Haryanto', 'Jack Aitken', 'Pascal Wehrlein', 'Roberto Merhi', 'Adrian Sutil']
Drivers ['Fernando Alonso']
Drivers ['Charles Leclerc', 'Zhou Guanyu']
Drivers ['Jenson Button']
Drivers ['Max Verstappen', 'Lewis Hamilton']


In [None]:
min_record = 100.0
driver_record = ''
for driver in df['driver']:
    temp = criteria_function(driver, df)[-1]
    print(driver, temp, min_record)
    if temp < min_record:
        min_record = temp
        driver_record = driver
print(driver_record)
print(min_record)

Now we build and use our robot player:

In [45]:
import random
import numpy as np
import gym
from gym import spaces
import pandas as pd

class F1MysteryDriverEnv(gym.Env):
    metadata = {'render.modes': ['human']}
    
    def __init__(self, df):
        super(F1MysteryDriverEnv, self).__init__()
        
        self.df = df
        self.action_space = spaces.Discrete(len(df['driver']))  # Drivers are unique
        self.observation_space = spaces.Dict({
            "attempts": spaces.Discrete(6),  # A max of 6 attempts
            "feedback": spaces.MultiDiscrete([3] * 6)  # Feedback for each hint category
        })
        self.reset()

    def reset(self):
        self.state = {
            "mystery_driver": random.choice(self.df['driver']),
            "attempts": 0,
        }
        self.state["mystery_driver_info"] = self.df.loc[
            self.df['driver'] == self.state["mystery_driver"],
            ['car_number', 'start year', 'birth year', 'flag', 'team', 'wins']
        ].values[0]
        
        initial_observation = {
            "attempts": self.state["attempts"],
            "feedback": np.zeros(6)  # Initial feedback is all zeros (no information)
        }
        return initial_observation

    def step(self, action):
        assert self.action_space.contains(action)
        entered_driver = self.df['driver'].iloc[action]  # Map action to the driver name
        reward = 0
        done = False
        info = {}
        
        # Initialize feedback with whatever default you find suitable, e.g., np.zeros(6)
        feedback = np.zeros(6)
        
        if entered_driver == self.state["mystery_driver"]:
            reward = 500  # For choosing the correct driver
            done = True
            info["message"] = f"Correct! It is {self.state['mystery_driver_info']} YOU WIN!"
        else:
            self.state["attempts"] += 1
            feedback = self.get_feedback(entered_driver)  # Generate feedback based on the driver
            if self.state["attempts"] == 1:
                pass
            elif self.state["attempts"] < 6:
                reward = -100  # Negative reward for incorrect guess after the second (getting first is just luck so we don't want to differentiate between first and second)
            else:
                # The feedback calculation is already done above, so we can remove it from here
                reward = -100
                done = True
                info["message"] = "Game Over. Reached maximum attempts."
        
        observation = {
            "attempts": self.state["attempts"],
            "feedback": feedback
        }

        return observation, reward, done, info
    
    def get_feedback(self, entered_driver):
        feedback = np.zeros(6)  # We have 6 pieces of feedback

        # Number comparison
        entered_driver_number = self.df.loc[self.df['driver'] == entered_driver, 'car_number'].values[0]
        if entered_driver_number > self.state["mystery_driver_info"][0]:
            feedback[0] = -1
        elif entered_driver_number < self.state["mystery_driver_info"][0]:
            feedback[0] = 1

        # Birth year comparison
        entered_driver_birth_year = self.df.loc[self.df['driver'] == entered_driver, 'birth year'].values[0]
        if entered_driver_birth_year > self.state["mystery_driver_info"][2]:
            feedback[1] = -1
        elif entered_driver_birth_year < self.state["mystery_driver_info"][2]:
            feedback[1] = 1

        # Start year comparison
        entered_driver_start_year = self.df.loc[self.df['driver'] == entered_driver, 'start year'].values[0]
        if entered_driver_start_year > self.state["mystery_driver_info"][1]:
            feedback[2] = -1
        elif entered_driver_start_year < self.state["mystery_driver_info"][1]:
            feedback[2] = 1

        # Wins comparison
        entered_driver_wins = self.df.loc[self.df['driver'] == entered_driver, 'wins'].values[0]
        if entered_driver_wins > self.state["mystery_driver_info"][5]:
            feedback[3] = -1
        elif entered_driver_wins < self.state["mystery_driver_info"][5]:
            feedback[3] = 1

        # Flag comparison
        entered_driver_flag = self.df.loc[self.df['driver'] == entered_driver, 'flag'].values[0]
        feedback[4] = 0 if entered_driver_flag == self.state["mystery_driver_info"][3] else 1

        # Team comparison 
        entered_driver_team = self.df.loc[self.df['driver'] == entered_driver, 'team'].values[0]
        if entered_driver_team == self.state["mystery_driver_info"][4]:
            feedback[5] = 0
        else:
            feedback[5] = 1
        return feedback
        
    def render(self, mode='human'):
        # You can print out game state, feedback, or any other user-friendly information
        print(f"Attempts: {self.state['attempts']}")

    def close(self):
        # Perform any cleanup, if necessary
        pass

# A simple Q-learning agent class
class QLearningAgent:
    def __init__(self, action_space, state_space, learning_rate=0.1, discount_factor=0.99, exploration_rate=1.0, max_exploration_rate=1.0, min_exploration_rate=0.1, exploration_decay_rate=0.0001):
        self.action_space = action_space
        self.state_space = state_space
        self.learning_rate = learning_rate
        self.discount_factor = discount_factor
        self.exploration_rate = exploration_rate
        self.max_exploration_rate = max_exploration_rate
        self.min_exploration_rate = min_exploration_rate
        self.exploration_decay_rate = exploration_decay_rate
        self.q_table = np.zeros((state_space, action_space.n))

    # QLearningAgent class - inside the class definition
    def get_state_as_int(self, observation):
        # Since feedback can be -1, 0, 1, first map these values to 0, 1, 2 for non-negative encoding
        mapped_feedback = observation['feedback'] + 1  # Shift from [-1, 0, 1] to [0, 1, 2]
        base = 3  # Now we use base 3 because we have three possible values: 0, 1, 2
        
        feedback_int = 0
        for i, val in enumerate(mapped_feedback):
            feedback_int *= base
            feedback_int += val
        
        # Calculate the state by combining attempts and feedback values
        state_int = observation['attempts'] * (base ** len(mapped_feedback)) + feedback_int
        return int(state_int)
        
    def choose_action(self, observation):
        state = self.get_state_as_int(observation)
        exploration_rate_threshold = np.random.uniform(0, 1)
        if exploration_rate_threshold > self.exploration_rate:
            action = np.argmax(self.q_table[state])  # Exploit the best known value
        else:
            action = self.action_space.sample()  # Explore action space
        return action

    def learn(self, state, action, reward, next_state, done):
        old_value = self.q_table[state, action]
        next_max = np.max(self.q_table[next_state])
        
        new_value = (1 - self.learning_rate) * old_value + self.learning_rate * (reward + self.discount_factor * next_max)
        self.q_table[state, action] = new_value
        
        if done:
            self.exploration_rate = max(self.min_exploration_rate, self.exploration_rate * self.exploration_decay_rate)


In [46]:
env = F1MysteryDriverEnv(df)
num_feedback_options = 3 ** len(env.observation_space.spaces['feedback'].nvec)
num_attempts = env.observation_space.spaces['attempts'].n
total_state_space_size = (num_attempts+1) * num_feedback_options
agent = QLearningAgent(env.action_space, total_state_space_size)


In [47]:
agent.exploration_decay_rate

0.0001

In [58]:
number_of_episodes = 180000

In [76]:
for episode in range(number_of_episodes):
    observation = env.reset()
    state = int(agent.get_state_as_int(observation))
    total_reward = 500
    done = False
    steps = 0
    
    while not done and steps < num_attempts:
        action = agent.choose_action(observation)
        # If it's the first step, print out the chosen driver
        if steps == 0:
            driver_name = df['driver'].iloc[action]  # Assuming 'driver' is the column name with the driver names

        next_observation, reward, done, info = env.step(action)
        next_state = agent.get_state_as_int(next_observation)
        agent.learn(state, action, reward, next_state, done)

        state = next_state
        observation = next_observation
        total_reward += reward
        steps += 1

    if (episode + 1) % 100 == 0:
        print(f"Episode {episode + 1} finished with total reward: {total_reward}. First driver picked was {driver_name}")

    # Adjust the exploration rate
    agent.exploration_rate = max(agent.min_exploration_rate, agent.exploration_rate * np.exp(-agent.exploration_decay_rate*episode))


Episode 100 finished with total reward: 700. First driver picked was Fernando Alonso
Episode 200 finished with total reward: 800. First driver picked was Will Stevens
Episode 300 finished with total reward: 700. First driver picked was Daniil Kvyat
Episode 400 finished with total reward: 800. First driver picked was Will Stevens
Episode 500 finished with total reward: 0. First driver picked was Will Stevens
Episode 600 finished with total reward: 900. First driver picked was Will Stevens
Episode 700 finished with total reward: 600. First driver picked was Will Stevens
Episode 800 finished with total reward: 0. First driver picked was Will Stevens
Episode 900 finished with total reward: 600. First driver picked was Yuki Tsunoda
Episode 1000 finished with total reward: 1000. First driver picked was Will Stevens
Episode 1100 finished with total reward: 800. First driver picked was Will Stevens
Episode 1200 finished with total reward: 800. First driver picked was Pascal Wehrlein
Episode 13

In [75]:
agent.exploration_rate = 1

In [77]:
# Set exploration rate to 0 to use the learned policy without exploration
agent.exploration_rate = 0

# Set up a counter for the number of wins
num_wins = 0
sum_pts = 0
num_evaluation_episodes = 10000

for episode in range(num_evaluation_episodes):
    observation = env.reset()
    done = False
    
    while not done:
        # Agent always exploits the best action from Q-table
        action = agent.choose_action(observation)
        observation, reward, done, info = env.step(action)
        
        # If the reward is positive, it means the agent found the correct driver
        if reward > 0:
            num_wins += 1
        sum_pts += reward

# Calculate the winning percentage
winning_percentage = (num_wins / num_evaluation_episodes) * 100
average_points = sum_pts / num_evaluation_episodes
ppw = sum_pts / num_wins
guesses = (700 - (len(df)*ppw+100)/len(df))/100 # approximate

print(f"Winning percentage over {num_evaluation_episodes} games: {winning_percentage}%")
print(f"Average points over {num_evaluation_episodes} games: {average_points}")
print(f"Average points per win {ppw}")
print(f"Average guesses per win {guesses}")

Winning percentage over 10000 games: 91.13%
Average points over 10000 games: 281.38
Average points per win 308.7676945023593
Average guesses per win 3.8947791953272843


In [70]:
import pickle
# Saving the agent instance to a file
with open('agent_4_25_2024.pkl', 'wb') as file:
    pickle.dump(agent, file)
