In [2]:
import pandas as pd
import numpy as np
import os
import torch, torch.nn as nn, torch.optim as optim, torch.nn.functional as F
import math
import requests


In [20]:
# Colab only: mount Google Drive so the data directory under /content/drive
# (see db_dir) is readable from this runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [1]:
# Base directory on the mounted Drive; the CSV and .npy files used below are
# addressed by string concatenation onto this path (note the trailing slash).
db_dir = '/content/drive/My Drive/f1ml/db/'

In [87]:
# The time format in the Ergast database is MM:SS.ms
def time_to_int(t):
  """Convert an Ergast-style time value to a number of seconds.

  Despite the name, the result is a float (seconds with a fractional part).

  Args:
    t: either a float that is already in seconds, or a string such as
      'SS.ms', 'MM:SS.ms' or 'H:MM:SS.ms'.

  Returns:
    The time in seconds, or None when the value is missing or unparsable:
    a non-finite float, the database NULL marker '\\N', or any text
    without a decimal point.
  """
  if isinstance(t, float):
    # Already in seconds. NaN/inf stand for "no time", reported as None
    # just like the textual missing markers below.
    return t if math.isfinite(t) else None

  text = str(t)
  if '\\N' in text or '.' not in text:
    return None

  # Every field before the last one is a whole base-60 unit (minutes, and
  # hours if present); the last field carries the fractional seconds.
  *clock_fields, seconds_field = text.split(':')
  whole = 0
  for field in clock_fields:
    whole = whole * 60 + int(field)
  return whole * 60 + float(seconds_field)

### Status Embedding

In [60]:
# Status-id groups used for the 6-way status one-hot encoding.
# The position of each inner list is the class index: stat_embed() sets that
# slot to 1 when the id is found in the list, and stat_unbed() maps the same
# positions back to the labels noted at the end of each row.
# NOTE(review): the ids look like Ergast statusId values stored as floats,
# with 0.0 apparently standing for "no problem" - confirm against the
# preprocessing that produced them.
stat_emb = [
  [4.0, 3.0, 130.0], # Accident/Collision
  [22.0, 5.0, 10.0, 23.0, 44.0, 47.0, 30.0, 32.0, 8.0, 38.0, 43.0, 85.0, 9.0, 86.0, 6.0, 2.0, 7.0, 87.0, 71.0, 41.0, 46.0, 37.0, 65.0, 78.0, 25.0, 74.0, 75.0, 26.0, 51.0, 40.0, 79.0, 36.0, 83.0, 80.0, 21.0, 69.0, 72.0, 70.0, 27.0, 60.0, 63.0, 29.0, 64.0, 66.0, 56.0, 59.0, 61.0, 42.0, 39.0, 48.0, 49.0, 34.0, 35.0, 28.0, 24.0, 33.0, 129.0, 76.0, 91.0, 131.0, 101.0, 132.0, 135.0,  84.0,  136.0,  105.0,  137.0,  138.0,  139.0], # Car issues
  [11.0,  13.0,  12.0,  14.0,  17.0,  15.0,  16.0, 18.0,  55.0,  58.0,  45.0, 88.0], # Lapped
  [0.0], # No problem
  [77.0, 73.0, 82.0, 81.0, 62.0, 54.0, 31.0, 96.0], # Other
  [20.0] #'Spun off'
] 

def stat_embed(id):
  """One-hot encode a status id into a length-6 float vector.

  The slot that is set is the index of the first group in `stat_emb` that
  contains `id`; ids found in no group fall back to slot 4 ("Other").
  """
  onehot = np.zeros(6)
  # First matching group wins; 4 is the "Other" class.
  slot = next((k for k in range(6) if id in stat_emb[k]), 4)
  onehot[slot] = 1
  return onehot

def stat_unbed(array, retired=False):
  """Decode a status score vector into its label.

  Args:
    array: array-like of per-class scores, laid out like the output of
      stat_embed (index 3 is "No Problem"). It is never modified.
    retired: when True, "No Problem" is excluded from the candidates.

  Returns:
    The label of the highest-scoring class, or 'something is wrong' when
    the arg-max falls outside the six known classes.
  """
  labels = ('Accident/Collision', 'Car Issues', 'Lapped',
            'No Problem', 'Other', 'Spun off')

  # Work on a float copy so the caller's data is untouched and -inf is a
  # representable mask value even for integer input.
  scores = np.array(array, dtype=float)
  if retired:
    # Mask with -inf rather than 0: with negative scores (e.g. raw logits)
    # a 0 would make the excluded class win the arg-max.
    scores[3] = -np.inf

  best = int(np.argmax(scores))
  if 0 <= best < len(labels):
    return labels[best]
  return 'something is wrong'

### laptime embedding

In [57]:
def lt_embed(laptime):
  """Encode a lap time (seconds) as a 32-element 0/1 float vector.

  Only 0.1 s resolution is kept. Layout of the result:
    [0:2]   hundreds digit as two bits: 1 -> [1,0], 2 -> [0,1],
            3 or more -> [1,1], 0 -> [0,0]
    [2:12]  one-hot of the tens digit
    [12:22] one-hot of the ones digit
    [22:32] one-hot of the tenths digit
  """
  # Peel off four decimal digits of the time expressed in tenths of a
  # second, least significant first.
  remaining = math.floor(laptime * 10)
  digits = []
  for _ in range(4):
    digits.append(int(remaining % 10))
    remaining = math.floor(remaining / 10)
  tenths, ones, tens, hundreds = digits

  hundreds_bits = np.array(
      [hundreds == 1 or hundreds > 2, hundreds >= 2], dtype=float)

  pieces = [hundreds_bits]
  for digit in (tens, ones, tenths):
    one_hot = np.zeros(10)
    one_hot[digit] = 1
    pieces.append(one_hot)
  return np.concatenate(pieces)

def lt_unbed(l_array):
  """Decode a 32-element lap-time vector (see lt_embed) back to seconds.

  Args:
    l_array: sequence of 32 scores. Entries 0 and 1 are the hundreds bits
      (thresholded at 0.5); entries 2:12, 12:22 and 22:32 are the tens,
      ones and tenths digits, each decoded by arg-max.

  Returns:
    The lap time in seconds at 0.1 s resolution.
  """
  bit_100 = l_array[0] >= 0.5
  bit_200 = l_array[1] >= 0.5
  if bit_100 and bit_200:
    hundreds = 3
  elif bit_100:
    hundreds = 1
  elif bit_200:
    hundreds = 2
  else:
    hundreds = 0

  tens = np.argmax(l_array[2:12])
  ones = np.argmax(l_array[12:22])
  tenths = np.argmax(l_array[22:32])

  # Assemble the value in integer tenths and divide once: multiplying the
  # tenths digit by 0.1 leaks float error (3 * 0.1 == 0.30000000000000004).
  return (hundreds * 1000 + tens * 100 + ones * 10 + tenths) / 10

In [58]:
# Sanity check: the embedding has 2 hundreds bits + 3 one-hot digits of 10 = 32 entries.
l = lt_embed(123.456)
print(len(l))

32


In [59]:
# Round trip: only 0.1 s resolution is encoded, so 123.456 decodes to 123.4.
u = lt_unbed(l)
print(u)

123.4


### driverId embedding

In [56]:
# Driver lookup table used by the index helpers below: a row's positional
# index + 1 is "our id", and its 'driverId' column is the dataset's id.
drivers_short = pd.read_csv(db_dir + 'drivers_short.csv')
# from driverId to our id
def driver_embed_idx(driverId):
  """Translate a dataset driverId into our 1-based driver index.

  Returns 0 when the driver is not listed in `drivers_short`; otherwise the
  matching row's index label plus one.
  """
  matches = drivers_short.query(f'driverId == {driverId}').index
  return 0 if matches.empty else matches.item() + 1

# from our id to driverId
def driver_unbed_idx(idx):
  """Translate our 1-based driver index back into the dataset driverId.

  NOTE(review): idx 0 (the "unknown driver" value of driver_embed_idx)
  reads position -1, i.e. the last row - confirm callers never pass 0.
  """
  return drivers_short.iloc[idx - 1]['driverId']

# from our id to array
def driver_embed(idx):
  """One-hot encode our 1-based driver index into a length-130 float vector.

  Slot idx - 1 is set to 1. Because of negative indexing, idx 0 sets the
  last slot (129).
  """
  one_hot = np.zeros(130)
  slot = idx - 1
  one_hot[slot] = 1
  return one_hot

# from array to our id
def driver_unbed(d_array):
  """Return our 1-based driver index for the highest-scoring slot of `d_array`."""
  best_slot = np.argmax(d_array)
  return best_slot + 1

## Get qualifying lap times (2021 onwards) or laps from older races

In [129]:
def _standings_entries(response, list_key):
  """Return the entries of an Ergast standings response, or [] if unavailable."""
  if response.status_code != 200:
    return []
  standings_lists = response.json()['MRData']['StandingsTable']['StandingsLists']
  return standings_lists[0][list_key] if standings_lists else []


def _rank_flags(entries, entity, id_key, ref, cutoffs):
  """Build one 0/1 flag per cutoff: set when `ref` sits at a 0-based rank <= cutoff."""
  flags = np.zeros(len(cutoffs))
  for rank, entry in enumerate(entries):
    if entry[entity][id_key] == ref:
      for slot, cutoff in enumerate(cutoffs):
        if rank <= cutoff:
          flags[slot] = 1
      break
  return flags


def get_times(year, round, lap):
  """Build the model input for a race.

  For seasons up to 2020 the pre-computed array
  `races_npy/{year}/{round-1}_in.npy` is loaded and its first `lap` rows are
  returned. For later seasons a single "lap 0" row is assembled from the
  Ergast qualifying results and the standings before the race; `lap` is not
  used on that path.

  Layout of the assembled row (4051 values):
    130  circuit one-hot (indexed by circuitId)
      1  lap number / total laps (left at 0)
    20 x 196 per-driver features, in qualifying order:
         130 driver one-hot, 3 driver-standing flags, 2 constructor-standing
         flags, 21 grid-position one-hot, 1 pit flag, 6 status one-hot,
         32 lap-time embedding, 1 spare value
  Slots for missing drivers (fewer than 20 qualifiers) are zero-filled.

  Args:
    year: season.
    round: 1-based round number within the season.
    lap: number of laps to keep (pre-computed path only).

  Returns:
    A numpy array: `race[:lap]` for year <= 2020, otherwise shape (1, 4051).
    None when the qualifying data cannot be fetched or lists no race.
  """
  if (year <= 2020):
    race = np.load(db_dir + f'/races_npy/{year}/{round-1}_in.npy')
    return race[:lap]

  n_slots = 20          # driver slots in the input row
  driver_width = 196    # 130 + 3 + 2 + 21 + 1 + 6 + 32 + 1

  quali = requests.get(f'http://ergast.com/api/f1/{year}/{round}/qualifying.json')
  # HTTP errors are 4xx/5xx, so anything other than 200 counts as a failure.
  if (quali.status_code != 200):
    return None
  races = quali.json()['MRData']['RaceTable']['Races']
  if (not races):
    return None
  results = races[0]['QualifyingResults'][:n_slots]

  circuits = pd.read_csv(db_dir + 'circuits.csv')
  drivers = pd.read_csv(db_dir + 'drivers.csv')

  circuitRef = races[0]['Circuit']['circuitId']
  circuitId = circuits.loc[circuits['circuitRef'] == circuitRef, 'circuitId'].item()
  circuit = np.zeros(130)
  circuit[circuitId] = 1
  parts = [circuit, np.zeros(1)]  # trailing slot: lap number / total number of laps

  # Standings going into this race: the previous round, or the final
  # standings of the previous season for the opening round.
  if (round - 1 < 1):
    standings_url = f'http://ergast.com/api/f1/{year-1}'
  else:
    standings_url = f'http://ergast.com/api/f1/{year}/{round-1}'
  driver_standings = _standings_entries(
      requests.get(standings_url + '/driverStandings.json'), 'DriverStandings')[:n_slots]
  constructor_standings = _standings_entries(
      requests.get(standings_url + '/constructorStandings.json'), 'ConstructorStandings')

  for i, result in enumerate(results):
    driverRef = result['Driver']['driverId']
    known = drivers.loc[drivers['driverRef'] == driverRef, 'driverId']
    # Drivers missing from drivers.csv share the "unknown" index 0 that
    # driver_embed_idx already uses for unlisted drivers.
    our_did = 0 if known.empty else driver_embed_idx(known.item())
    parts.append(driver_embed(our_did))

    cref = result['Constructor']['constructorId']
    parts.append(_rank_flags(driver_standings, 'Driver', 'driverId', driverRef, (1, 3, 10)))
    parts.append(_rank_flags(constructor_standings, 'Constructor', 'constructorId', cref, (1, 3)))

    pos = np.zeros(21)
    pos[i] = 1
    parts.append(pos)

    parts.append(np.zeros(1))   # pit flag
    parts.append(stat_embed(0))  # status: no problem

    # Best available session time: Q3, then Q2, then Q1. A session with no
    # usable time is skipped; 0.0 is used when none is set.
    seconds = None
    for session in ('Q3', 'Q2', 'Q1'):
      if (session in result):
        seconds = time_to_int(result[session])
        if (seconds is not None):
          break
    parts.append(lt_embed(float(seconds) if seconds is not None else 0.0))

    parts.append(np.zeros(1))   # spare value

  # Zero-fill the slots of drivers that did not take part in qualifying.
  parts.append(np.zeros((n_slots - len(results)) * driver_width))

  return np.expand_dims(np.concatenate(parts), 0)





## scratch

In [130]:
# Smoke test of the API path (year > 2020): build the input row for 2021 round 1.
a = get_times(2021, 1, 10)

In [131]:
# Expected row width: 130 (circuit) + 1 (lap) + 20 drivers * 196 features = 4051.
len(a[0])

4051