In [1]:
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt 
import seaborn as sns 
import scipy

### All the lap times have been picked up from Wikipedia and car specs from fastestlaps.com 

![Track](images/NurburgringNordschielfe.jpg)

In [2]:
df = pd.read_csv("C:\\Users\\dell\\Desktop\\Python Programs\\MiniPrject\\CSVfiles\\nurburgringCarData.csv")
df.head()

Unnamed: 0,Car,Lap time,PS,KG,0-100,0-200,0-300,top speed,Est. downforce @ 200 kph,Est. downforce @ 250 kph,Est. downforce @ 300 kph,Drivetrain,Powertrain,Aspiration
0,Mercedes - AMG One,6.29.09,1063,1620,2.7,7.0,15.6,352,752.0,1000.0,1352.0,AWD,Hybrid,Turbocharged
1,Porsche 911 GT2 RS Manthey Performance Kit (991),6.40.30,700,1420,2.6,7.8,21.5,343,469.0,621.0,806.0,RWD,ICE,Turbocharged
2,AMG GT Black Series,6.48.05,730,1637,2.8,8.3,26.7,352,449.0,600.0,740.0,RWD,ICE,Turbocharged
3,Corvette ZR1X ZTK Package,6.49.27,1267,1794,2.2,5.8,12.4,225 mph,381.0,484.0,608.0,AWD,Hybrid,Turbocharged
4,Porsche 911 GT3 RS (992) specs,6.49.30,585,1480,2.6,8.3,,296,509.0,785.0,988.0,RWD,ICE,Natural Aspiration


In [3]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 54 entries, 0 to 53
Data columns (total 14 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   Car                       54 non-null     object 
 1   Lap time                  54 non-null     object 
 2   PS                        54 non-null     int64  
 3   KG                        54 non-null     int64  
 4   0-100                     53 non-null     object 
 5   0-200                     52 non-null     object 
 6   0-300                     31 non-null     object 
 7   top speed                 54 non-null     object 
 8   Est. downforce @ 200 kph  51 non-null     float64
 9   Est. downforce @ 250 kph  51 non-null     float64
 10  Est. downforce @ 300 kph  50 non-null     float64
 11  Drivetrain                54 non-null     object 
 12  Powertrain                54 non-null     object 
 13  Aspiration                54 non-null     object 
dtypes: float64(3

### Converting lap times to just seconds

In [4]:
def laptimeSec(x):
    List = x.split(".")
    seconds = 60*float(List[0]) + float(List[1]) + 0.01*float(List[2])
    return seconds


df["Lap time"] = df["Lap time"].apply(lambda x: laptimeSec(x))

### Adding a row for power in horsepower as well as kw (hp is more used and kw is best for calculations)

In [5]:
def pstoKw (x): 
    return x*0.7355

def pstphp (x): 
    return x*0.98632

df["HP"] = df["PS"].apply(lambda x: pstoKw(x))
df["KW"] = df["PS"].apply(lambda x: pstphp(x))

#df 

### Now ill be converting all the mph to kmph
### Top Speed cpnversion is straight forward
### Acceleration follows saturation function so v(t) = Top speed (1 - e^(-kt)) 
### t(v) = inverse of v(t) which gives: -1/k * ln (1 - v/top speed) where k is like the rate constant
### Note: Here we are treating time as dependant because we need to find times for acceleration

In [6]:
def MphToKmph(x):
    return x*1.60934

def topSpeed(y):
    if pd.isna(y):
        return np.nan
    x = str(y)
    List = x.split(" ")
    if "mph" in List:
        return MphToKmph(float(List[0]))
    else:
        return float(List[0])
    
df["top speed"] = df["top speed"].apply(lambda x: topSpeed(x))
#df
df1 = df.copy()
#df1 

### Checking NaN values

In [7]:
nan_df = df[df.isna().any(axis =1 )]
#nan_df

### here we will use 1 / 1 - (v / topspeed)**p) because for top speeds close to 300, the time is blowing up

In [8]:
def t_of_v(v, A, p, topspeed):
    return A * np.log(1 / (1 - (v / topspeed) ** p))

def best_params(v, t, topspeed, p0=[1.0, 1.0]):
    params, cov = scipy.optimize.curve_fit(lambda v, A, p: t_of_v(v, A, p, topspeed), v, t, p0=p0, maxfev=5000 )
    return params 

In [9]:
def accelcorrector(y):
    x = y.copy()
    List = [x["0-100"], x["0-200"], x["0-300"]]
    if pd.isna(List[0]):
        return x  
    
    
    popped = False 
    if pd.isna(List[2]):
        List.pop(2)
        popped = True

    speeds = [] 
    times = [] 

    for i in List: 
        j = str(i)
        temp = j.split(" ")
        if "mph" in temp:
            mph_val = float(temp[-1])
            speeds.append(MphToKmph(mph_val))
            times.append(float(temp[0]))
        else: 
            times.append(float(temp[0]))
    
    
    if speeds:
        v = np.array(speeds, dtype=float)
        t = np.array(times, dtype=float)
        topspeed = float(x["top speed"])
        requiredV = np.array([100, 200, 300], dtype=float)
        
        
        A, p = best_params(v, t, topspeed)
        predicted_t = t_of_v(requiredV, A, p, topspeed)

        
        x["0-100"] = predicted_t[0]
        x["0-200"] = predicted_t[1]
        x["0-300"] = np.nan if popped else predicted_t[2]
        
        
        if popped:
            x["pred_0-300"] = predicted_t[2]
        else:
            x["pred_0-300"] = x["0-300"]
    else:
        
        x["pred_0-300"] = np.nan if popped else times[2]

    return x


In [10]:
df = df.apply(accelcorrector , axis = 1)
#df

  return A * np.log(1 / (1 - (v / topspeed) ** p))
  return A * np.log(1 / (1 - (v / topspeed) ** p))
  params, cov = scipy.optimize.curve_fit(lambda v, A, p: t_of_v(v, A, p, topspeed), v, t, p0=p0, maxfev=5000 )
  return A * np.log(1 / (1 - (v / topspeed) ** p))


In [11]:
df.head(7)

Unnamed: 0,0-100,0-200,0-300,Aspiration,Car,Drivetrain,Est. downforce @ 200 kph,Est. downforce @ 250 kph,Est. downforce @ 300 kph,HP,KG,KW,Lap time,PS,Powertrain,pred_0-300,top speed
0,2.7,7.0,15.6,Turbocharged,Mercedes - AMG One,AWD,752.0,1000.0,1352.0,781.8365,1620,1048.45816,389.09,1063,Hybrid,15.6,352.0
1,2.6,7.8,21.5,Turbocharged,Porsche 911 GT2 RS Manthey Performance Kit (991),RWD,469.0,621.0,806.0,514.85,1420,690.424,400.3,700,ICE,21.5,343.0
2,2.8,8.3,26.7,Turbocharged,AMG GT Black Series,RWD,449.0,600.0,740.0,536.915,1637,720.0136,408.05,730,ICE,26.7,352.0
3,2.2,5.8,12.4,Turbocharged,Corvette ZR1X ZTK Package,AWD,381.0,484.0,608.0,931.8785,1794,1249.66744,409.27,1267,Hybrid,12.4,362.1015
4,2.6,8.3,,Natural Aspiration,Porsche 911 GT3 RS (992) specs,RWD,509.0,785.0,988.0,430.2675,1480,576.9972,409.3,585,ICE,,296.0
5,1.938598,6.636789,17.792403,Turbocharged,Corvette ZR1 ZTK Package,RWD,281.0,426.0,608.0,793.6045,1701,1064.23928,410.76,1079,ICE,17.792403,362.0
6,2.712191,9.45539,31.044418,Supercharged,Ford Mustand GTD,RWD,422.0,659.0,949.0,598.697,1772,802.86448,412.07,814,ICE,31.044418,325.0


In [12]:
df1.head(7)

Unnamed: 0,Car,Lap time,PS,KG,0-100,0-200,0-300,top speed,Est. downforce @ 200 kph,Est. downforce @ 250 kph,Est. downforce @ 300 kph,Drivetrain,Powertrain,Aspiration,HP,KW
0,Mercedes - AMG One,389.09,1063,1620,2.7,7,15.6,352.0,752.0,1000.0,1352.0,AWD,Hybrid,Turbocharged,781.8365,1048.45816
1,Porsche 911 GT2 RS Manthey Performance Kit (991),400.3,700,1420,2.6,7.8,21.5,343.0,469.0,621.0,806.0,RWD,ICE,Turbocharged,514.85,690.424
2,AMG GT Black Series,408.05,730,1637,2.8,8.3,26.7,352.0,449.0,600.0,740.0,RWD,ICE,Turbocharged,536.915,720.0136
3,Corvette ZR1X ZTK Package,409.27,1267,1794,2.2,5.8,12.4,362.1015,381.0,484.0,608.0,AWD,Hybrid,Turbocharged,931.8785,1249.66744
4,Porsche 911 GT3 RS (992) specs,409.3,585,1480,2.6,8.3,,296.0,509.0,785.0,988.0,RWD,ICE,Natural Aspiration,430.2675,576.9972
5,Corvette ZR1 ZTK Package,410.76,1079,1701,2.2 mph 60,5.9 mph 120,16.0 mph 180,362.0,281.0,426.0,608.0,RWD,ICE,Turbocharged,793.6045,1064.23928
6,Ford Mustand GTD,412.07,814,1772,2.8 mph 60,10.2 mph 130,21.2 mph 170,325.0,422.0,659.0,949.0,RWD,ICE,Supercharged,598.697,802.86448


In [13]:
df = df1.copy()

### There still seems to be a significant difference in acceleration, ill manually correct them with approximations. The above formula will work better to predict intermediate timings, such as 0-250, 0-150, 0-175 etc. It wont work for extremes.

In [14]:
def accelcorrector2(y):
    mults = {
        "60": 1.005,
        "120": 1.01, 
        "130": 0.99,
        "170": 1.075,
        "180": 1.05
    }

    x = y.copy()
    List = [x["0-100"], x["0-200"], x["0-300"]]

    if pd.isna(List[0]):
        return x
    
    popped = False 
    if pd.isna(List[2]):
        List.pop(2)
        popped = True

    speeds = [] 
    times = [] 
    m = []

    for i in List: 
        j = str(i)
        temp = j.split(" ")

        if "mph" in temp:
            mph_val = temp[-1]
            m.append(mults.get(mph_val, 1.0))
            temp[-1] = MphToKmph(float(temp[-1]))
            speeds.append(temp[-1])
            times.append(float(temp[0]))
        else: 
            times.append(float(temp[0]))
            m.append(1.0)

    predictedt = [times[i]*m[i] for i in range(len(times))]

    x["0-100"] = predictedt[0]
    x["0-200"] = predictedt[1]
    x["0-300"] = np.nan if popped else predictedt[2]

    return x


In [15]:
df = df.apply(accelcorrector2 , axis = 1)
#df

In [16]:
df.head(7)

Unnamed: 0,Car,Lap time,PS,KG,0-100,0-200,0-300,top speed,Est. downforce @ 200 kph,Est. downforce @ 250 kph,Est. downforce @ 300 kph,Drivetrain,Powertrain,Aspiration,HP,KW
0,Mercedes - AMG One,389.09,1063,1620,2.7,7.0,15.6,352.0,752.0,1000.0,1352.0,AWD,Hybrid,Turbocharged,781.8365,1048.45816
1,Porsche 911 GT2 RS Manthey Performance Kit (991),400.3,700,1420,2.6,7.8,21.5,343.0,469.0,621.0,806.0,RWD,ICE,Turbocharged,514.85,690.424
2,AMG GT Black Series,408.05,730,1637,2.8,8.3,26.7,352.0,449.0,600.0,740.0,RWD,ICE,Turbocharged,536.915,720.0136
3,Corvette ZR1X ZTK Package,409.27,1267,1794,2.2,5.8,12.4,362.1015,381.0,484.0,608.0,AWD,Hybrid,Turbocharged,931.8785,1249.66744
4,Porsche 911 GT3 RS (992) specs,409.3,585,1480,2.6,8.3,,296.0,509.0,785.0,988.0,RWD,ICE,Natural Aspiration,430.2675,576.9972
5,Corvette ZR1 ZTK Package,410.76,1079,1701,2.211,5.959,16.8,362.0,281.0,426.0,608.0,RWD,ICE,Turbocharged,793.6045,1064.23928
6,Ford Mustand GTD,412.07,814,1772,2.814,10.098,22.79,325.0,422.0,659.0,949.0,RWD,ICE,Supercharged,598.697,802.86448


In [17]:
df1.head(7)

Unnamed: 0,Car,Lap time,PS,KG,0-100,0-200,0-300,top speed,Est. downforce @ 200 kph,Est. downforce @ 250 kph,Est. downforce @ 300 kph,Drivetrain,Powertrain,Aspiration,HP,KW
0,Mercedes - AMG One,389.09,1063,1620,2.7,7,15.6,352.0,752.0,1000.0,1352.0,AWD,Hybrid,Turbocharged,781.8365,1048.45816
1,Porsche 911 GT2 RS Manthey Performance Kit (991),400.3,700,1420,2.6,7.8,21.5,343.0,469.0,621.0,806.0,RWD,ICE,Turbocharged,514.85,690.424
2,AMG GT Black Series,408.05,730,1637,2.8,8.3,26.7,352.0,449.0,600.0,740.0,RWD,ICE,Turbocharged,536.915,720.0136
3,Corvette ZR1X ZTK Package,409.27,1267,1794,2.2,5.8,12.4,362.1015,381.0,484.0,608.0,AWD,Hybrid,Turbocharged,931.8785,1249.66744
4,Porsche 911 GT3 RS (992) specs,409.3,585,1480,2.6,8.3,,296.0,509.0,785.0,988.0,RWD,ICE,Natural Aspiration,430.2675,576.9972
5,Corvette ZR1 ZTK Package,410.76,1079,1701,2.2 mph 60,5.9 mph 120,16.0 mph 180,362.0,281.0,426.0,608.0,RWD,ICE,Turbocharged,793.6045,1064.23928
6,Ford Mustand GTD,412.07,814,1772,2.8 mph 60,10.2 mph 130,21.2 mph 170,325.0,422.0,659.0,949.0,RWD,ICE,Supercharged,598.697,802.86448


### Adding power to weight and downforce to weight ratios

In [18]:
df["Power/Weight"] = df["KW"]/df["KG"]
df["downforce 250 / weight"] = df["Est. downforce @ 250 kph"] / df["KG"]
df["downforce 200 / weight"] = df["Est. downforce @ 200 kph"] / df["KG"]
#df

### Now ill be dealing NaNs in columns like 0 - 200,300 and downforce at 200,250,300 for cars having top speed less than 200 250 and 300

In [19]:
df["0-200"] = np.where(df["top speed"] < 200 , np.inf , df["0-200"]) 
df["Est. downforce @ 200 kph"] = np.where(df["top speed"] < 200 , 0 , df["Est. downforce @ 200 kph"]) 
df["0-300"] = np.where(df["top speed"] < 300 , np.inf , df["0-300"]) 
df["Est. downforce @ 300 kph"] = np.where(df["top speed"] < 300 , 0 , df["Est. downforce @ 300 kph"]) 
df["Est. downforce @ 250 kph"] = np.where(df["top speed"] < 250 , 0 , df["Est. downforce @ 250 kph"]) 
#df

### Sorting by laptimes

In [20]:
df.sort_values(by = "Lap time" , inplace=True)
#df

### Checking rows with no NaNs

In [21]:
Notnan_df = df.dropna(how = "any")
Notnan_df.sort_values(by = "Lap time")
#Notnan_df

Unnamed: 0,Car,Lap time,PS,KG,0-100,0-200,0-300,top speed,Est. downforce @ 200 kph,Est. downforce @ 250 kph,Est. downforce @ 300 kph,Drivetrain,Powertrain,Aspiration,HP,KW,Power/Weight,downforce 250 / weight,downforce 200 / weight
31,Porsche 919 Hybrid Evo,319.55,1160,829,1.8,4.1,8.5,369.0,900.0,1600.0,2400.0,AWD,Hybrid,Turbocharged,853.18,1144.1312,1.380134,1.930036,1.085645
32,Volkswagen IDR,365.36,680,1100,2.25,4.5,inf,270.0,650.0,1200.0,0.0,AWD,Electric,Electric,500.14,670.6976,0.609725,1.090909,0.590909
0,Mercedes - AMG One,389.09,1063,1620,2.7,7.0,15.6,352.0,752.0,1000.0,1352.0,AWD,Hybrid,Turbocharged,781.8365,1048.45816,0.647196,0.617284,0.464198
1,Porsche 911 GT2 RS Manthey Performance Kit (991),400.3,700,1420,2.6,7.8,21.5,343.0,469.0,621.0,806.0,RWD,ICE,Turbocharged,514.85,690.424,0.486214,0.437324,0.330282
2,AMG GT Black Series,408.05,730,1637,2.8,8.3,26.7,352.0,449.0,600.0,740.0,RWD,ICE,Turbocharged,536.915,720.0136,0.439837,0.366524,0.274282
3,Corvette ZR1X ZTK Package,409.27,1267,1794,2.2,5.8,12.4,362.1015,381.0,484.0,608.0,AWD,Hybrid,Turbocharged,931.8785,1249.66744,0.696582,0.269788,0.212375
4,Porsche 911 GT3 RS (992) specs,409.3,585,1480,2.6,8.3,inf,296.0,509.0,785.0,0.0,RWD,ICE,Natural Aspiration,430.2675,576.9972,0.389863,0.530405,0.343919
5,Corvette ZR1 ZTK Package,410.76,1079,1701,2.211,5.959,16.8,362.0,281.0,426.0,608.0,RWD,ICE,Turbocharged,793.6045,1064.23928,0.625655,0.250441,0.165197
6,Ford Mustand GTD,412.07,814,1772,2.814,10.098,22.79,325.0,422.0,659.0,949.0,RWD,ICE,Supercharged,598.697,802.86448,0.453084,0.371896,0.238149
7,Porsche 911 GT3 RS Manthey Performance Kit (99...,413.34,560,1400,2.7,9.2,inf,285.0,355.0,555.0,0.0,RWD,ICE,Natural Aspiration,411.88,552.3392,0.394528,0.396429,0.253571


In [22]:
df.describe()

  sqr = _ensure_numeric((avg - values) ** 2)
  sqr = _ensure_numeric((avg - values) ** 2)
  diff_b_a = subtract(b, a)


Unnamed: 0,Lap time,PS,KG,0-100,0-200,0-300,top speed,Est. downforce @ 200 kph,Est. downforce @ 250 kph,Est. downforce @ 300 kph,HP,KW,Power/Weight,downforce 250 / weight,downforce 200 / weight
count,54.0,54.0,54.0,53.0,53.0,52.0,54.0,51.0,51.0,51.0,54.0,54.0,54.0,51.0,51.0
mean,443.144074,716.944444,1758.185185,3.358264,inf,inf,307.853731,167.196078,251.764706,276.392157,527.312639,707.136644,0.407357,0.181257,0.117742
std,35.551354,434.637711,384.663983,1.030279,,,49.397849,216.899426,343.598579,431.451971,319.676037,428.691868,0.220572,0.325512,0.191808
min,319.55,300.0,829.0,1.8,4.1,8.5,199.0,0.0,0.0,0.0,220.65,295.896,0.178681,0.0,0.0
25%,419.3525,502.5,1447.75,2.7,8.787,23.5,270.5,0.0,0.0,0.0,369.58875,495.6258,0.298629,0.0,0.0
50%,447.005,600.0,1664.0,3.2,10.6,31.3,315.0,70.0,120.0,120.0,441.3,591.792,0.339204,0.061842,0.037493
75%,467.98,695.0,2145.75,3.8,15.5,,332.0,290.5,436.5,458.5,511.1725,685.4924,0.4098,0.260701,0.171833
max,523.0,3018.0,2655.0,5.8,inf,inf,496.0,900.0,1600.0,2400.0,2219.739,2976.71376,1.380134,1.930036,1.085645


In [23]:
Notnan_df.describe()

  sqr = _ensure_numeric((avg - values) ** 2)
  sqr = _ensure_numeric((avg - values) ** 2)
  diff_b_a = subtract(b, a)


Unnamed: 0,Lap time,PS,KG,0-100,0-200,0-300,top speed,Est. downforce @ 200 kph,Est. downforce @ 250 kph,Est. downforce @ 300 kph,HP,KW,Power/Weight,downforce 250 / weight,downforce 200 / weight
count,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0
mean,444.678,659.42,1735.8,3.4015,inf,inf,302.80203,163.48,248.06,270.26,485.00341,650.399134,0.388977,0.179573,0.115808
std,36.504456,279.007724,375.605144,1.044124,,,43.059055,217.455343,346.056525,433.581469,205.210181,275.190898,0.195045,0.328592,0.193252
min,319.55,300.0,829.0,1.8,4.1,8.5,199.0,0.0,0.0,0.0,220.65,295.896,0.178681,0.0,0.0
25%,418.1175,502.5,1431.75,2.7,9.25,23.55,266.25,0.0,0.0,0.0,369.58875,495.6258,0.295223,0.0,0.0
50%,448.5,595.0,1653.5,3.25,10.6,31.75,313.0,67.5,110.0,66.0,437.6225,586.8604,0.33556,0.05739,0.035988
75%,468.9575,677.5,2098.0,3.875,16.775,,329.0,276.75,433.5,454.0,498.30125,668.2318,0.4019,0.256911,0.164019
max,523.0,1941.0,2655.0,5.8,inf,inf,415.0,900.0,1600.0,2400.0,1427.6055,1914.44712,1.380134,1.930036,1.085645


In [24]:
df.to_csv("CleanedData.csv")
Notnan_df.to_csv("CleanedData_withNoNaN.csv")