In [1]:
# import dependencies
import time
from pprint import pprint
import numpy as np
import pandas as pd
from datetime import datetime, timedelta
import calendar

In [2]:
# Circuit reference table; it is left-joined onto the race results on 'Track'.
rc = pd.read_csv('Racetrack_data.csv', index_col=0)

In [3]:
# Seasons to process; each entry selects one './archive/<year>_data.csv' file.
years = ["2019"]




# function to parse dates as we read in results csv files
def dateparser(dstr):
    """Parse a 'Month, dd, yyyy' date string into a pandas Timestamp.

    Parameters
    ----------
    dstr : str, float or None
        Date text made of a full month name as listed in
        ``calendar.month_name``, a day and a four-digit year, separated by
        commas. Whitespace around each piece is ignored. Missing cells
        (float NaN or None) are accepted.

    Returns
    -------
    pandas.Timestamp or None
        The parsed date, or None when ``dstr`` is a float or None.

    Raises
    ------
    KeyError
        If the month name is not found in ``calendar.month_name``.
    ValueError
        If the text does not split into exactly three comma-separated
        pieces, or the pieces do not form a valid date.
    """
    # Missing values have nothing to parse. isinstance() also covers float
    # subclasses such as numpy.float64, which an exact type comparison misses.
    if dstr is None or isinstance(dstr, float):
        return None

    # Month name -> month number; calendar.month_name[0] is the empty string.
    month_number = {name: number for number, name in enumerate(calendar.month_name)}

    # Strip each piece so "March, 10, 2019" and "March,10,2019" parse alike.
    mon, dd, yyyy = (piece.strip() for piece in dstr.split(','))
    date_str = '/'.join([str(month_number[mon]), dd, yyyy])
    return pd.to_datetime(date_str, format='%m/%d/%Y')

# function that'll help determine which string values are actually numbers
def isNumber(x):
    """Report whether ``x`` can be converted to a float.

    Parameters
    ----------
    x : object
        Value to test, typically a string read from a CSV cell.

    Returns
    -------
    bool
        True when ``float(x)`` succeeds. False for null values (as judged by
        ``pd.isnull``) and for anything ``float()`` rejects.
    """
    if pd.isnull(x):
        return False
    try:
        float(x)
    except (TypeError, ValueError):
        # ValueError: text that is not a number.
        # TypeError: an object float() cannot accept at all.
        return False
    return True

def subset(string, chars):
    """Return the leading ``chars`` characters of ``string``.

    Null input (as judged by ``pd.isnull``) yields None instead of a slice.
    """
    return None if pd.isnull(string) else string[:chars]

# function that'll convert finishing time into a proper timedelta value
def timeparser(time_series):
    """Convert result time strings into absolute ``timedelta`` values.

    Items are processed in order and each is handled as exactly one of:

    * null, or text containing ``Lap``: recorded as None;
    * text containing ``+``: a gap, parsed as ``+M'S.f`` when it contains an
      apostrophe and as ``+S.f`` otherwise, then added to the most recent
      absolute time;
    * anything else: an absolute time in ``M'S.f`` form, which also becomes
      the reference for the gaps that follow.

    Parameters
    ----------
    time_series : iterable of str or null
        Time strings in result order, e.g. a pandas Series.

    Returns
    -------
    list
        One entry per input item, each a ``datetime.timedelta`` or None.

    Raises
    ------
    ValueError
        If a gap appears before any absolute time, or an item does not match
        the format expected for its kind.
    """
    def _as_delta(text, fmt):
        # strptime returns a datetime; only its time-of-day fields carry the
        # parsed duration, so rebuild a timedelta from those.
        parsed = datetime.strptime(text, fmt)
        return timedelta(hours=parsed.hour, minutes=parsed.minute,
                         seconds=parsed.second, microseconds=parsed.microsecond)

    # Most recent absolute time; gaps are measured from it. It is not reset
    # between groups of rows, so input order matters.
    base_delta = None
    new_time = []

    for position, item in enumerate(time_series):
        # One mutually exclusive chain guarantees a single append per item,
        # so a value such as "+1 Lap" is never also parsed as a gap.
        if pd.isnull(item) or 'Lap' in item:
            new_time.append(None)
        elif '+' in item:
            if base_delta is None:
                raise ValueError(
                    "gap %r at position %d has no preceding absolute time"
                    % (item, position)
                )
            gap_format = "+%M'%S.%f" if "'" in item else "+%S.%f"
            new_time.append(base_delta + _as_delta(item, gap_format))
        else:
            base_delta = _as_delta(item, "%M'%S.%f")
            new_time.append(base_delta)

    return new_time

In [4]:
# Load each season's results, derive absolute finish times, rebuild the track
# label and attach the circuit reference data.
dfs = []
rows_read = []
for yr in reversed(years):
    print(yr, end=', ')
    # Dates are converted after loading rather than through read_csv's
    # `date_parser` argument, which pandas deprecated in version 2.0.
    df = pd.read_csv(f'./archive/{yr}_data.csv', index_col=0)
    df['Date'] = pd.to_datetime(df['Date'].map(dateparser))
    # Disabled temperature / humidity clean-up, retained for reference.
#     df['Track_Temp'] = df['Track_Temp'].map(lambda x: int(x[:2]) if isNumber(subset(x,2)) else x)
#     df['Air_Temp'] = df['Air_Temp'].map(lambda x: int(x[:2]) if isNumber(subset(x,2)) else x)
#     df['Humidity'] = df['Humidity'].map(lambda x: float(x[:2])/100 if isNumber(subset(x,2)) else x)
    df['Finish_Time'] = timeparser(df.Time)
    # Rebuild the label as "<TRK> - <second ' - ' separated piece of Track>",
    # the form used as the join key against `rc`.
    circuit_name = df['Track'].map(lambda label: label.split(' - ')[1])
    df['Track'] = df['TRK'] + ' - ' + circuit_name
    df = df.merge(rc, on='Track', how='left')
    rows_read.append(len(df))
    dfs.append(df)
print('Completed Time Conversion and Merge!!')

2019, Completed Time Conversion and Merge!!


In [5]:
# Stack the per-season frames into one table and write it out as CSV.
fn = 'MotoGP_2019.csv'
result = pd.concat(dfs, ignore_index=True)
result.to_csv(fn)

In [6]:
# Read the exported file back and show its first 50 rows as a check.
exported = pd.read_csv('MotoGP_2019.csv')
exported.head(50)

Unnamed: 0.1,Unnamed: 0,Year,TRK,Track,Category,Session,Date,Track_Condition,Track_Temp,Air_Temp,...,Finish_Time,Trk Length,Left_Corners,Right_Corners,track_width,length of longest straight,MotoGP_avg_speed,GP_distance,Moto2_distance,Moto3_distance
0,0,2019,QAT,QAT - Losail International Circuit,MotoGP,RAC,2019-03-10,Dry,19.0,18.0,...,0 days 00:42:36.902000000,5.4,6.0,10.0,12.0,1068.0,166.6,118.4,107.6,96.8
1,1,2019,QAT,QAT - Losail International Circuit,MotoGP,RAC,2019-03-10,Dry,19.0,18.0,...,0 days 00:42:36.925000000,5.4,6.0,10.0,12.0,1068.0,166.6,118.4,107.6,96.8
2,2,2019,QAT,QAT - Losail International Circuit,MotoGP,RAC,2019-03-10,Dry,19.0,18.0,...,0 days 00:42:37.222000000,5.4,6.0,10.0,12.0,1068.0,166.6,118.4,107.6,96.8
3,3,2019,QAT,QAT - Losail International Circuit,MotoGP,RAC,2019-03-10,Dry,19.0,18.0,...,0 days 00:42:37.359000000,5.4,6.0,10.0,12.0,1068.0,166.6,118.4,107.6,96.8
4,4,2019,QAT,QAT - Losail International Circuit,MotoGP,RAC,2019-03-10,Dry,19.0,18.0,...,0 days 00:42:37.502000000,5.4,6.0,10.0,12.0,1068.0,166.6,118.4,107.6,96.8
5,5,2019,QAT,QAT - Losail International Circuit,MotoGP,RAC,2019-03-10,Dry,19.0,18.0,...,0 days 00:42:39.222000000,5.4,6.0,10.0,12.0,1068.0,166.6,118.4,107.6,96.8
6,6,2019,QAT,QAT - Losail International Circuit,MotoGP,RAC,2019-03-10,Dry,19.0,18.0,...,0 days 00:42:39.383000000,5.4,6.0,10.0,12.0,1068.0,166.6,118.4,107.6,96.8
7,7,2019,QAT,QAT - Losail International Circuit,MotoGP,RAC,2019-03-10,Dry,19.0,18.0,...,0 days 00:42:41.990000000,5.4,6.0,10.0,12.0,1068.0,166.6,118.4,107.6,96.8
8,8,2019,QAT,QAT - Losail International Circuit,MotoGP,RAC,2019-03-10,Dry,19.0,18.0,...,0 days 00:42:44.308000000,5.4,6.0,10.0,12.0,1068.0,166.6,118.4,107.6,96.8
9,9,2019,QAT,QAT - Losail International Circuit,MotoGP,RAC,2019-03-10,Dry,19.0,18.0,...,0 days 00:42:46.538000000,5.4,6.0,10.0,12.0,1068.0,166.6,118.4,107.6,96.8
