In [23]:
import os
import re
import pandas as pd
import numpy as np

In [24]:
# Folder containing the lap-time CSV files (relative to the notebook's working directory)
folder_path = '../src/lap_times_fixed/'

# os.listdir returns entries in arbitrary order, so sort them: the dictionary
# below (and every later loop over it) is then built in the same order on each run.
file_paths = [
    os.path.join(folder_path, entry)
    for entry in sorted(os.listdir(folder_path))
    if entry.endswith('.csv')
]

# Maps "<file name without extension>" -> DataFrame holding that file's contents
dataframes = {}

# NOTE(review): the previews below show an 'Unnamed: 0' column, which suggests the
# CSVs were saved with their index; consider index_col=0 if that column is unused.
for file_path in file_paths:
    # Dictionary key: file name with directory and extension removed
    file_name = os.path.splitext(os.path.basename(file_path))[0]
    dataframes[file_name] = pd.read_csv(file_path)

## Check dataframe content

In [25]:
# Preview the first five rows of one loaded file
dataframes['Dataframe_1'].head(n=5)

Unnamed: 0,Plats,År,Plac,Klass,#,Namn,Klubb,Varv,Tid,Varv 1,Varv 2,Varv 3,Varv 4,Varv 5,Varv 6
0,FMCK Skövde,2023,1.0,Motion 40-49,1111,Magnus Edberg,Huskvarna MK,4,53:05.5,14:08.2,14:17.5,11:47.5,12:52.1,,
1,FMCK Skövde,2023,2.0,Motion -39,518,Alexander Fält,Försvarsmaktens EK,4,53:45.2,14:38.8,14:31.4,12:13.6,12:21.2,,
2,FMCK Skövde,2023,3.0,Motion -39,577,Måns Dalén,FMCK Skövde,4,53:51.5,14:20.4,14:53.3,12:03.4,12:34.3,,
3,FMCK Skövde,2023,4.0,Ungdom E1,175,William Almén,SMK Värnamo,4,54:06.2,14:26.6,14:50.0,12:07.0,12:42.5,,
4,FMCK Skövde,2023,5.0,Motion 40-49,944,Niklas Strömberg,Götene MK,4,54:13.8,14:40.2,15:07.9,11:56.7,12:28.9,,


## Convert lap times to seconds

In [26]:
# Function to convert lap time strings to seconds
def convert_to_seconds(lap_time):
    """Convert a lap time such as ``'14:08.2'`` to a number of seconds.

    Parameters
    ----------
    lap_time : str, number or missing value
        * ``'MM:SS'``, ``'MM:SS.f'`` or ``'H:MM:SS.f'`` text, with any number
          of fractional digits.
        * A value that is already numeric is returned unchanged (as ``float``),
          so applying the conversion a second time does not erase laps that
          were already converted.

    Returns
    -------
    float
        The lap time in seconds, or ``np.nan`` for anything that is not a lap
        time: missing values, text without a colon (e.g. ``'KÖR'``,
        ``'BRUTIT'``) and malformed text. ``np.nan`` is used instead of
        ``pd.NaT`` because the result is a number, not a timestamp.
    """
    # Already-converted values (and float NaN) pass through untouched.
    if isinstance(lap_time, (int, float, np.integer, np.floating)) and not isinstance(lap_time, bool):
        return float(lap_time)
    if not isinstance(lap_time, str) or ':' not in lap_time:
        return np.nan

    # Split into the colon-separated whole fields and the optional fraction
    # that follows the last field.
    *leading, last = lap_time.split(':')
    whole, _, fraction = last.partition('.')
    fields = leading + [whole]
    if len(fields) > 3:  # more than hours:minutes:seconds is not a lap time
        return np.nan

    fraction = fraction.strip()
    try:
        # Fold the base-60 fields (hours, minutes, seconds) into whole seconds.
        whole_seconds = 0
        for field in fields:
            whole_seconds = whole_seconds * 60 + int(field)
        # Scale by the number of digits: '.2' -> 0.2, '.25' -> 0.25
        fractional = int(fraction) / 10 ** len(fraction) if fraction else 0.0
    except ValueError:
        # A non-numeric field means the text is not a lap time.
        return np.nan
    return whole_seconds + fractional

# Rewrite every "Varv N" column of every DataFrame using convert_to_seconds
for df_name in dataframes:
    varv_columns = [col for col in dataframes[df_name].columns if col.startswith('Varv ')]
    for col in varv_columns:
        converted = dataframes[df_name][col].apply(convert_to_seconds)
        # Any remaining 'KÖR' / 'BRUTIT' status strings are replaced by missing values
        dataframes[df_name][col] = converted.replace({'KÖR': pd.NaT, 'BRUTIT': pd.NaT})


In [27]:
# Preview the first five rows after the lap-time conversion
dataframes['Dataframe_1'].head(n=5)

Unnamed: 0,Plats,År,Plac,Klass,#,Namn,Klubb,Varv,Tid,Varv 1,Varv 2,Varv 3,Varv 4,Varv 5,Varv 6
0,FMCK Skövde,2023,1.0,Motion 40-49,1111,Magnus Edberg,Huskvarna MK,4,53:05.5,848.2,857.5,707.5,772.1,NaT,NaT
1,FMCK Skövde,2023,2.0,Motion -39,518,Alexander Fält,Försvarsmaktens EK,4,53:45.2,878.8,871.4,733.6,741.2,NaT,NaT
2,FMCK Skövde,2023,3.0,Motion -39,577,Måns Dalén,FMCK Skövde,4,53:51.5,860.4,893.3,723.4,754.3,NaT,NaT
3,FMCK Skövde,2023,4.0,Ungdom E1,175,William Almén,SMK Värnamo,4,54:06.2,866.6,890.0,727.0,762.5,NaT,NaT
4,FMCK Skövde,2023,5.0,Motion 40-49,944,Niklas Strömberg,Götene MK,4,54:13.8,880.2,907.9,716.7,748.9,NaT,NaT


## Find fastest lap time in odd Varv columns

In [28]:
# Function to find the fastest lap time from odd-numbered "Varv " columns
def fastest_odd_lap(row):
    """Return the smallest non-null value among the odd-numbered laps of ``row``.

    A label counts as a lap when it starts with ``'Varv '``; the lap number is
    the integer after the last whitespace. Returns ``np.nan`` when the row has
    no non-null odd lap.
    """
    candidates = []
    for label in row.index:
        if not label.startswith('Varv '):
            continue
        lap_number = int(label.split()[-1])
        if lap_number % 2 == 0:
            continue
        lap_time = row[label]
        if pd.isnull(lap_time):
            continue
        candidates.append(lap_time)
    return min(candidates) if candidates else np.nan

# Add a "FastestOddLap" column to every DataFrame, computed row by row
for df_name in dataframes:
    df = dataframes[df_name]
    df['FastestOddLap'] = df.apply(fastest_odd_lap, axis='columns')


In [29]:
# Preview the first five rows including the new FastestOddLap column
dataframes['Dataframe_1'].head(n=5)

Unnamed: 0,Plats,År,Plac,Klass,#,Namn,Klubb,Varv,Tid,Varv 1,Varv 2,Varv 3,Varv 4,Varv 5,Varv 6,FastestOddLap
0,FMCK Skövde,2023,1.0,Motion 40-49,1111,Magnus Edberg,Huskvarna MK,4,53:05.5,848.2,857.5,707.5,772.1,NaT,NaT,707.5
1,FMCK Skövde,2023,2.0,Motion -39,518,Alexander Fält,Försvarsmaktens EK,4,53:45.2,878.8,871.4,733.6,741.2,NaT,NaT,733.6
2,FMCK Skövde,2023,3.0,Motion -39,577,Måns Dalén,FMCK Skövde,4,53:51.5,860.4,893.3,723.4,754.3,NaT,NaT,723.4
3,FMCK Skövde,2023,4.0,Ungdom E1,175,William Almén,SMK Värnamo,4,54:06.2,866.6,890.0,727.0,762.5,NaT,NaT,727.0
4,FMCK Skövde,2023,5.0,Motion 40-49,944,Niklas Strömberg,Götene MK,4,54:13.8,880.2,907.9,716.7,748.9,NaT,NaT,716.7


## Find fastest lap time in even Varv columns

In [30]:
# Function to find the fastest lap time from even-numbered "Varv " columns
def fastest_even_lap(row):
    """Return the smallest non-null value among the even-numbered laps of ``row``.

    A label counts as a lap when it starts with ``'Varv '``; the lap number is
    the integer after the last whitespace. Returns ``np.nan`` when the row has
    no non-null even lap.
    """
    def is_even_lap(label):
        # Same test, in the same order: prefix first, then parity of the number
        return label.startswith('Varv ') and int(label.split()[-1]) % 2 == 0

    lap_times = [row[label] for label in filter(is_even_lap, row.index)]
    valid_times = [lap_time for lap_time in lap_times if not pd.isnull(lap_time)]
    if not valid_times:
        return np.nan
    return min(valid_times)

# Add a "FastestEvenLap" column to every DataFrame, computed row by row
for df_name in dataframes:
    df = dataframes[df_name]
    df['FastestEvenLap'] = df.apply(fastest_even_lap, axis='columns')


In [31]:
# Show every row except the last ten (same rows as .head(-10))
dataframes['Dataframe_1'].iloc[:-10]

Unnamed: 0,Plats,År,Plac,Klass,#,Namn,Klubb,Varv,Tid,Varv 1,Varv 2,Varv 3,Varv 4,Varv 5,Varv 6,FastestOddLap,FastestEvenLap
0,FMCK Skövde,2023,1.0,Motion 40-49,1111,Magnus Edberg,Huskvarna MK,4,53:05.5,848.2,857.5,707.5,772.1,,,707.5,772.1
1,FMCK Skövde,2023,2.0,Motion -39,518,Alexander Fält,Försvarsmaktens EK,4,53:45.2,878.8,871.4,733.6,741.2,,,733.6,741.2
2,FMCK Skövde,2023,3.0,Motion -39,577,Måns Dalén,FMCK Skövde,4,53:51.5,860.4,893.3,723.4,754.3,,,723.4,754.3
3,FMCK Skövde,2023,4.0,Ungdom E1,175,William Almén,SMK Värnamo,4,54:06.2,866.6,890.0,727.0,762.5,,,727.0,762.5
4,FMCK Skövde,2023,5.0,Motion 40-49,944,Niklas Strömberg,Götene MK,4,54:13.8,880.2,907.9,716.7,748.9,,,716.7,748.9
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
356,FMCK Skövde,2023,75.0,Bredd,212,Melker Djurstedt,Djursdala SK,5,38:00.3,1085.6,1127.6,1168.8,1266.1,1232.0,,1085.6,1127.6
357,FMCK Skövde,2023,76.0,Junior,134,Max Nordmark,Varbergs MK,5,41:12.0,1083.4,1216.0,1180.8,1465.6,1126.0,,1083.4,1216.0
358,FMCK Skövde,2023,77.0,Bredd,336,Andreas Johansson,Carlsborgs MK,4,55:53.0,773.4,825.7,803.5,950.3,NaT,,773.4,825.7
359,FMCK Skövde,2023,78.0,Bredd,269,Albin Bäckström Herbertsson,Säffle MCK,4,04:29.8,912.4,974.6,922.8,1059.8,NaT,,912.4,974.6


## Drop rows that don't have enough data.

In [32]:
# Remove, in place, every row that lacks a value in "Varv 2" or in "Varv 3"
for df_name in dataframes:
    df = dataframes[df_name]
    columns_to_check = ['Varv 2', 'Varv 3']
    # dropna's default (how='any') drops a row when at least one listed column is null
    df.dropna(subset=columns_to_check, inplace=True)

## Check for missing data

In [33]:
# Report, per DataFrame, the rows where FastestOddLap or FastestEvenLap is null
for df_name in dataframes:
    df = dataframes[df_name]
    missing_values = df[
        df['FastestOddLap'].isnull() | df['FastestEvenLap'].isnull()
    ]
    if missing_values.empty:
        print(f"DataFrame {df_name}: No rows with missing values in FastestOddLap or FastestEvenLap.")
    else:
        print(f"DataFrame {df_name}: Rows with missing values in FastestOddLap or FastestEvenLap:")
        print(missing_values)


DataFrame Dataframe_1: No rows with missing values in FastestOddLap or FastestEvenLap.
DataFrame Dataframe_2: No rows with missing values in FastestOddLap or FastestEvenLap.
DataFrame Dataframe_3: No rows with missing values in FastestOddLap or FastestEvenLap.
DataFrame Dataframe_4: No rows with missing values in FastestOddLap or FastestEvenLap.
DataFrame Dataframe_5: No rows with missing values in FastestOddLap or FastestEvenLap.
DataFrame Dataframe_6: No rows with missing values in FastestOddLap or FastestEvenLap.


## Calculate average of fastest odd and even laps.

In [34]:
# Store the row-wise mean of FastestOddLap and FastestEvenLap as AvgFastestLap
for df_name in dataframes:
    df = dataframes[df_name]
    fastest_pair = df[['FastestOddLap', 'FastestEvenLap']]
    df['AvgFastestLap'] = fastest_pair.mean(axis='columns')


In [35]:
# Show every row except the last ten (same rows as .head(-10))
dataframes['Dataframe_1'].iloc[:-10]

Unnamed: 0,Plats,År,Plac,Klass,#,Namn,Klubb,Varv,Tid,Varv 1,Varv 2,Varv 3,Varv 4,Varv 5,Varv 6,FastestOddLap,FastestEvenLap,AvgFastestLap
0,FMCK Skövde,2023,1.0,Motion 40-49,1111,Magnus Edberg,Huskvarna MK,4,53:05.5,848.2,857.5,707.5,772.1,,,707.5,772.1,739.80
1,FMCK Skövde,2023,2.0,Motion -39,518,Alexander Fält,Försvarsmaktens EK,4,53:45.2,878.8,871.4,733.6,741.2,,,733.6,741.2,737.40
2,FMCK Skövde,2023,3.0,Motion -39,577,Måns Dalén,FMCK Skövde,4,53:51.5,860.4,893.3,723.4,754.3,,,723.4,754.3,738.85
3,FMCK Skövde,2023,4.0,Ungdom E1,175,William Almén,SMK Värnamo,4,54:06.2,866.6,890.0,727.0,762.5,,,727.0,762.5,744.75
4,FMCK Skövde,2023,5.0,Motion 40-49,944,Niklas Strömberg,Götene MK,4,54:13.8,880.2,907.9,716.7,748.9,,,716.7,748.9,732.80
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
352,FMCK Skövde,2023,71.0,Bredd,234,Patrik Kerttu,Kortedala MK,5,30:23.4,931.7,1021.8,1158.9,1168.5,1142.2,,931.7,1021.8,976.75
353,FMCK Skövde,2023,72.0,Junior,172,Alexander Andersson,Tranemo MK,5,35:10.1,915.5,1196.3,1251.3,1116.3,1230.5,,915.5,1116.3,1015.90
354,FMCK Skövde,2023,73.0,Dam,415,emelie johansson,Götene MK,5,35:10.7,1017.2,1103.7,1107.1,1302.5,1180.1,,1017.2,1103.7,1060.45
355,FMCK Skövde,2023,74.0,Bredd,311,Johan Wolgers,FMCK Borås,5,37:50.4,1039.6,1154.0,1161.9,1373.3,1141.4,,1039.6,1154.0,1096.80


## Sort data by the AvgFastestLap column

In [36]:
# Replace each stored DataFrame with a copy ordered by ascending AvgFastestLap
for df_name in list(dataframes):
    df = dataframes[df_name]
    dataframes[df_name] = df.sort_values('AvgFastestLap')

In [37]:
# Show every row of the sorted frame except the last ten (same rows as .head(-10))
dataframes['Dataframe_1'].iloc[:-10]

Unnamed: 0,Plats,År,Plac,Klass,#,Namn,Klubb,Varv,Tid,Varv 1,Varv 2,Varv 3,Varv 4,Varv 5,Varv 6,FastestOddLap,FastestEvenLap,AvgFastestLap
282,FMCK Skövde,2023,1.0,Senior,1,Albin Elowson,FMCK Skövde,6,02:44.8,583.3,628.4,594.0,639.4,634.9,684.6,583.3,628.4,605.85
283,FMCK Skövde,2023,2.0,Senior,6,Oskar Ljungström,Falköpings MK,6,05:06.5,601.1,648.1,624.3,666.4,661.3,705.1,601.1,648.1,624.60
284,FMCK Skövde,2023,3.0,Junior,182,Arvid Karlsson,FMCK Skövde,6,07:22.3,646.2,672.8,645.2,685.1,683.1,709.7,645.2,672.8,659.00
285,FMCK Skövde,2023,4.0,Senior,20,Robert Friberg,FMCK Skövde,6,07:31.5,643.4,679.3,647.3,697.5,678.4,705.3,643.4,679.3,661.35
288,FMCK Skövde,2023,7.0,Senior,5,Nisse Bengtsson,Tidaholms MK,6,09:12.2,624.9,698.0,664.1,710.6,689.7,764.6,624.9,698.0,661.45
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
256,FMCK Skövde,2023,113.0,Motion 50-59,1473,Håkan Jägmar,Norrahammars MK,4,26:09.5,1308.6,1482.1,1165.3,1213.3,,,1165.3,1213.3,1189.30
134,FMCK Skövde,2023,135.0,Ungdom E1,124,Sam Aronsson,Töreboda MK,4,29:33.2,1765.3,1221.6,1226.8,1159.4,,,1226.8,1159.4,1193.10
255,FMCK Skövde,2023,112.0,Ungdom E0,11,Alicia Isaksson,Falköpings MK,4,22:50.8,1323.1,1207.2,1203.9,1236.6,,,1203.9,1207.2,1205.55
366,FMCK Skövde,2023,85.0,Dam,429,Tilda Mehtonen,FMCK Skövde,3,01:52.7,1030.3,1442.9,1239.5,NaT,,,1030.3,1442.9,1236.60
