# Parsing Multi-Section F1 CSV File

This notebook demonstrates how to parse a multi-section F1 CSV file into separate pandas DataFrames for each section.

## 1. Import Required Libraries
We use pandas for data manipulation, `io` to wrap each section's text for the CSV reader, and `logging` for status output.

In [50]:
import pandas as pd
import io
import logging

## 2. Set the Path to the CSV File

In [51]:
# Relative path to the multi-section CSV export that the rest of the notebook parses.
file_path = "data/f1-20250907.csv"

## 3. Read the File as Text

In [52]:
# Read the whole file as raw text lines; sections are split apart in a later cell.
# An explicit encoding keeps decoding independent of the OS locale, and the
# '-sig' variant drops a leading UTF-8 BOM (if any) so it cannot end up glued to
# the first section name.
# NOTE(review): assumes the export is UTF-8 — change the encoding if it is not.
with open(file_path, 'r', encoding='utf-8-sig') as f:
    lines = f.readlines()

## 4. Initialize Section Variables

In [53]:
# Configure root logging first; force=True replaces any handlers that were
# already installed on the root logger.
logging.basicConfig(force=True, level=logging.INFO)

# Parser state shared by the following cells:
#   sections        - section name -> parsed DataFrame
#   section_lines   - raw CSV lines buffered for the section currently being read
#   current_section - name of the section currently being read (None before the first header)
sections = dict()
section_lines = list()
current_section = None


## 5. Parse Sections and Collect Data

In [54]:
for raw_line in lines:
    text = raw_line.strip()
    if text == '':
        # Blank lines carry no data and never delimit sections.
        continue

    logging.debug(text)

    # A section header is a line with no comma that does not start with a digit;
    # everything else is treated as a CSV row of the current section.
    is_header = ',' not in text and not text[0].isdigit()
    if not is_header:
        if current_section:
            section_lines.append(text)
        continue

    # A new header closes the previous section: parse its buffered rows, if any.
    if current_section and section_lines:
        buffered_csv = io.StringIO('\n'.join(section_lines))
        sections[current_section] = pd.read_csv(buffered_csv)
    current_section = text
    section_lines = []

## 6. Save the Last Section

In [55]:
# The parsing loop only stores a section when the next header appears, so the
# final section is still sitting in the buffer and has to be parsed here.
has_pending_rows = bool(current_section) and bool(section_lines)
if has_pending_rows:
    pending_csv = io.StringIO('\n'.join(section_lines))
    sections[current_section] = pd.read_csv(pending_csv)

## 7. Display All DataFrames

In [56]:
# List every parsed section; the full table is only rendered when DEBUG logging
# is enabled, because DataFrame.to_string() on every section is noisy.
root_logger = logging.getLogger()
for section_name, section_df in sections.items():
    logging.info(f'\nSection: {section_name}')
    if not root_logger.isEnabledFor(logging.DEBUG):
        continue
    logging.debug("\n" + section_df.to_string())

INFO:root:
Section: Gemini
INFO:root:
Section: grok
INFO:root:
Section: grok Think harder
INFO:root:
Section: copilot
INFO:root:
Section: co-pilot think deeper
INFO:root:
Section: claude
INFO:root:
Section: Results


## 8. (Optional) Access a Specific Section
You can access a DataFrame for a specific section by its name, e.g. `sections['Results']`.

In [57]:
# Example: access the 'grok' section.
# dict.get returns None when the section name is absent, so guard before calling
# .head() instead of failing with an AttributeError.
grok_df = sections.get('grok')
grok_df.head() if grok_df is not None else 'grok section not found.'

Unnamed: 0,Position,DRIVER,Team
0,1,Oscar Piastri,McLaren
1,2,Lando Norris,McLaren
2,3,Charles Leclerc,Ferrari
3,4,Max Verstappen,Red Bull Racing
4,5,Lewis Hamilton,Ferrari


## 9. (Optional) Visualize or Analyze Data
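You can now use pandas or visualization libraries to analyze or plot the data. The cells below load the `Results` section and compare each other section's driver order against it using Spearman rank correlation.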

In [58]:
# Look up the 'Results' table; .get yields None when that section was not parsed,
# in which case a short message is shown instead of a preview.
Results_df = sections.get('Results')
'Results section not found.' if Results_df is None else Results_df.head()

Unnamed: 0,POS.,NO.,DRIVER,TEAM,LAPS,TIME / RETIRED,PTS.
0,1,1,Max Verstappen,Red Bull Racing,53,1:13:24.325,25
1,2,4,Lando Norris,McLaren,53,+19.207s,18
2,3,81,Oscar Piastri,McLaren,53,+21.351s,15
3,4,16,Charles Leclerc,Ferrari,53,+25.624s,12
4,5,63,George Russell,Mercedes,53,+32.881s,10


In [60]:
from scipy.stats import spearmanr


def _first_positions(drivers):
    """Map each non-missing driver name to the index of its first occurrence.

    Building this lookup once replaces repeated ``list.index`` scans.
    Missing values (NaN/None) are left out, so they never count as a match.
    """
    positions = {}
    for idx, driver in enumerate(drivers):
        if pd.notna(driver):
            positions.setdefault(driver, idx)
    return positions


def _driver_column(df):
    """Return the section's 'DRIVER' (preferred) or 'Driver' column, or None if it has neither."""
    for candidate in ('DRIVER', 'Driver'):
        if candidate in df.columns:
            return df[candidate]
    return None


# Compare each section DataFrame with Results_df using Spearman rank correlation.
# Without the reference ordering nothing can be compared, so fail with a clear message.
if Results_df is None:
    raise ValueError("'Results' section not found; cannot compute Spearman correlations.")

results_order = Results_df['DRIVER']
results_drivers = results_order.tolist()
results_positions = _first_positions(results_drivers)

spearman_scores = {}
for name, df in sections.items():
    if name == 'Results':
        continue
    other_order = _driver_column(df)
    if other_order is None:
        logging.warning("Skipping section %r: no 'DRIVER' or 'Driver' column", name)
        continue
    other_positions = _first_positions(other_order.tolist())
    # Align only drivers present in both lists, keeping the Results order
    common_drivers = [
        d for d in results_drivers
        if d in results_positions and d in other_positions
    ]
    if len(common_drivers) < 2:
        logging.warning("Skipping section %r: fewer than 2 drivers in common with Results", name)
        continue
    # Positions act as ranks; spearmanr only depends on their relative order
    results_ranks = [results_positions[d] for d in common_drivers]
    other_ranks = [other_positions[d] for d in common_drivers]
    score, _pvalue = spearmanr(results_ranks, other_ranks)
    spearman_scores[name] = score

# Display Spearman rank correlation scores in descending order
spearman_scores = dict(sorted(spearman_scores.items(), key=lambda item: item[1], reverse=True))
for name, score in spearman_scores.items():
    print(f"Spearman rank correlation with Results for '{name}': {score:.2f}")




Spearman rank correlation with Results for 'grok': 0.91
Spearman rank correlation with Results for 'Gemini': 0.83
Spearman rank correlation with Results for 'grok Think harder': 0.76
Spearman rank correlation with Results for 'claude': 0.64
Spearman rank correlation with Results for 'copilot': 0.59
Spearman rank correlation with Results for 'co-pilot think deeper': 0.59
