# Test Data Collection

This notebook tests the `F1DataCollector` class by collecting results for one race at a time.

In [None]:
import sys
sys.path.append('../src')

from data_collection import F1DataCollector
import pandas as pd

# Lift pandas' column-count and line-width display limits (None = no limit)
# so printed result tables are not truncated.
for display_option in ('display.max_columns', 'display.width'):
    pd.set_option(display_option, None)

## Test 1: Single Race Collection (2024 Monaco GP)

In [None]:
# Create the collector instance reused by the collection cells below
collector = F1DataCollector()

# Announce which race this test run targets
print(
    "Testing single race collection...",
    "Race: 2024 Monaco Grand Prix",
    sep="\n",
)

In [None]:
# Fetch the 2024 season, round 8 results (the Monaco GP, per the section heading)
monaco_results = collector.collect_single_race(2024, 8)

# One row per driver is expected, so the row count is the driver count
print("\nCollected {} driver results".format(len(monaco_results)))

## Examine the Data

In [None]:
# Report the frame's dimensions, then list every column it carries
column_names = monaco_results.columns.tolist()

print("DataFrame shape: {}".format(monaco_results.shape))
print("\nColumns ({}):".format(len(column_names)))
print(column_names)

In [None]:
# Columns of interest for the checks in this notebook; reused by later cells
key_cols = [
    'Position', 'GridPosition', 'DriverNumber', 'FullName', 'TeamName',
    'Points', 'Status', 'year', 'race_name', 'circuit',
]

print("\nRace Results:")
# Label-based selection of every row, restricted to the key columns
print(monaco_results.loc[:, key_cols])

In [None]:
# Show the dtype pandas assigned to each key column
key_frame = monaco_results[key_cols]
print("\nData types:", key_frame.dtypes, sep="\n")

In [None]:
# Count null entries per key column
null_counts = monaco_results[key_cols].isna().sum()
print("\nMissing values:", null_counts, sep="\n")

## Analyze Monaco Results

In [None]:
# Check position changes
# position_change = grid slot - finishing position, so positive values are
# places gained and negative values are places lost.
# Positions are coerced with pd.to_numeric so the calculation works whether the
# collector stores them as numbers or as strings, and so a non-numeric entry
# becomes NaN instead of aborting the whole cell.
grid_numeric = pd.to_numeric(monaco_results['GridPosition'], errors='coerce').astype(float)
finish_numeric = pd.to_numeric(monaco_results['Position'], errors='coerce').astype(float)
monaco_results['position_change'] = grid_numeric - finish_numeric

print("Position changes at Monaco:")
# Sort on the numeric value of Position: sorting a string column directly
# would order it lexicographically ('1', '10', '11', '2', ...).
print(
    monaco_results[['FullName', 'GridPosition', 'Position', 'position_change']].sort_values(
        'Position',
        key=lambda col: pd.to_numeric(col, errors='coerce'),
    )
)

In [None]:
# DNF analysis
# A driver is a DNF only if they were not classified at the flag. Lapped
# finishers are still classified, so treating every status other than
# 'Finished' as a DNF overstates the count and the rate.
# NOTE(review): assumes FastF1/Ergast-style Status strings ('Finished',
# '+1 Lap', '+2 Laps', 'Lapped', ...) - confirm against F1DataCollector output.
status = monaco_results['Status'].astype(str)
classified_mask = (
    (status == 'Finished')
    | (status == 'Lapped')
    | status.str.startswith('+')
)
dnf_mask = ~classified_mask
dnf_count = int(dnf_mask.sum())
total_drivers = len(monaco_results)
# Guard the empty-result case so the rate is reported as 0 rather than NaN/error
dnf_rate = (dnf_count / total_drivers) * 100 if total_drivers else 0.0

print("\nDNF Statistics:")
print(f"Total DNFs: {dnf_count}")
print(f"DNF Rate: {dnf_rate:.1f}%")

if dnf_count > 0:
    print("\nDNF Details:")
    print(monaco_results.loc[dnf_mask, ['FullName', 'TeamName', 'Status']])

In [None]:
# Quick stats
# Compare positions numerically. Comparing against the string '1' matches no
# rows when the columns hold numbers, and indexing [0] on the empty result
# raises IndexError; numeric coercion works for both string and numeric data.
finish_pos = pd.to_numeric(monaco_results['Position'], errors='coerce')
grid_pos = pd.to_numeric(monaco_results['GridPosition'], errors='coerce')

winner_names = monaco_results.loc[finish_pos == 1, 'FullName']
pole_names = monaco_results.loc[grid_pos == 1, 'FullName']
# Fall back to a placeholder instead of crashing when no row matches
winner = winner_names.iloc[0] if not winner_names.empty else 'N/A'
pole_sitter = pole_names.iloc[0] if not pole_names.empty else 'N/A'

# 'position_change' is the column added by the position-change cell above
# (grid - finish), so max is the biggest gain and min the biggest loss.
position_change = monaco_results['position_change']

print("\nQuick Statistics:")
print(f"Winner: {winner}")
print(f"Pole position: {pole_sitter}")
print(f"Average position change: {position_change.mean():.2f}")
print(f"Max position gained: {position_change.max():.0f}")
print(f"Max position lost: {position_change.min():.0f}")

## Test 2: Try Another Race (2024 Bahrain GP)

In [None]:
# Fetch the 2024 season, round 1 results (the Bahrain GP, per the section heading)
bahrain_results = collector.collect_single_race(2024, 1)

print("\nBahrain GP: {} results collected".format(len(bahrain_results)))
print("\nTop 10 finishers:")
# First ten rows of the key columns, in the order the collector returned them
print(bahrain_results.loc[:, key_cols].iloc[:10])

## Summary

In [None]:
# Summary of the test run.
# The collection, column and metadata claims are verified against the frames
# collected above instead of being printed unconditionally. When every check
# passes the output is identical to a plain all-green summary.
essential_cols = [
    'GridPosition', 'Position', 'DriverNumber',
    'FullName', 'TeamName', 'Points', 'Status',
    'year', 'race_name', 'circuit', 'date',
]
metadata_cols = ['year', 'race_name', 'circuit', 'date']
collected = {'Monaco GP': monaco_results, 'Bahrain GP': bahrain_results}

# Per race, the essential columns that are absent from its frame
missing_by_race = {
    race: [col for col in essential_cols if col not in frame.columns]
    for race, frame in collected.items()
}
collection_ok = all(len(frame) > 0 for frame in collected.values())
columns_ok = not any(missing_by_race.values())
metadata_ok = not any(
    col in missing for missing in missing_by_race.values() for col in metadata_cols
)
mark = {True: "✓", False: "✗"}

print("=" * 60)
print("Data Collection Test Summary")
print("=" * 60)
print(f"\n{mark[collection_ok]} Single race collection working")
print(f"{mark[columns_ok]} All essential columns present:")
print("  - GridPosition, Position, DriverNumber")
print("  - FullName, TeamName, Points, Status")
print("  - year, race_name, circuit, date")
for race, missing in missing_by_race.items():
    if missing:
        print(f"  ! {race} is missing: {', '.join(missing)}")
# The next two items reflect manual inspection of the cells above; they are
# not re-checked programmatically here.
print("\n✓ Data quality looks good")
print("✓ DNF tracking working")
print(f"{mark[metadata_ok]} Metadata correctly added")
if collection_ok and columns_ok:
    print("\nReady for full season collection!")
else:
    print("\nNot ready for full season collection - fix the items marked above.")
print("=" * 60)