# F1 Pitstop Data Analysis

This notebook analyzes Formula 1 race data to understand pitstop patterns and optimize timing strategies.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline
plt.style.use('seaborn')
pd.set_option('display.max_columns', None)

## Load and Examine Data

In [None]:
# Load the F1 race data
df = pd.read_csv('../data/f1dataset.csv')

# Display basic information about the dataset
print("Dataset Info:")
df.info()  # info() prints its summary itself; it returns None

# Only the final expression of a cell is rendered automatically, so a bare
# df.head() in the middle of the cell is silently discarded. display() (always
# available inside a Jupyter/IPython kernel) renders it explicitly.
print("\nFirst few rows:")
display(df.head())

# Last expression of the cell: rendered as the cell's output.
print("\nBasic statistics:")
df.describe()

## Data Quality Check

In [None]:
# Check for missing values
# The counts must be printed explicitly: a bare expression that is not the
# last statement of the cell produces no output.
print("Missing values in each column:")
print(df.isnull().sum())

# Check unique values in categorical columns
# For every object-dtype column, show the most frequent values
# (value_counts() sorts by frequency; head() keeps the top five).
categorical_columns = df.select_dtypes(include=['object']).columns
for col in categorical_columns:
    print(f"\nUnique values in {col}:")
    print(df[col].value_counts().head())

## Initial Pitstop Analysis

In [None]:
# Basic pitstop statistics
# Pitstop-related columns are discovered by name: any column whose label
# contains 'pit' (case-insensitive) is selected. Adjust this filter if your
# dataset names its pitstop columns differently.
# str(col) keeps the filter from failing on non-string column labels.
pitstop_cols = [col for col in df.columns if 'pit' in str(col).lower()]

if pitstop_cols:
    print("Pitstop-related columns found:")
    for col in pitstop_cols:
        print(f"\nStatistics for {col}:")
        print(df[col].describe())

        # Visualize distribution on an explicit figure/axes pair
        fig, ax = plt.subplots(figsize=(10, 6))
        sns.histplot(data=df, x=col, ax=ax)
        ax.set_title(f'Distribution of {col}')
        plt.show()
        # Release the figure so looping over many columns does not accumulate them.
        plt.close(fig)
else:
    # Say so explicitly instead of leaving the cell with no output at all.
    print("No pitstop-related columns found (no column name contains 'pit').")
    print("Available columns:", list(df.columns))