# Perspective 1 Heatmap Visualizations

In [1]:
# Cell 1: Install and Import Required Packages
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
import ipywidgets as widgets
from IPython.display import display, clear_output
import seaborn as sns
import matplotlib.pyplot as plt

# Cell 2: Load and Prepare Data
def load_f1_data():
    """Load and prepare the F1 data"""
    try:
        df = pd.read_csv('merged_f1_data_1994_2022.csv')
    except FileNotFoundError:
        return None
    
    # Handle non-numeric finishing positions (DNF, DSQ, etc.)
    df['FinPos'] = df['FinPos'].astype(str)
    return df

# Load the data
df = load_f1_data()

# Cell 3: Data Preparation Functions
def create_heatmap_data(df, selected_year=None):
    """Create heatmap data for starting vs finishing positions"""
    
    if selected_year and selected_year != 'Total':
        df_filtered = df[df['Year'] == selected_year].copy()
        title_suffix = f" - {selected_year}"
    else:
        df_filtered = df.copy()
        title_suffix = " - All Years (1994-2022)"
    
    # Group all non-finishers under "DNF" category
    df_filtered = df_filtered.copy()
    non_finish_codes = ['NC', 'DNF', 'DSQ', 'DNS', 'EX', 'WD']
    
    # Use .loc to avoid the warning
    df_filtered.loc[:, 'FinPos_Display'] = df_filtered['FinPos'].copy()
    
    # Replace all non-finish codes with 'DNF'
    for code in non_finish_codes:
        df_filtered.loc[:, 'FinPos_Display'] = df_filtered['FinPos_Display'].replace(code, 'DNF')
    
    # Get the range of starting positions (limit to reasonable range)
    max_start_pos = min(df_filtered['Pos'].max(), 26)  # F1 grid is max 26 cars
    
    # Create categories for finishing positions
    numeric_positions = []
    non_numeric_positions = []
    
    for pos in df_filtered['FinPos_Display'].unique():
        try:
            num_pos = int(float(pos))
            if 1 <= num_pos <= 30:  # Reasonable range for F1
                numeric_positions.append(num_pos)
        except (ValueError, TypeError):
            if pos != 'nan':  # Skip NaN values
                non_numeric_positions.append(pos)
    
    # Sort numeric positions
    numeric_positions = sorted(set(numeric_positions))
    
    # Create the full list of finish position categories (DNF at the end)
    finish_categories = numeric_positions + sorted(set(non_numeric_positions))
    start_categories = list(range(1, max_start_pos + 1))
    
    # Create the heatmap matrix
    heatmap_data = np.zeros((len(finish_categories), len(start_categories)))
    
    # Fill the heatmap matrix
    for _, row in df_filtered.iterrows():
        start_pos = row['Pos']
        finish_pos = row['FinPos_Display']  # Use the modified display position
        
        if start_pos in start_categories:
            start_idx = start_categories.index(start_pos)
            
            # Handle finish position
            try:
                finish_pos_int = int(float(finish_pos))
                if finish_pos_int in finish_categories:
                    finish_idx = finish_categories.index(finish_pos_int)
                    heatmap_data[finish_idx, start_idx] += 1
            except (ValueError, TypeError):
                if finish_pos in finish_categories:
                    finish_idx = finish_categories.index(finish_pos)
                    heatmap_data[finish_idx, start_idx] += 1
    
    return heatmap_data, start_categories, finish_categories, title_suffix, len(df_filtered)

# Cell 4: Plotly Interactive Heatmap Function
def create_plotly_heatmap(df, selected_year=None):
    """Create an interactive heatmap using Plotly"""
    
    heatmap_data, start_categories, finish_categories, title_suffix, total_records = create_heatmap_data(df, selected_year)
    
    # Create hover text with percentages
    hover_text = []
    for i in range(len(finish_categories)):
        hover_row = []
        for j in range(len(start_categories)):
            count = int(heatmap_data[i, j])
            percentage = (count / total_records * 100) if total_records > 0 else 0
            hover_row.append(f'Starting: P{start_categories[j]}<br>Finishing: {finish_categories[i]}<br>Count: {count}<br>Percentage: {percentage:.1f}%')
        hover_text.append(hover_row)
    
    # Create the heatmap with improved colorscale
    fig = go.Figure(data=go.Heatmap(
        z=heatmap_data,
        x=[f"P{pos}" for pos in start_categories],
        y=[str(pos) for pos in finish_categories],
        colorscale='RdYlBu_r',  # Red-Yellow-Blue reversed (red for high values)
        # Alternative colorscales you can try:
        # colorscale='Hot',
        # colorscale='Plasma',
        # colorscale='Inferno',
        # colorscale='Turbo',
        # colorscale='Spectral_r',
        hoverongaps=False,
        hovertemplate='%{customdata}<extra></extra>',
        customdata=hover_text,
        colorbar=dict(title="Number of Occurrences")
    ))
    
    fig.update_layout(
        title={
            'text': f'F1 Starting vs Finishing Position Heatmap{title_suffix}<br><sub>Total Records: {total_records:,}</sub>',
            'x': 0.5,
            'xanchor': 'center',
            'font': {'size': 16}
        },
        xaxis_title='Starting Position',
        yaxis_title='Finishing Position',
        xaxis=dict(side='bottom'),
        yaxis=dict(autorange='reversed'),  # Reverse y-axis so P1 is at top
        width=1000,
        height=700,
        font=dict(size=12)
    )
    
    return fig

# Cell 5: Interactive Widget Setup
def create_interactive_heatmap_widget(df):
    """Create an interactive widget with dropdown for year selection"""
    
    if df is None:
        return
    
    # Get available years and add 'Total' option
    years = sorted(df['Year'].unique())
    year_options = [('All Years (Total)', 'Total')] + [(str(year), year) for year in years]
    
    # Create dropdown widget
    year_dropdown = widgets.Dropdown(
        options=year_options,
        value='Total',
        description='Select Year:',
        style={'description_width': 'initial'},
        layout=widgets.Layout(width='200px')
    )
    
    # Create info display
    info_output = widgets.Output()
    
    # Output widget for the plot
    plot_output = widgets.Output()
    
    def update_heatmap(change):
        """Update heatmap when dropdown changes"""
        selected_year = change['new']
        year_value = None if selected_year == 'Total' else selected_year
        
        # Update info
        with info_output:
            clear_output(wait=True)
            if selected_year == 'Total':
                records_count = len(df)
                print(f"📊 Showing data for ALL YEARS: {records_count:,} total records")
            else:
                year_data = df[df['Year'] == selected_year]
                records_count = len(year_data)
                races_count = year_data['Grand Prix'].nunique()
                print(f"📊 Showing data for {selected_year}: {records_count} records from {races_count} races")
        
        # Update plot
        with plot_output:
            clear_output(wait=True)
            try:
                fig = create_plotly_heatmap(df, year_value)
                fig.show()
            except Exception as e:
                pass
    
    # Bind the dropdown to the update function
    year_dropdown.observe(update_heatmap, names='value')
    
    with plot_output:
        try:
            fig = create_plotly_heatmap(df, None)
            fig.show()
        except Exception as e:
            pass
    
    # Create and display the widget layout
    widget_box = widgets.VBox([
        widgets.HTML("<h3>🏎️ F1 Starting vs Finishing Position Analysis</h3>"),
        year_dropdown,
        info_output,
        plot_output
    ])
    
    display(widget_box)

# Cell 6: Data Analysis Functions
def analyze_position_data(df):
    """Analyze interesting patterns in the data"""
    
    if df is None:
        return
    
    # Finishing position breakdown with all non-finishers grouped as DNF
    finish_counts = df['FinPos'].value_counts()
    
    # Separate numeric and non-numeric finishes
    numeric_finishes = 0
    total_dnf_count = 0
    
    # Group all non-finishes together
    non_finish_codes = ['DNF', 'DSQ', 'NC', 'DNS', 'EX', 'WD']
    for pos, count in finish_counts.items():
        try:
            int(float(pos))
            numeric_finishes += count
        except (ValueError, TypeError):
            if pos in non_finish_codes:
                total_dnf_count += count
    
    # Position change analysis (classified finishers only)
    df_numeric = df.copy()
    df_numeric['FinPos_Numeric'] = pd.to_numeric(df_numeric['FinPos'], errors='coerce')
    df_classified = df_numeric.dropna(subset=['FinPos_Numeric'])
    
    if len(df_classified) > 0:
        df_classified['Position_Change'] = df_classified['Pos'] - df_classified['FinPos_Numeric']
        
        # Pole position analysis
        pole_data = df_classified[df_classified['Pos'] == 1]
        if len(pole_data) > 0:
            pole_wins = len(pole_data[pole_data['FinPos_Numeric'] == 1])

# Cell 7: Quick Statistics by Year
def show_yearly_stats(df):
    """Show quick statistics by year"""
    
    if df is None:
        return
        
    yearly_stats = df.groupby('Year').agg({
        'DriverCode': 'count',
        'Grand Prix': 'nunique'
    }).rename(columns={'DriverCode': 'Total_Records', 'Grand Prix': 'Races'}).copy()
    
    # Calculate DNF rates by year
    yearly_dnf = []
    for year in sorted(df['Year'].unique()):
        year_data = df[df['Year'] == year]
        total = len(year_data)
        dnf_count = len(year_data[~year_data['FinPos'].str.isnumeric().fillna(False)])
        dnf_rate = (dnf_count / total * 100) if total > 0 else 0
        yearly_dnf.append(dnf_rate)
    
    yearly_stats.loc[:, 'DNF_Rate'] = yearly_dnf

# Create the interactive widget
create_interactive_heatmap_widget(df)
analyze_position_data(df)
show_yearly_stats(df)

VBox(children=(HTML(value='<h3>🏎️ F1 Starting vs Finishing Position Analysis</h3>'), Dropdown(description='Sel…



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

