# Het verschil tussen startpositie en strategie

In [2]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.offline as pyo
import plotly.io as pio
# Use Plotly's "browser" renderer for fig.show()
pio.renderers.default = "browser"

# Ensure offline mode for HTML export
pyo.init_notebook_mode(connected=True)

# Load and prepare data
df = pd.read_csv('merged_f1_data_1994_2022.csv')

# Convert positions to numeric; entries that cannot be parsed become NaN
df['Pos_numeric'] = pd.to_numeric(df['Pos'], errors='coerce')
df['FinPos_numeric'] = pd.to_numeric(df['FinPos'], errors='coerce')

# The per-season heatmap groups rows by 'Year'. Filling a missing column
# with random seasons would produce plausible-looking but meaningless
# per-year charts, so fail loudly instead of fabricating data.
if 'Year' not in df.columns:
    raise KeyError(
        "Column 'Year' is missing from merged_f1_data_1994_2022.csv; "
        "add or rename the season column before building per-year charts."
    )

# Clean data: keep rows whose start and finish positions are both positive.
# NaN compares False against 0, so unparsable positions are dropped as well.
valid_data = df[
    (df['Pos_numeric'] > 0) &
    (df['FinPos_numeric'] > 0)
].copy()

def create_heatmap_data(data, max_start_pos=None, max_finish_pos=None):
    """Count how often each (start position, finish position) pair occurs.

    Args:
        data: DataFrame with 1-based ``Pos_numeric`` (start) and
            ``FinPos_numeric`` (finish) columns. May be empty.
        max_start_pos: Number of grid columns (start positions). When omitted
            or 0 it is derived from ``data`` (capped at 26), or set to 20
            when ``data`` holds no usable values.
        max_finish_pos: Number of grid rows (finish positions), resolved the
            same way as ``max_start_pos``. Each dimension is resolved
            independently, so supplying only one of them is honoured.

    Returns:
        Tuple ``(heatmap, max_start, max_finish)``. ``heatmap`` is a float
        array of shape ``(max_finish, max_start)`` in which
        ``heatmap[f - 1, s - 1]`` is the number of rows that started in
        position ``s`` and finished in position ``f``.
    """
    default_size = 20
    size_cap = 26
    has_rows = len(data) > 0

    def resolve_size(requested, column):
        # An explicit size wins; otherwise use the largest observed position.
        if requested:
            return requested
        if has_rows:
            largest = data[column].max()
            if not pd.isna(largest):
                return min(int(largest), size_cap)
        return default_size

    max_start = resolve_size(max_start_pos, 'Pos_numeric')
    max_finish = resolve_size(max_finish_pos, 'FinPos_numeric')
    heatmap_data = np.zeros((max_finish, max_start))

    if not has_rows:
        return heatmap_data, max_start, max_finish

    # Rows missing either position cannot be placed on the grid.
    positions = data[['Pos_numeric', 'FinPos_numeric']].dropna()
    start_idx = positions['Pos_numeric'].to_numpy().astype(int) - 1
    finish_idx = positions['FinPos_numeric'].to_numpy().astype(int) - 1

    # Require 0 <= index < size: a position of 0 or below would otherwise
    # become a negative index and be counted in the last row/column.
    in_grid = (
        (start_idx >= 0) & (start_idx < max_start) &
        (finish_idx >= 0) & (finish_idx < max_finish)
    )

    # np.add.at accumulates correctly when the same cell occurs repeatedly.
    np.add.at(heatmap_data, (finish_idx[in_grid], start_idx[in_grid]), 1)

    return heatmap_data, max_start, max_finish

def _season_title(period_label):
    """Return the centred figure-title dict for the given period label."""
    return {
        'text': f'<b>Formula 1 Position Heatmap - {period_label}</b>',
        'x': 0.5,
        'xanchor': 'center',
        'font': {'size': 20, 'color': 'black', 'family': 'Arial Black, sans-serif'}
    }


def _season_heatmap_trace(counts, colorscale, period_label):
    """Build one heatmap trace; ``period_label`` heads the hover box."""
    n_finish, n_start = counts.shape
    return go.Heatmap(
        z=counts,
        x=list(range(1, n_start + 1)),
        y=list(range(1, n_finish + 1)),
        colorscale=colorscale,
        showscale=True,
        colorbar=dict(
            title="Race Count",
            title_font=dict(color='black', size=12),
            tickfont=dict(color='black'),
            x=1.02,
            bgcolor='rgba(255,255,255,0.8)',
            bordercolor='black',
            borderwidth=1
        ),
        hovertemplate=
        f'<b>{period_label}</b><br>' +
        '<b>Start Position:</b> P%{x}<br>' +
        '<b>Finish Position:</b> P%{y}<br>' +
        '<b>Race Count:</b> %{z}<br>' +
        '<extra></extra>'
    )


def _season_slider_step(frame_name, label):
    """Return the slider step that animates to the frame called ``frame_name``."""
    return {
        "args": [
            [frame_name],
            {"frame": {"duration": 300, "redraw": True},
             "mode": "immediate",
             "transition": {"duration": 300}}
        ],
        "label": label,
        "method": "animate"
    }


def create_f1_heatmap_with_slider():
    """Create the start-vs-finish position heatmap with a season slider.

    Reads the module-level ``valid_data`` frame. The figure opens on an
    "All Years" view and carries one animation frame per distinct value of
    ``valid_data['Year']``; the slider below the plot switches between them.

    Returns:
        A ``go.Figure`` holding the initial heatmap, all frames and the
        slider layout.

    Raises:
        ValueError: If ``valid_data`` contains no rows.
    """
    if valid_data.empty:
        raise ValueError("valid_data is empty; there is nothing to plot.")

    # F1 Red color scheme - light red for low values, dark red for high values
    f1_red_colorscale = [
        [0.0, "#FFE9E9"],      # Very light red for least occurrences
        [0.1, '#FF9999'],      # Light red
        [0.2, '#FF8080'],      # Medium light red
        [0.3, '#FF6666'],      # Medium red
        [0.4, '#FF4D4D'],      # Red
        [0.5, '#FF3333'],      # Medium red
        [0.6, '#FF1A1A'],      # Red
        [0.7, '#FF0000'],      # Pure red (F1 red)
        [0.8, '#E60000'],      # Dark red
        [0.9, '#CC0000'],      # Very dark red
        [1.0, '#990000']       # Darkest red for most occurrences
    ]

    # Individual years plus an "All Years" option, represented by frame "0"
    years = sorted(valid_data['Year'].unique())
    all_years_option = 0
    all_years_label = 'All Years (1994-2022)'

    # One grid size for every frame so the plot does not resize per season
    max_start_pos_global = min(int(valid_data['Pos_numeric'].max()), 26)
    max_finish_pos_global = min(int(valid_data['FinPos_numeric'].max()), 26)

    def counts_for(rows):
        counts, _, _ = create_heatmap_data(
            rows, max_start_pos_global, max_finish_pos_global
        )
        return counts

    # Computed once; shared by the initial trace and the "All Years" frame
    all_years_counts = counts_for(valid_data)

    # (frame name, title/hover label, slider label, count matrix) per view
    views = [(str(all_years_option), all_years_label, "All Years", all_years_counts)]
    for year in years:
        year_rows = valid_data[valid_data['Year'] == year]
        views.append((str(year), f'{year} Season', str(year), counts_for(year_rows)))

    frames = [
        go.Frame(
            data=[_season_heatmap_trace(counts, f1_red_colorscale, period_label)],
            name=frame_name,
            layout=go.Layout(title=_season_title(period_label))
        )
        for frame_name, period_label, _, counts in views
    ]

    slider_steps = [
        _season_slider_step(frame_name, slider_label)
        for frame_name, _, slider_label, _ in views
    ]

    # The initial trace uses the same hover text as the "All Years" frame,
    # so hovering looks identical before and after the slider is touched.
    fig = go.Figure(
        data=[_season_heatmap_trace(all_years_counts, f1_red_colorscale, all_years_label)],
        frames=frames
    )

    # Update layout with slider
    fig.update_layout(
        # Title
        title=_season_title(all_years_label),

        # Add slider
        sliders=[{
            "active": 0,
            "currentvalue": {
                "prefix": "Season: ",
                "visible": True,
                "xanchor": "center",
                "font": {"size": 14, "color": "black", "family": "Arial Black, sans-serif"}
            },
            "pad": {"b": 10, "t": 10},
            "len": 0.8,
            "x": 0.1,
            "xanchor": "left",
            "y": -0.15,
            "yanchor": "top",
            "steps": slider_steps,
            "tickcolor": "#FF0000",
            "bordercolor": "black",
            "borderwidth": 1,
            "bgcolor": "#FFFFFF"
        }],

        # Axes styling
        xaxis=dict(
            title='<b>Start Position</b>',
            title_font=dict(size=14, color='black', family='Arial Black, sans-serif'),
            tickmode='linear',
            tick0=1,
            dtick=1,
            showgrid=True,
            gridcolor='lightgray',
            gridwidth=1,
            tickfont=dict(color='black', size=11),
            linecolor='black',
            linewidth=1,
            zeroline=False
        ),
        yaxis=dict(
            title='<b>Finish Position</b>',
            title_font=dict(size=14, color='black', family='Arial Black, sans-serif'),
            tickmode='linear',
            tick0=1,
            dtick=1,
            showgrid=True,
            gridcolor='lightgray',
            gridwidth=1,
            tickfont=dict(color='black', size=11),
            linecolor='black',
            linewidth=1,
            zeroline=False
        ),

        # Layout settings
        width=1000,
        height=750,  # Slightly taller to accommodate slider
        font=dict(size=11, family="Arial, sans-serif", color="black"),
        plot_bgcolor='#FAFAFA',
        paper_bgcolor='#FFFFFF',
        margin=dict(t=80, l=100, r=140, b=120),  # More bottom margin for slider
        hovermode='closest'
    )

    return fig

# Build the slider-driven heatmap figure
fig = create_f1_heatmap_with_slider()

# Display options passed to fig.show(): mode bar without the Plotly logo and
# without the pan/lasso/select buttons, plus PNG download settings
config = dict(
    displayModeBar=True,
    displaylogo=False,
    modeBarButtonsToRemove=['pan2d', 'lasso2d', 'select2d'],
    toImageButtonOptions=dict(
        format='png',
        filename='f1_grid_analysis',
        height=750,
        width=1000,
        scale=2,
    ),
    responsive=True,
    scrollZoom=True,
)

# Render the figure
fig.show(config=config)

In [None]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import plotly.offline as pyo

# Ensure offline mode for HTML export
pyo.init_notebook_mode(connected=True)

# Load and prepare data
df = pd.read_csv('merged_f1_data_1994_2022.csv')

# Convert positions to numeric; entries that cannot be parsed become NaN
df['Pos_numeric'] = pd.to_numeric(df['Pos'], errors='coerce')
df['FinPos_numeric'] = pd.to_numeric(df['FinPos'], errors='coerce')

# The season must come from the data itself. Filling a missing column with
# random years would attach invented seasons to real results, so fail loudly
# instead (adjust the column name here if the file uses a different one).
if 'Year' not in df.columns:
    raise KeyError(
        "Column 'Year' is missing from merged_f1_data_1994_2022.csv; "
        "add or rename the season column before continuing."
    )

# Clean data: keep rows whose start and finish positions are both positive.
# NaN compares False against 0, so unparsable positions are dropped as well.
valid_data = df[
    (df['Pos_numeric'] > 0) &
    (df['FinPos_numeric'] > 0)
].copy()

# Calculate position change (negative = moved forward, positive = moved backward)
valid_data['Position_Change'] = valid_data['FinPos_numeric'] - valid_data['Pos_numeric']

def create_position_changes_chart():
    """Build the bar chart of how often each position change occurs.

    Reads ``Position_Change`` from the module-level ``valid_data`` frame,
    keeps changes between -20 and +20, and shades every bar in a red tone
    chosen from its frequency relative to the most frequent change.

    Returns:
        A ``go.Figure`` containing the styled bar chart.
    """
    # Occurrences per position change, ordered by change, limited to [-20, 20]
    change_counts = valid_data['Position_Change'].value_counts().sort_index()
    within_range = (change_counts.index >= -20) & (change_counts.index <= 20)
    change_counts = change_counts[within_range]

    peak_count = change_counts.max()

    # (inclusive upper bound of the relative frequency, bar colour)
    shade_bands = (
        (0.1, '#FFB3B3'),  # Very light red
        (0.2, '#FF9999'),  # Light red
        (0.3, '#FF8080'),  # Medium light red
        (0.4, '#FF6666'),  # Medium red
        (0.5, '#FF4D4D'),  # Red
        (0.6, '#FF3333'),  # Medium red
        (0.7, '#FF1A1A'),  # Red
        (0.8, '#FF0000'),  # Pure red (F1 red)
        (0.9, '#E60000'),  # Dark red
    )
    darkest_shade = '#990000'  # Anything above the last bound

    def shade_for(share):
        # First band whose bound is not exceeded; otherwise the darkest red
        return next(
            (shade for upper, shade in shade_bands if share <= upper),
            darkest_shade,
        )

    bar_colors = [shade_for(count / peak_count) for count in change_counts.values]

    bars = go.Bar(
        x=change_counts.index,
        y=change_counts.values,
        marker=dict(
            color=bar_colors,
            line=dict(color='black', width=0.5)
        ),
        hovertemplate=(
            '<b>Position Change:</b> %{x}<br>'
            '<b>Frequency:</b> %{y} races<br>'
            '<extra></extra>'
        ),
        name='Position Changes'
    )
    fig = go.Figure(data=bars)

    chart_title = {
        'text': '<b>Verdeling van Positie Veranderingen in F1 Races (1994-2022)</b>',
        'x': 0.5,
        'xanchor': 'center',
        'font': {'size': 18, 'color': 'black', 'family': 'Arial Black, sans-serif'}
    }

    # Horizontal axis: a tick every 2 positions, with an emphasised zero line
    change_axis = dict(
        title='<b>Positie Verandering</b>',
        title_font=dict(size=14, color='black', family='Arial Black, sans-serif'),
        tickmode='linear',
        tick0=-20,
        dtick=2,
        showgrid=True,
        gridcolor='lightgray',
        gridwidth=1,
        tickfont=dict(color='black', size=11),
        linecolor='black',
        linewidth=1,
        zeroline=True,
        zerolinecolor='black',
        zerolinewidth=2
    )

    # Vertical axis: frequency of each change
    frequency_axis = dict(
        title='<b>Frequentie</b>',
        title_font=dict(size=14, color='black', family='Arial Black, sans-serif'),
        showgrid=True,
        gridcolor='lightgray',
        gridwidth=1,
        tickfont=dict(color='black', size=11),
        linecolor='black',
        linewidth=1
    )

    fig.update_layout(
        title=chart_title,
        xaxis=change_axis,
        yaxis=frequency_axis,
        width=1000,
        height=600,
        font=dict(size=11, family="Arial, sans-serif", color="black"),
        plot_bgcolor='#FAFAFA',
        paper_bgcolor='#FFFFFF',
        margin=dict(t=80, l=80, r=80, b=80),
        hovermode='x'
    )

    return fig

# Build the position-change bar chart
fig = create_position_changes_chart()

# Display options passed to fig.show(): mode bar without the Plotly logo and
# without the pan/lasso/select buttons, plus PNG download settings
config = dict(
    displayModeBar=True,
    displaylogo=False,
    modeBarButtonsToRemove=['pan2d', 'lasso2d', 'select2d'],
    toImageButtonOptions=dict(
        format='png',
        filename='f1_position_changes',
        height=600,
        width=1000,
        scale=2,
    ),
    responsive=True,
)

# Render the figure
fig.show(config=config)

# Perspective 1: "Coureurs die verder vooraan starten, hebben een duidelijk voordeel bij het behalen van een hoge eindpositie."
Nadat de coureurs de kwalificatie hebben afgerond, is de startpositie voor de race bekend. Velen zijn van mening dat deze startpositie de meest bepalende factor is voor de eindpositie. Een goede kwalificatieprestatie creëert een voordeel waarop maar weinig variabelen nog invloed kunnen hebben. De startpositie is daarom de fundamentele basis voor succes in de Formule 1.

## Argument 1: "De schone lucht voor de auto"
Auto's in de Formule 1 zijn erg afhankelijk van het aerodynamische ontwerp. Als een coureur achter een andere auto rijdt, krijgt hij last van turbulente lucht ("dirty air"). Dit betekent dat er achter een Formule 1-auto een verstoorde luchtstroom ontstaat, waar de achterliggende auto last van heeft. Bij het vooroprijden hebben de auto's geen last van deze turbulente lucht, waardoor ze een stuk sneller kunnen doorrijden.

## Argument 2: "Vooraan bepaal je de snelheid"
De coureur die op de eerste plek rijdt, bepaalt in veel gevallen grotendeels de snelheid. Dit komt doordat inhalen bij de meeste Grands Prix maar op een beperkt aantal plekken mogelijk is. Hierdoor kan de coureur die vooraan ligt zich meer focussen op het achter zich houden van de andere coureurs, waardoor ook andere factoren, zoals bandenslijtage, een minder grote rol zullen spelen. De coureur krijgt hierdoor een algeheel groter strategisch voordeel, wat zijn kans op winnen nog verder vergroot.

### Argument 2.1: "Grands Prix waar inhalen wél makkelijk is"
* VISUALISATIE MET TRACKS (UIT DATA docs) -> (grouped bar chart/histogram/distplots) die de correlatie tussen start- en eindpositie per circuit vergelijkt

Om rekening te houden met circuits waar wél veel ingehaald kan worden, hebben we de top 20 genomen van circuits waar het meest wordt ingehaald. Op basis hiervan maken we een figuur/grafiek (type nog te bepalen), waarin te zien is wat de positietoename of -afname op deze circuits is. Hieruit kan geconcludeerd worden dat ook op circuits waar inhalen wél makkelijk is, de startpositie alsnog domineert als bepalende factor voor de eindpositie.

# Visualisatie 1: Heatmap
