# Het verschil tussen startpositie en strategie

In [1]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.offline as pyo

# Ensure offline mode for HTML export
pyo.init_notebook_mode(connected=True)

# Load the merged race results table
df = pd.read_csv('merged_f1_data_1994_2022.csv')

# Convert start ('Pos') and finish ('FinPos') positions to numbers;
# errors='coerce' turns every non-numeric entry into NaN instead of raising.
df['Pos_numeric'] = pd.to_numeric(df['Pos'], errors='coerce')
df['FinPos_numeric'] = pd.to_numeric(df['FinPos'], errors='coerce')

if 'Year' not in df.columns:
    import warnings

    # Fallback kept so the notebook still renders, but the seasons assigned
    # here are RANDOM: say so loudly, because the per-season views built from
    # this column do not describe real seasons in that case.
    warnings.warn(
        "Column 'Year' is missing from merged_f1_data_1994_2022.csv; "
        "assigning random seasons (1994-2022) as a placeholder. "
        "Per-season heatmaps will NOT reflect real data."
    )
    df['Year'] = np.random.choice(range(1994, 2023), len(df))

# Keep only rows where both positions are present and strictly positive
valid_data = df[
    (df['Pos_numeric'].notna()) &
    (df['FinPos_numeric'].notna()) &
    (df['Pos_numeric'] > 0) &
    (df['FinPos_numeric'] > 0)
].copy()

def create_heatmap_data(data):
    """Count how often each (start, finish) position pair occurs in ``data``.

    Args:
        data: DataFrame with numeric ``Pos_numeric`` (start position) and
            ``FinPos_numeric`` (finish position) columns, both 1-based.

    Returns:
        Tuple ``(matrix, max_start_pos, max_finish_pos)``. ``matrix`` has
        shape ``(max_finish_pos, max_start_pos)`` and ``matrix[f - 1, s - 1]``
        is the number of rows that started at ``s`` and finished at ``f``.
        Both dimensions are capped at 26; rows beyond the cap are not counted.
        When there is nothing to count, a 20x20 zero matrix and the sizes
        ``20, 20`` are returned.
    """
    default_size = 20  # Default size if no data
    position_cap = 26  # Limit for performance

    if len(data) == 0:
        return np.zeros((default_size, default_size)), default_size, default_size

    # Rows with a missing position cannot be placed in the matrix.
    positions = data[['Pos_numeric', 'FinPos_numeric']].dropna()
    start = positions['Pos_numeric'].to_numpy()
    finish = positions['FinPos_numeric'].to_numpy()

    # Positions below 1 would become negative indices, which NumPy silently
    # wraps around to the last row/column; exclude them explicitly.
    usable = (start >= 1) & (finish >= 1)
    start = start[usable].astype(int)
    finish = finish[usable].astype(int)

    if start.size == 0:
        return np.zeros((default_size, default_size)), default_size, default_size

    max_start_pos = min(int(start.max()), position_cap)
    max_finish_pos = min(int(finish.max()), position_cap)

    heatmap_data = np.zeros((max_finish_pos, max_start_pos))

    # np.add.at accumulates correctly when the same (finish, start) pair
    # appears many times, unlike plain fancy-index "+= 1".
    inside = (start <= max_start_pos) & (finish <= max_finish_pos)
    np.add.at(heatmap_data, (finish[inside] - 1, start[inside] - 1), 1)

    return heatmap_data, max_start_pos, max_finish_pos

def _build_position_heatmap_trace(data, name, hover_title, colorscale, visible):
    """Build one start-vs-finish ``go.Heatmap`` trace from ``data``."""
    heatmap_data, max_start, max_finish = create_heatmap_data(data)

    return go.Heatmap(
        z=heatmap_data,
        x=list(range(1, max_start + 1)),
        y=list(range(1, max_finish + 1)),
        colorscale=colorscale,
        showscale=True,
        visible=visible,
        name=name,
        colorbar=dict(
            title="Race Count",
            title_font=dict(color='black', size=12),
            tickfont=dict(color='black'),
            x=1.02,
            bgcolor='rgba(255,255,255,0.8)',
            bordercolor='black',
            borderwidth=1
        ),
        # Only the first literal is an f-string; the %{x}/%{y}/%{z}
        # placeholders must reach Plotly untouched.
        hovertemplate=
        f'<b>{hover_title}</b><br>' +
        '<b>Start Position:</b> P%{x}<br>' +
        '<b>Finish Position:</b> P%{y}<br>' +
        '<b>Race Count:</b> %{z}<br>' +
        '<extra></extra>'
    )


def create_jupyter_html_optimized_viz():
    """Create the interactive start-vs-finish heatmap with a season dropdown.

    Reads the module-level ``valid_data`` DataFrame and builds one heatmap
    trace for all seasons combined plus one trace per value in its ``Year``
    column. A dropdown menu switches which single trace is visible.

    Returns:
        The configured ``go.Figure``.
    """

    # F1 Red color scheme - light red for low values, dark red for high values
    f1_red_colorscale = [
        [0.0, "#FFCECE"],      # Very light red for least occurrences
        [0.1, '#FF9999'],      # Light red
        [0.2, '#FF8080'],      # Medium light red
        [0.3, '#FF6666'],      # Medium red
        [0.4, '#FF4D4D'],      # Red
        [0.5, '#FF3333'],      # Medium red
        [0.6, '#FF1A1A'],      # Red
        [0.7, '#FF0000'],      # Pure red (F1 red)
        [0.8, '#E60000'],      # Dark red
        [0.9, '#CC0000'],      # Very dark red
        [1.0, '#990000']       # Darkest red for most occurrences
    ]

    # Individual seasons; a missing Year cannot be matched by the equality
    # filter below, so it is dropped here instead of producing a dead entry.
    years = sorted(valid_data['Year'].dropna().unique())

    # The "All Years" trace comes first and is the one shown initially.
    labels = ["All Years"]
    traces = [
        _build_position_heatmap_trace(
            valid_data,
            name="All Years",
            hover_title="All Years (1994-2022)",
            colorscale=f1_red_colorscale,
            visible=True,
        )
    ]

    # One hidden trace per season that actually has rows.
    for year in years:
        year_data = valid_data[valid_data['Year'] == year]
        if len(year_data) == 0:
            continue

        traces.append(
            _build_position_heatmap_trace(
                year_data,
                name=str(year),
                hover_title=year,
                colorscale=f1_red_colorscale,
                visible=False,
            )
        )
        labels.append(str(year))

    # Build the buttons from the traces that were really created, so every
    # visibility mask has exactly one entry per trace and button k always
    # reveals trace k, even when a season was skipped above.
    trace_count = len(traces)
    buttons = [
        {
            "args": [{"visible": [position == index for position in range(trace_count)]}],
            "label": label,
            "method": "update"
        }
        for index, label in enumerate(labels)
    ]

    # Create figure
    fig = go.Figure(data=traces)

    # Add comprehensive layout with F1 red theme
    fig.update_layout(
        # Title with clean styling
        title={
            'text': '<b>Formule 1 Position Heatmap</b><br>',
            'x': 0.5,
            'xanchor': 'center',
            'font': {'size': 20, 'color': 'black', 'family': 'Arial Black, sans-serif'}
        },

        # Clean dropdown menu
        updatemenus=[
            {
                "buttons": buttons,
                "direction": "down",
                "pad": {"r": 10, "t": 10, "b": 10, "l": 10},
                "showactive": True,
                "x": 0.02,
                "xanchor": "left",
                "y": 1.10,
                "yanchor": "top",
                "bgcolor": "#FFFFFF",
                "bordercolor": "black",
                "borderwidth": 1,
                "font": {"size": 11, "color": "black", "family": "Arial, sans-serif"},
                "active": 0,
                "type": "dropdown"
            }
        ],

        # Label placed just above the dropdown
        annotations=[
            dict(
                text="<b>Select Season:</b>",
                x=0.02, y=1.13,
                xref="paper", yref="paper",
                align="left",
                showarrow=False,
                font=dict(size=13, color="black", family="Arial Black, sans-serif")
            )
        ],

        # Axes with clean black styling; one tick per position starting at 1
        xaxis=dict(
            title='<b>Start Position</b>',
            title_font=dict(size=14, color='black', family='Arial Black, sans-serif'),
            tickmode='linear',
            tick0=1,
            dtick=1,
            showgrid=True,
            gridcolor='lightgray',
            gridwidth=1,
            tickfont=dict(color='black', size=11),
            linecolor='black',
            linewidth=1,
            zeroline=False
        ),
        yaxis=dict(
            title='<b>Finish Position</b>',
            title_font=dict(size=14, color='black', family='Arial Black, sans-serif'),
            tickmode='linear',
            tick0=1,
            dtick=1,
            showgrid=True,
            gridcolor='lightgray',
            gridwidth=1,
            tickfont=dict(color='black', size=11),
            linecolor='black',
            linewidth=1,
            zeroline=False
        ),

        # Layout optimization for HTML export
        width=1000,
        height=700,
        font=dict(size=11, family="Arial, sans-serif", color="black"),
        plot_bgcolor='#FAFAFA',
        paper_bgcolor='#FFFFFF',
        margin=dict(t=130, l=100, r=140, b=80),

        # Ensure good performance in browsers
        hovermode='closest'
    )

    return fig

# Build the interactive season heatmap
fig = create_jupyter_html_optimized_viz()

# Plotly display configuration passed to fig.show(): mode-bar buttons,
# PNG export settings, and responsive / scroll-zoom behaviour
config = dict(
    displayModeBar=True,
    displaylogo=False,
    modeBarButtonsToRemove=['pan2d', 'lasso2d', 'select2d'],
    toImageButtonOptions=dict(
        format='png',
        filename='f1_grid_analysis',
        height=700,
        width=1000,
        scale=2,
    ),
    responsive=True,
    scrollZoom=True,
)

# Render the figure in the notebook
fig.show(config=config)

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
import plotly.offline as pyo
from plotly.subplots import make_subplots
from collections import defaultdict
pyo.init_notebook_mode(connected=True)

# Plot styling for matplotlib / seaborn
plt.style.use('default')
sns.set_palette("viridis")

# Load the merged race results table
df = pd.read_csv('merged_f1_data_1994_2022.csv')

# Numeric copies of start ('Pos') and finish ('FinPos') positions;
# entries that cannot be parsed become NaN.
df['Pos_numeric'] = pd.to_numeric(df['Pos'], errors='coerce')
df['FinPos_numeric'] = pd.to_numeric(df['FinPos'], errors='coerce')

# Keep rows where both positions are present and strictly positive
valid_data = df.loc[
    df['Pos_numeric'].notna()
    & df['FinPos_numeric'].notna()
    & df['Pos_numeric'].gt(0)
    & df['FinPos_numeric'].gt(0)
].copy()

# Matrix dimensions: highest start and finish position present in the data
max_start_pos = int(valid_data['Pos_numeric'].max())
max_finish_pos = int(valid_data['FinPos_numeric'].max())

# Rows are finish positions, columns are start positions
heatmap_data = np.zeros((max_finish_pos, max_start_pos))

# Count every (finish, start) pair in one vectorised pass instead of a
# Python-level iterrows() loop. np.add.at is required (rather than "+= 1"
# with fancy indexing) so repeated pairs are each counted.
np.add.at(
    heatmap_data,
    (
        valid_data['FinPos_numeric'].to_numpy().astype(int) - 1,  # -1 for 0-indexing
        valid_data['Pos_numeric'].to_numpy().astype(int) - 1,  # -1 for 0-indexing
    ),
    1,
)

# Same counts with 1-based position labels
heatmap_df = pd.DataFrame(
    heatmap_data,
    index=range(1, max_finish_pos + 1),  # Finish positions
    columns=range(1, max_start_pos + 1)  # Start positions
)

# Heatmap of start position (x) against finish position (y)
fig = go.Figure(
    data=go.Heatmap(
        z=heatmap_df.values,
        x=heatmap_df.columns,  # Start positions
        y=heatmap_df.index,    # Finish positions
        colorscale='Viridis',
        showscale=True,
        colorbar={'title': "Aantal races"},
        hovertemplate=(
            '<b>Start Positie:</b> P%{x}<br>'
            '<b>Eind Positie:</b> P%{y}<br>'
            '<b>Aantal races:</b> %{z}<br>'
            '<extra></extra>'  # Removes the default trace box
        ),
    )
)

# Centred title, one tick per position on both axes, fixed figure size
fig.update_layout(
    title=dict(
        text='F1 Startpositie vs Eindpositie (1994-2022)<br>',
        x=0.5,
        xanchor='center',
        font=dict(size=16),
    ),
    xaxis={
        'title': 'Startpositie (Pos)',
        'title_font': {'size': 14},
        'tickmode': 'linear',
        'tick0': 1,
        'dtick': 1,
    },
    yaxis={
        'title': 'Eindpositie (FinPos)',
        'title_font': {'size': 14},
        'tickmode': 'linear',
        'tick0': 1,
        'dtick': 1,
    },
    width=1000,
    height=700,
    font={'size': 12},
)

fig.show()

# Share of pole starts that ended in a win.
# heatmap_data is indexed as [finish position - 1, start position - 1].
pole_wins = heatmap_data[0, 0]
total_pole_starts = heatmap_data[:, 0].sum()
pole_win_percentage = (pole_wins / total_pole_starts) * 100

print(f"\nTop 5 meest voorkomende combinaties:")
# Every non-empty cell as (finish position, start position, count).
# np.argwhere walks the matrix row by row, so ties keep the same order
# a nested row/column loop would give.
top_combinations = [
    (int(row) + 1, int(col) + 1, int(heatmap_data[row, col]))
    for row, col in np.argwhere(heatmap_data > 0)
]

# Most frequent first
top_combinations.sort(key=lambda combo: combo[2], reverse=True)

for rank, (finish_pos, start_pos, count) in enumerate(top_combinations[:5], start=1):
    print(f"{rank}. Start P{start_pos} → Finish P{finish_pos}: {count} keer")

print(f"\nWin rates per startpositie (top 10):")
for start_pos in range(1, min(11, max_start_pos + 1)):
    starts_from_here = heatmap_data[:, start_pos - 1]
    wins = starts_from_here[0]  # Finish position 1 (index 0)
    total_starts = starts_from_here.sum()
    if not total_starts > 0:
        continue
    win_rate = (wins / total_starts) * 100
    print(f"P{start_pos}: {win_rate:.1f}% ({int(wins)}/{int(total_starts)})")

# Positions gained (positive) or lost (negative) between start and finish
valid_data['position_change'] = valid_data['Pos_numeric'] - valid_data['FinPos_numeric']

fig_hist = go.Figure()

# Histogram with one bar per whole-number position change.
# The range is at least -20..20 and is widened to fit the data. With fixed
# edges, a change of exactly +21 would land in the bar labelled 20 (numpy
# closes the last bin on the right) and larger changes would be dropped.
changes = valid_data['position_change'].dropna()
lowest_change = min(-20, int(np.floor(changes.min()))) if len(changes) else -20
highest_change = max(20, int(np.ceil(changes.max()))) if len(changes) else 20

hist_data, bin_edges = np.histogram(
    changes, bins=range(lowest_change, highest_change + 2)
)

# Bar k covers the change value x_values[k]
x_values = list(range(lowest_change, highest_change + 1))

fig_hist.add_trace(go.Bar(
    x=x_values,
    y=hist_data,
    name='Frequentie',
    marker_color='skyblue',
    marker_line_color='black',
    marker_line_width=1,
    hovertemplate=
    '<b>Positie verandering:</b> %{x}<br>' +
    '<b>Aantal races:</b> %{y}<br>' +
    '<extra></extra>'
))

# Dashed reference line at "no change"
fig_hist.add_vline(
    x=0,
    line_dash="dash",
    line_color="red",
    line_width=2,
    annotation_text="",
    annotation_position="top"
)

fig_hist.update_layout(
    title={
        'text': 'Verdeling van Positie Veranderingen in F1 Races (1994-2022)<br>',
        'x': 0.5,
        'xanchor': 'center',
        'font': {'size': 16}
    },
    xaxis=dict(
        title='Positie Verandering',
        title_font=dict(size=14),
        tickmode='linear',
        dtick=2
    ),
    yaxis=dict(
        title='Frequentie',
        title_font=dict(size=14)
    ),
    width=900,
    height=500,
    showlegend=False,
    font=dict(size=12)
)

fig_hist.show()



Top 5 meest voorkomende combinaties:
1. Start P1 → Finish P1: 252 keer
2. Start P2 → Finish P1: 140 keer
3. Start P2 → Finish P2: 122 keer
4. Start P3 → Finish P3: 113 keer
5. Start P3 → Finish P2: 101 keer

Win rates per startpositie (top 10):
P1: 55.9% (252/451)
P2: 31.0% (140/451)
P3: 14.2% (64/451)
P4: 5.2% (23/440)
P5: 3.4% (14/409)
P6: 2.7% (11/414)
P7: 1.7% (7/413)
P8: 1.0% (4/402)
P9: 0.0% (0/405)
P10: 1.5% (6/403)


# Perspective 1: "Coureurs die verder vooraan starten, hebben een duidelijk voordeel bij het behalen van een hoge eindpositie."
Nadat de coureurs de kwalificatie hebben afgerond, is de startpositie voor de race bekend. Velen zijn van mening dat deze startpositie de grootst bepalende factor is voor de eindpositie. Een goede kwalificatieprestatie creëert een voordeel, waarbij er weinig variabelen zijn die hier invloed op kunnen hebben. De startpositie is daarom de fundamentele basis voor succes in de Formule 1.

## Argument 1: "De schone lucht voor de auto"
Auto's in de Formule 1 zijn erg afhankelijk van hun aerodynamische ontwerp. Als een coureur achter een andere auto rijdt, krijgt hij last van turbulente lucht ("dirty air"): achter een Formule 1-auto ontstaat een verstoorde luchtstroom, waar de auto erachter last van heeft. Een coureur die vooraan rijdt, heeft geen last van deze turbulente lucht en kan daardoor een stuk sneller doorrijden.

## Argument 2: "Vooraan bepaal je de snelheid"
De coureur die op de eerste plek rijdt, bepaalt in veel gevallen grotendeels de snelheid. Dit komt doordat inhalen bij de meeste Grands Prix maar op een beperkt aantal plekken mogelijk is. Hierdoor kan de coureur die vooraan ligt zich meer richten op het achter zich houden van de andere coureurs, waardoor ook andere factoren, zoals bandenslijtage, een minder grote rol spelen. De coureur krijgt hierdoor een groter strategisch voordeel, wat zijn kans op winnen nog verder vergroot.

### Argument 2.1: "Grands Prix waar inhalen wél makkelijk is"
* VISUALISATIE MET TRACKS (UIT DATA docs) -> (grouped bar chart/histogram/distplots) die de correlatie tussen start- en eindpositie per circuit vergelijkt

Om rekening te houden met circuits waar wél veel ingehaald kan worden, hebben we de top 20 genomen van circuits waar het meest wordt ingehaald. Op basis hiervan maken we een figuur/grafiek (welke????????), waarin te zien is hoeveel posities coureurs op deze circuits winnen of verliezen. Hieruit kan geconcludeerd worden dat ook op circuits waar inhalen wél makkelijk is, de startpositie alsnog domineert als bepalende factor voor de eindpositie.

# Visualisatie 1: Heatmap
