# Persp 1

In [None]:
# Cell 1: Import Required Packages
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Cell 2: Load and Prepare Data
def load_f1_data(path='merged_f1_data_1994_2022.csv'):
    """Load the F1 results CSV and cast the finishing position to text.

    Args:
        path: Location of the CSV file (anything ``pandas.read_csv``
            accepts). Defaults to ``merged_f1_data_1994_2022.csv`` in the
            current working directory.

    Returns:
        pandas.DataFrame: The CSV contents with the ``FinPos`` column
        converted via ``astype(str)``.
    """
    data = pd.read_csv(path)
    # The heatmap code compares FinPos against string codes such as 'DNF',
    # so the column is converted to strings once, right after loading.
    data['FinPos'] = data['FinPos'].astype(str)
    return data

# Load the data once at module level; the display cell further down passes
# this DataFrame to create_interactive_plotly_heatmap
df = load_f1_data()

# Cell 3: Data Preparation Functions
def _parse_finish_position(value):
    """Return ``value`` as an integer position, or ``None`` if it is not numeric."""
    try:
        return int(float(value))
    except (ValueError, TypeError):
        return None


def create_heatmap_data(df, selected_year=None):
    """Count how often each starting position led to each finishing result.

    Args:
        df: Results table with ``Year``, ``Pos`` (starting position) and
            ``FinPos`` (finishing position or status code) columns.
        selected_year: Year to restrict the counts to. A falsy value or
            ``'Total'`` uses every row of ``df``.

    Returns:
        tuple: ``(heatmap_data, start_categories, finish_categories,
        title_suffix, total_records)``:

        * ``heatmap_data`` - float array of shape
          ``(len(finish_categories), len(start_categories))`` holding counts.
        * ``start_categories`` - ``[1, ..., N]`` where ``N`` is the highest
          starting position in the selection, capped at 26.
        * ``finish_categories`` - numeric positions (1-30, ascending), then
          ``'DNF/DSQ'``, ``'DNS'``, ``'EX'``, ``'WD'`` when present, then any
          other codes in sorted order.
        * ``title_suffix`` - text describing the selection.
        * ``total_records`` - number of rows in the selection, including rows
          that did not land in any matrix cell.

        A selection with no usable starting positions (for example a year
        that is absent from ``df``) yields empty ``start_categories`` and a
        matrix with zero columns instead of raising.
    """
    max_grid_size = 26        # largest starting position that is plotted
    max_finish_position = 30  # numeric results outside 1..30 are ignored

    if selected_year and selected_year != 'Total':
        df_filtered = df[df['Year'] == selected_year]
        title_suffix = f" - {selected_year}"
    else:
        df_filtered = df
        title_suffix = " - All Years (1994-2022)"

    # Fold NC, DNF, DSQ and DQ into a single display category. Working on a
    # separate Series leaves the caller's frame untouched without copying it.
    finish_display = df_filtered['FinPos'].replace({
        'NC': 'DNF/DSQ',
        'DNF': 'DNF/DSQ',
        'DSQ': 'DNF/DSQ',
        'DQ': 'DNF/DSQ',
    })

    # Missing grid slots turn ``Pos`` into a float column (or NaN for an empty
    # selection); coerce and convert explicitly so ``range`` gets an int.
    start_positions = pd.to_numeric(df_filtered['Pos'], errors='coerce')
    highest_start = start_positions.max()
    if pd.isna(highest_start):
        max_start_pos = 0
    else:
        max_start_pos = int(min(highest_start, max_grid_size))
    start_categories = list(range(1, max_start_pos + 1))

    # Classify each distinct finishing value once; ``category_of`` maps the
    # raw value to the category it is counted under.
    numeric_positions = set()
    status_codes = []
    category_of = {}
    for value in finish_display.unique():
        position = _parse_finish_position(value)
        if position is not None:
            if 1 <= position <= max_finish_position:
                numeric_positions.add(position)
                category_of[value] = position
        elif not pd.isna(value) and value != 'nan':
            # Real missing values and the stringified 'nan' are not results.
            status_codes.append(value)
            category_of[value] = value

    known_codes = ['DNF/DSQ', 'DNS', 'EX', 'WD']
    finish_categories = (
        sorted(numeric_positions)
        + [code for code in known_codes if code in status_codes]
        + sorted(code for code in status_codes if code not in known_codes)
    )

    # Dictionary lookups replace per-row ``list.index`` scans.
    row_of = {category: idx for idx, category in enumerate(finish_categories)}
    col_of = {position: idx for idx, position in enumerate(start_categories)}

    heatmap_data = np.zeros((len(finish_categories), len(start_categories)))
    for start, finish in zip(start_positions, finish_display):
        col = col_of.get(start)
        row = row_of.get(category_of.get(finish))
        if row is not None and col is not None:
            heatmap_data[row, col] += 1

    return heatmap_data, start_categories, finish_categories, title_suffix, len(df_filtered)

# Cell 4: Create Interactive Plotly Heatmap with Built-in Dropdown
def create_interactive_plotly_heatmap(df):
    """Build a figure with one heatmap per year plus an all-years view.

    Every selection is added as its own heatmap trace; a dropdown toggles
    which trace is visible and swaps the title text, so the switcher keeps
    working in a standalone HTML export.

    Args:
        df: Results table accepted by ``create_heatmap_data``; the distinct
            values of its ``Year`` column become the dropdown entries.

    Returns:
        plotly.graph_objects.Figure: The figure with the all-years heatmap
        shown initially.
    """
    base_title = "F1 Starting vs Finishing Position Heatmap"

    # (value handed to create_heatmap_data, dropdown label, trace name)
    selections = [(None, "All Years (Total)", "All Years")]
    selections += [
        (year, f"{year}", f"{year}") for year in sorted(df['Year'].unique())
    ]

    fig = go.Figure()
    titles = []  # one title per trace, reused by the dropdown buttons

    for trace_idx, (year_value, _, trace_name) in enumerate(selections):
        (heatmap_data, start_categories, finish_categories,
         title_suffix, total_records) = create_heatmap_data(df, year_value)

        # Keep the title now so the data is not recomputed for the buttons.
        titles.append(
            f"{base_title}{title_suffix}<br><sub>Total Records: {total_records:,}</sub>"
        )

        # Hover text per cell; the percentage is relative to all records of
        # the selection, not only those that landed in the matrix.
        hover_text = []
        for finish, counts in zip(finish_categories, heatmap_data):
            hover_row = []
            for start, cell in zip(start_categories, counts):
                count = int(cell)
                percentage = (count / total_records * 100) if total_records > 0 else 0
                hover_row.append(
                    f'Starting: P{start}<br>Finishing: {finish}<br>'
                    f'Count: {count}<br>Percentage: {percentage:.1f}%'
                )
            hover_text.append(hover_row)

        fig.add_trace(
            go.Heatmap(
                z=heatmap_data,
                x=[f"P{pos}" for pos in start_categories],
                y=[str(pos) for pos in finish_categories],
                colorscale='Viridis',
                hoverongaps=False,
                hovertemplate='%{customdata}<extra></extra>',
                customdata=hover_text,
                colorbar=dict(title="Number of Occurrences"),
                visible=(trace_idx == 0),  # only the all-years trace at first
                name=trace_name,
            )
        )

    dropdown_buttons = []
    for trace_idx, (_, label, _) in enumerate(selections):
        visibility = [False] * len(selections)
        visibility[trace_idx] = True
        dropdown_buttons.append(
            dict(
                label=label,
                method="update",
                args=[
                    {"visible": visibility},
                    # Update only the text so the centring and font set in
                    # the layout below are kept when switching.
                    {"title.text": titles[trace_idx]},
                ],
            )
        )

    fig.update_layout(
        title={
            'text': titles[0],
            'x': 0.5,
            'xanchor': 'center',
            'font': {'size': 18},
        },
        xaxis_title='Starting Position',
        yaxis_title='Finishing Position',
        xaxis=dict(
            side='bottom',
            showgrid=True,
            gridwidth=1,
            gridcolor='rgba(255,255,255,0.2)',
        ),
        yaxis=dict(
            # The labels mix numeric-looking strings ('1', '2', ...) with
            # codes such as 'DNF/DSQ'; state the axis type explicitly rather
            # than relying on Plotly's axis-type auto-detection.
            type='category',
            autorange='reversed',  # first finishing position at the top
            showgrid=True,
            gridwidth=1,
            gridcolor='rgba(255,255,255,0.2)',
        ),
        width=1200,
        height=800,
        autosize=True,
        font=dict(size=12),
        margin=dict(l=60, r=60, t=100, b=60),
        updatemenus=[
            dict(
                type="dropdown",
                buttons=dropdown_buttons,
                direction="down",
                showactive=True,
                active=0,
                x=0.5,  # centred horizontally
                xanchor="center",
                y=1.02,  # just above the plotting area
                yanchor="bottom",
                bgcolor="rgba(255,255,255,0.9)",
                bordercolor="rgba(0,0,0,0.2)",
                borderwidth=1,
                font=dict(size=12),
                pad={"r": 10, "t": 10, "b": 10, "l": 10},
            ),
        ],
        # Transparent backgrounds
        plot_bgcolor='rgba(0,0,0,0)',
        paper_bgcolor='rgba(0,0,0,0)',
    )

    return fig

# Cell 5: Display the Interactive Heatmap with Custom Configuration
# This works in both Jupyter notebooks and HTML exports

# Build the dropdown-driven figure from the loaded results
fig = create_interactive_plotly_heatmap(df)

# Display options handed to fig.show(): mode bar contents, image-download
# settings, scroll zoom and responsive sizing
config = dict(
    displayModeBar=True,
    displaylogo=False,
    modeBarButtonsToRemove=['pan2d', 'lasso2d', 'select2d'],
    toImageButtonOptions=dict(
        format='png',
        filename='f1_heatmap',
        height=800,
        width=1200,
        scale=2,
    ),
    scrollZoom=True,
    responsive=True,
)

# Render the figure using the options above
fig.show(config=config)

# Alternative: For HTML export, you can also save as a standalone HTML file
# fig.write_html("f1_heatmap_interactive.html", config=config, include_plotlyjs=True)

# Cell 6: Alternative - Create Individual Static Heatmaps for Each Year
def create_all_year_heatmaps(df, save_html=False):
    """Show a separate heatmap figure for all years combined and for each year.

    Args:
        df: Results table accepted by ``create_heatmap_data``; the distinct
            values of its ``Year`` column determine which figures are made.
        save_html: When true, each figure is also written to
            ``f1_heatmap_<year>.html`` (``f1_heatmap_all_years.html`` for the
            combined view) in the current working directory.

    Returns:
        None. Every figure is displayed with ``fig.show()``.
    """
    # (value handed to create_heatmap_data, tag used in the file name)
    selections = [(None, 'all_years')]
    selections += [(year, year) for year in sorted(df['Year'].unique())]

    for year_value, file_tag in selections:
        (heatmap_data, start_categories, finish_categories,
         title_suffix, total_records) = create_heatmap_data(df, year_value)

        # Hover text per cell; the percentage is relative to all records of
        # the selection, not only those that landed in the matrix.
        hover_text = []
        for finish, counts in zip(finish_categories, heatmap_data):
            hover_row = []
            for start, cell in zip(start_categories, counts):
                count = int(cell)
                percentage = (count / total_records * 100) if total_records > 0 else 0
                hover_row.append(
                    f'Starting: P{start}<br>Finishing: {finish}<br>'
                    f'Count: {count}<br>Percentage: {percentage:.1f}%'
                )
            hover_text.append(hover_row)

        fig = go.Figure(data=go.Heatmap(
            z=heatmap_data,
            x=[f"P{pos}" for pos in start_categories],
            y=[str(pos) for pos in finish_categories],
            colorscale='Viridis',
            hoverongaps=False,
            hovertemplate='%{customdata}<extra></extra>',
            customdata=hover_text,
            colorbar=dict(title="Number of Occurrences"),
        ))

        fig.update_layout(
            title={
                'text': f'F1 Starting vs Finishing Position Heatmap{title_suffix}<br><sub>Total Records: {total_records:,}</sub>',
                'x': 0.5,
                'xanchor': 'center',
                'font': {'size': 16},
            },
            xaxis_title='Starting Position',
            yaxis_title='Finishing Position',
            xaxis=dict(side='bottom'),
            yaxis=dict(
                # The labels mix numeric-looking strings with codes such as
                # 'DNF/DSQ'; state the axis type explicitly rather than
                # relying on Plotly's axis-type auto-detection.
                type='category',
                autorange='reversed',  # first finishing position at the top
            ),
            width=1000,
            height=700,
            font=dict(size=12),
        )

        if save_html:
            fig.write_html(f"f1_heatmap_{file_tag}.html")

        fig.show()