In [None]:
import pandas as pd
import numpy as np

def _parse_cs_timeline(raw):
    """Turn one CSV cell holding a comma-separated CS timeline into a list of ints.

    Missing cells (NaN/None) yield an empty list. A cell that pandas already
    parsed as a number (a one-entry timeline) yields a single-element list.
    Blank tokens such as a trailing comma are skipped instead of raising.
    """
    if isinstance(raw, str):
        return [int(token) for token in raw.split(',') if token.strip()]
    if pd.isna(raw):
        return []
    return [int(raw)]


def _minute_columns(timelines, prefix, minutes, index):
    """Expand per-row timelines into one float column per minute (NaN when too short)."""
    data = {
        f'{prefix}_min_{m}': [tl[m] if len(tl) > m else np.nan for tl in timelines]
        for m in range(minutes)
    }
    return pd.DataFrame(data, index=index, dtype=float)


def analyze_matchups_with_timelines(csv_path, output_path, minutes=15):
    """Aggregate per-game rows into per-matchup averages and write them to CSV.

    Each input row describes one game from the "player" side. Rows are grouped
    by the alphabetically sorted champion pair, so (Garen, Jax) and
    (Jax, Garen) land in the same matchup. Every side-dependent value is
    re-oriented so that it refers to ``champion_a`` (first of the sorted pair)
    and ``champion_b`` (second) before averaging:

    * CS timelines, ``total_cs``/``opponent_total_cs`` and
      ``first_10_min_cs``/``opponent_first_10_min_cs`` are swapped,
    * ``gold_diff`` is negated,
    * ``win`` is inverted,

    for rows in which the recorded player is ``champion_b``. Without this,
    ``win_rate`` and the other averages would mix both champions' perspectives.

    ``cs_per_min`` has no opponent counterpart in the input, so it cannot be
    re-oriented and stays averaged over the recorded player of each row.

    Args:
        csv_path: Path or file-like object accepted by ``pandas.read_csv``.
            Must provide the columns ``match_id``, ``player_champion``,
            ``opponent_champion``, ``player_cs_timeline``,
            ``opponent_cs_timeline``, ``total_cs``, ``opponent_total_cs``,
            ``cs_per_min``, ``gold_diff``, ``first_10_min_cs``,
            ``opponent_first_10_min_cs`` and ``win``.
        output_path: Path or file-like object accepted by ``DataFrame.to_csv``.
        minutes: Number of leading timeline entries to average (default 15).

    Returns:
        DataFrame with one row per matchup, sorted by ``games_count``
        descending. In the output, ``win_rate``, ``total_cs``,
        ``first_10_min_cs`` and ``gold_diff`` refer to ``champion_a``; the
        ``opponent_*`` columns refer to ``champion_b``.
    """
    df = pd.read_csv(csv_path)

    # Normalized (sorted) matchup key, plus a mask of rows whose recorded
    # player is the second champion of that key.
    pairs = list(zip(df['player_champion'], df['opponent_champion']))
    matchups = [tuple(sorted(pair)) for pair in pairs]
    df['matchup'] = pd.Series(matchups, index=df.index, dtype=object)
    swapped = np.array(
        [pair[0] != key[0] for pair, key in zip(pairs, matchups)], dtype=bool
    )

    # Parse and orient the CS timelines.
    player_tl = df['player_cs_timeline'].map(_parse_cs_timeline)
    opponent_tl = df['opponent_cs_timeline'].map(_parse_cs_timeline)
    a_timelines = [o if s else p for p, o, s in zip(player_tl, opponent_tl, swapped)]
    b_timelines = [p if s else o for p, o, s in zip(player_tl, opponent_tl, swapped)]

    # Orient the paired scalar stats; compute both sides before assigning so
    # the second expression still sees the original values.
    for own, opp in (
        ('total_cs', 'opponent_total_cs'),
        ('first_10_min_cs', 'opponent_first_10_min_cs'),
    ):
        oriented_own = df[own].where(~swapped, df[opp])
        oriented_opp = df[opp].where(~swapped, df[own])
        df[own] = oriented_own
        df[opp] = oriented_opp

    # A lead for the recorded player is a deficit for the other champion.
    df['gold_diff'] = df['gold_diff'].where(~swapped, -df['gold_diff'])

    # Cast first: boolean columns do not support subtraction.
    win = df['win'].astype(float)
    df['win'] = win.where(~swapped, 1.0 - win)

    # Minute-by-minute columns, built in one go to avoid frame fragmentation.
    df = pd.concat(
        [
            df,
            _minute_columns(a_timelines, 'champion_a', minutes, df.index),
            _minute_columns(b_timelines, 'champion_b', minutes, df.index),
        ],
        axis=1,
    )

    # Group by matchup and calculate averages.
    minute_cols = {
        side: [f'champion_{side}_min_{m}' for m in range(minutes)]
        for side in ('a', 'b')
    }
    stat_cols = [
        'total_cs', 'opponent_total_cs', 'cs_per_min', 'gold_diff',
        'first_10_min_cs', 'opponent_first_10_min_cs', 'win',
    ]
    agg_spec = {
        col: 'mean'
        for col in minute_cols['a'] + minute_cols['b'] + stat_cols
    }
    agg_spec['match_id'] = 'count'
    grouped = df.groupby('matchup').agg(agg_spec).reset_index()

    grouped = grouped.rename(columns={
        'match_id': 'games_count',
        'win': 'win_rate',
    })

    # Split the matchup tuple into separate columns (safe for empty results).
    grouped['champion_a'] = [key[0] for key in grouped['matchup']]
    grouped['champion_b'] = [key[1] for key in grouped['matchup']]

    # Timeline summary strings: averaged CS per minute, one decimal place.
    for side in ('a', 'b'):
        grouped[f'champion_{side}_cs_timeline'] = [
            ",".join(f"{v:.1f}" for v in row)
            for row in grouped[minute_cols[side]].to_numpy()
        ]

    result = grouped[[
        'champion_a', 'champion_b', 'games_count', 'win_rate',
        'champion_a_cs_timeline', 'champion_b_cs_timeline',
        'total_cs', 'opponent_total_cs', 'cs_per_min', 'gold_diff',
        'first_10_min_cs', 'opponent_first_10_min_cs',
    ]]

    # Most frequently played matchups first.
    result = result.sort_values('games_count', ascending=False)

    result.to_csv(output_path, index=False)
    return result

# Usage example
if __name__ == "__main__":
    # Columns shown in the console preview after the run.
    preview_columns = [
        'champion_a',
        'champion_b',
        'champion_a_cs_timeline',
        'champion_b_cs_timeline',
    ]
    output_csv = "matchup_analysis.csv"

    # Run the analysis on the sample input and write the summary CSV.
    summary = analyze_matchups_with_timelines("matchup_data.csv", output_csv)

    print(f"Analysis complete! Results saved to {output_csv}")
    print("\nExample timeline output:")
    top_two = summary[preview_columns].head(2)
    print(top_two)

Analysis complete! Results saved to matchup_analysis.csv

Example timeline output:
    champion_a champion_b                             champion_a_cs_timeline  \
487      Garen        Jax  0.0,0.0,2.7,12.5,20.6,29.1,37.0,45.8,55.6,62.7...   
206    Camille      Garen  0.0,0.0,3.4,14.0,21.6,31.5,38.9,47.2,54.9,64.1...   

                                champion_b_cs_timeline  
487  0.0,0.0,3.7,12.3,20.8,27.7,36.6,43.6,50.9,56.5...  
206  0.0,0.0,2.3,10.5,19.3,25.9,35.9,43.8,52.2,60.9...  


: 