In [None]:
# Early exploratory analysis of the prepared match data.

def early_analysis(df):
    """
    Compute summary statistics for a DataFrame of Dota 2 matches.

    The DataFrame is modified in place: ``radiant_gold_adv`` and
    ``radiant_xp_adv`` are parsed from strings into lists of floats,
    ``start_time`` is converted with ``pd.to_datetime``, and the derived
    columns ``match_length_minutes``, ``gold_difference_final``,
    ``xp_difference_final``, ``total_kills`` and ``kill_difference`` are added.

    Parameters:
    df (pandas.DataFrame): DataFrame containing Dota 2 match data. Columns
        read: radiant_gold_adv, radiant_xp_adv, start_time, duration,
        radiant_score, dire_score, radiant_win, first_blood_time, comeback,
        stomp, throw.

    Returns:
    dict: Three sub-dictionaries:
        'basic_stats'    - total_matches, radiant_win_rate (percent),
                           avg_match_duration (mean of duration / 60),
                           avg_first_blood_time, avg_total_kills
        'match_patterns' - comeback_rate, stomp_rate, throw_rate (percent)
        'win_conditions' - gold_lead_win_rate, xp_lead_win_rate: among matches
                           whose final advantage value is positive, the
                           percentage where radiant_win is true. NaN when no
                           match has a positive final value.
    """
    def parse_list_column(value):
        # Cells that are not strings (e.g. already-parsed lists) pass through.
        if not isinstance(value, str):
            return value
        pieces = value.strip().strip('[]').split(',')
        # Test the stripped piece so whitespace-only entries (such as the one
        # left by a trailing comma) are skipped instead of breaking float().
        return [float(piece) for piece in pieces if piece.strip()]

    def last_value(values):
        # Final entry of a parsed advantage list; NaN for empty or non-list cells.
        if isinstance(values, list) and values:
            return values[-1]
        return np.nan

    def lead_win_rate(column):
        # NaN compares as False here, so matches without a final value are
        # excluded from both numerator and denominator.
        leading = df[column] > 0
        lead_count = int(leading.sum())
        if lead_count == 0:
            # No match with a positive final value: the rate is undefined,
            # so report NaN rather than dividing by zero.
            return np.nan
        win_count = int((leading & df['radiant_win']).sum())
        return win_count / lead_count * 100

    # Convert string representations of lists to actual lists
    for column in ('radiant_gold_adv', 'radiant_xp_adv'):
        df[column] = df[column].apply(parse_list_column)

    # Convert timestamps to datetime
    # NOTE(review): if start_time holds Unix epoch seconds, this call needs
    # unit='s' -- confirm against the data source.
    df['start_time'] = pd.to_datetime(df['start_time'])

    # Create derived features
    df['match_length_minutes'] = df['duration'] / 60
    df['gold_difference_final'] = df['radiant_gold_adv'].apply(last_value)
    df['xp_difference_final'] = df['radiant_xp_adv'].apply(last_value)
    df['total_kills'] = df['radiant_score'] + df['dire_score']
    df['kill_difference'] = df['radiant_score'] - df['dire_score']

    # Analysis results
    analysis = {
        'basic_stats': {
            'total_matches': len(df),
            'radiant_win_rate': df['radiant_win'].mean() * 100,
            'avg_match_duration': df['match_length_minutes'].mean(),
            'avg_first_blood_time': df['first_blood_time'].mean(),
            'avg_total_kills': df['total_kills'].mean(),
        },
        'match_patterns': {
            'comeback_rate': df['comeback'].mean() * 100,
            'stomp_rate': df['stomp'].mean() * 100,
            'throw_rate': df['throw'].mean() * 100,
        },
    }

    # Calculate win conditions
    analysis['win_conditions'] = {
        'gold_lead_win_rate': lead_win_rate('gold_difference_final'),
        'xp_lead_win_rate': lead_win_rate('xp_difference_final'),
    }

    return analysis

def plot_match_metrics(df):
    """
    Draw a 2x2 grid of charts for key match metrics.

    Panels, in order: histogram of match_length_minutes, histogram of
    total_kills, scatter of gold_difference_final against radiant_win,
    and histogram of first_blood_time.

    Parameters:
    df (pandas.DataFrame): Match data that already contains the columns
        match_length_minutes, total_kills, gold_difference_final,
        radiant_win and first_blood_time.

    Returns:
    The `plt` object used for drawing (presumably matplotlib.pyplot), so the
    caller can show or save the current figure.
    """
    plt.figure(figsize=(15, 10))

    def draw_histogram(slot, column, heading, x_label):
        # One 30-bin histogram in the given cell of the 2x2 grid.
        axes = plt.subplot(2, 2, slot)
        sns.histplot(df[column], bins=30, ax=axes)
        axes.set_title(heading)
        axes.set_xlabel(x_label)

    # Panel 1: match duration
    draw_histogram(
        1,
        'match_length_minutes',
        'Match Duration Distribution',
        'Duration (minutes)',
    )

    # Panel 2: total kills
    draw_histogram(2, 'total_kills', 'Total Kills Distribution', 'Total Kills')

    # Panel 3: final gold difference against the win flag
    scatter_axes = plt.subplot(2, 2, 3)
    sns.scatterplot(
        data=df,
        x='gold_difference_final',
        y='radiant_win',
        ax=scatter_axes,
    )
    scatter_axes.set_title('Gold Advantage vs Win Rate')
    scatter_axes.set_xlabel('Final Gold Difference')

    # Panel 4: first blood time
    draw_histogram(
        4,
        'first_blood_time',
        'First Blood Time Distribution',
        'Time (seconds)',
    )

    plt.tight_layout()
    return plt

# Order matters: early_analysis adds the derived columns (match_length_minutes,
# total_kills, gold_difference_final) to all_matches_df in place, and
# plot_match_metrics reads those columns.
analysis_results = early_analysis(all_matches_df)
plots = plot_match_metrics(all_matches_df)