In [15]:
import os
import pandas as pd


In [16]:
def load_and_combine_csvs(directory: str = "game_results") -> pd.DataFrame:
    all_files = [os.path.join(directory, f) for f in os.listdir(directory) if f.endswith('.csv')]
    combined_df = pd.concat((pd.read_csv(f) for f in all_files), ignore_index=True)
    return combined_df

def preliminary_analysis(df: pd.DataFrame):
    # Number of games
    num_games = df['Game_ID'].nunique()
    print(f"Number of games: {num_games}")
    
    # Total number of turns
    total_turns = df.groupby('Game_ID')['Turn'].max().sum()
    print(f"Total number of turns: {total_turns}")

    # Average number of turns per game
    avg_turns_per_game = df.groupby('Game_ID')['Turn'].max().mean()
    print(f"Average number of turns per game: {avg_turns_per_game:.2f}")
    
    # Best player by average victory points
    vp_columns = [col for col in df.columns if '_VP' in col]
    df['Total_VP'] = df[vp_columns].sum(axis=1)

    # Get all unique player class names and initialize their average VP
    player_class_columns = [col for col in df.columns if col.endswith('_Class')]
    player_classes = pd.unique(df[player_class_columns].values.ravel('K'))
    avg_vp_per_player = {player_class: 0 for player_class in player_classes}

    # Calculate the average VP for each player class
    for player_class in player_classes:
        vp_sum = 0
        vp_count = 0
        for vp_col, class_col in zip(vp_columns, player_class_columns):
            vp_sum += df[df[class_col] == player_class][vp_col].sum()
            vp_count += df[df[class_col] == player_class][vp_col].count()
        avg_vp_per_player[player_class] = vp_sum / vp_count if vp_count > 0 else 0
    
    best_player = max(avg_vp_per_player, key=avg_vp_per_player.get)
    print(f"Best player (by average victory points): {best_player}")
    
    # Winning percentage per player type
    last_turns = df[df['Turn'] == df.groupby('Game_ID')['Turn'].transform(max)]
    win_percentage = last_turns['Winner'].value_counts(normalize=True) * 100
    print(f"Winning percentage per player type:\n{win_percentage}")

    # Additional insights
    print("\nAdditional Insights:")
    print(df.describe(include='all'))
    
    return {
        "num_games": num_games,
        "total_turns": total_turns,
        "avg_turns_per_game": avg_turns_per_game,
        "best_player": best_player,
        "win_percentage": win_percentage,
    }

In [17]:
directory = "game_results"  # Directory containing game CSV files
combined_df = load_and_combine_csvs(directory)
analysis_results = preliminary_analysis(combined_df)

# Save combined data and analysis results to files
combined_df.to_csv("combined_game_results.csv", index=False)
with open("analysis_results.txt", "w") as f:
    for key, value in analysis_results.items():
        f.write(f"{key}: {value}\n")

Number of games: 194
Total number of turns: 161103
Average number of turns per game: 830.43
Best player (by average victory points): ResourceHoarderPlayer
Winning percentage per player type:
Winner
BLUE          51.595745
WHITE         26.063830
RED           21.808511
Color.BLUE     0.531915
Name: proportion, dtype: float64

Additional Insights:
                 Turn Current_Player      Num_Turns         Resource_Bank  \
count   161297.000000         160546  161297.000000                160546   
unique            NaN              3            NaN                 16385   
top               NaN           BLUE            NaN  [19, 19, 19, 18, 19]   
freq              NaN          85334            NaN                   719   
mean       535.576359            NaN     216.917909                   NaN   
std        435.529401            NaN     187.701447                   NaN   
min          0.000000            NaN       0.000000                   NaN   
25%        208.000000            Na