# Chess Game Data Analysis

This notebook provides comprehensive analysis and visualization of chess game data.

## Data Overview
- **Total Games**: 5,863
- **Date Range**: 2023-2025
- **Main Player**: teoriat
- **Data Source**: Chess.com API

In [None]:
# Import required libraries
from sqlalchemy import create_engine
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import re
from datetime import datetime
import warnings
# Suppress all warnings for the rest of the session
warnings.filterwarnings('ignore')

# Plot appearance: matplotlib default style, seaborn "husl" palette,
# and a larger default figure/font size applied in one rcParams update
plt.style.use('default')
sns.set_palette("husl")
plt.rcParams.update({
    'figure.figsize': (12, 8),
    'font.size': 12,
})

print("Libraries imported successfully")

In [None]:
# Load data from PostgreSQL database
import os

# The connection URL can be supplied through the CHESS_DB_URL environment
# variable so credentials do not have to live in the notebook; the original
# local URL is kept as the fallback so existing setups keep working.
db_url = os.environ.get(
    "CHESS_DB_URL",
    "postgresql+psycopg2://postgres:chess_engine@localhost/chess_data",
)
engine = create_engine(db_url)
df = pd.read_sql("SELECT * FROM chess_games", engine)

# NOTE(review): later cells use the .dt accessor on 'end_time', which requires
# a datetime dtype -- confirm the column arrives as a timestamp from the DB.
print("Data loaded successfully!")
print(f"Total games: {len(df)}")
print(f"Date range: {df['end_time'].min()} to {df['end_time'].max()}")
print("\nFirst 5 rows:")
print(df.head())
print("\nColumn info:")
# DataFrame.info() prints its report itself and returns None, so wrapping it
# in print() would emit a stray "None" line.
df.info()

In [None]:
# Data preprocessing and feature extraction
def extract_game_result(pgn_text, player_white, player_black, target_player='teoriat'):
    """Classify a game as 'Win', 'Loss', 'Draw' or 'Unknown' for one player.

    The score is read from the PGN ``[Result "..."]`` tag and interpreted
    from the side ``target_player`` played. 'Unknown' is returned when the
    PGN is missing, has no Result tag, carries a score other than
    1-0 / 0-1 / 1/2-1/2, or when ``target_player`` is neither player.
    """
    if pd.isna(pgn_text):
        return 'Unknown'

    tag = re.search(r'\[Result "([^"]+)"\]', pgn_text)
    if not tag:
        return 'Unknown'
    score = tag.group(1)

    # Pick the score -> outcome table for the colour the target player had;
    # white is checked first, exactly as in an if/elif chain.
    if target_player == player_white:
        outcome_by_score = {'1-0': 'Win', '0-1': 'Loss', '1/2-1/2': 'Draw'}
    elif target_player == player_black:
        outcome_by_score = {'0-1': 'Win', '1-0': 'Loss', '1/2-1/2': 'Draw'}
    else:
        return 'Unknown'

    return outcome_by_score.get(score, 'Unknown')

def extract_opening_move(pgn_text):
    """Return the move token that follows the first "1. " in the PGN.

    Gives 'Unknown' when the PGN is missing or no such token is found.
    """
    if pd.isna(pgn_text):
        return 'Unknown'

    # The token must start with a letter and may contain letters, digits,
    # '-', '+' and '='.
    first_move = re.search(r'1\. ([A-Za-z][A-Za-z0-9\-\+\=]*)', pgn_text)
    return first_move.group(1) if first_move else 'Unknown'

def extract_opening_sequence(pgn_text, moves=2):
    """Extract the opening sequence (first few plies) from a PGN string.

    Args:
        pgn_text: PGN text of one game; NaN/None yields 'Unknown'.
        moves: selects how much is returned:
            1 -> White's first move ("e4"),
            2 -> the first move of each side ("e4 e5"),
            any other value -> the first two moves of each side
            ("e4 e5 Nf3 Nc6").

    Returns:
        The space-separated moves, or 'Unknown' when the PGN does not
        contain the requested number of plies.

    Brace comments (e.g. ``{[%clk 0:09:59.9]}``), header tag lines and
    black move-number markers ("1...") are removed before matching, so
    annotated PGNs are parsed too. Only as many plies as are returned are
    required to be present.
    """
    if pd.isna(pgn_text):
        return 'Unknown'

    # Reduce the PGN to bare movetext. Comments go first because they may
    # themselves contain square brackets.
    movetext = re.sub(r'\{[^}]*\}', ' ', pgn_text)
    movetext = re.sub(r'^\s*\[[^\]\n]*\]\s*$', ' ', movetext, flags=re.MULTILINE)
    movetext = re.sub(r'\d+\.\.\.', ' ', movetext)

    if moves == 1:
        plies = 1
    elif moves == 2:
        plies = 2
    else:
        plies = 4

    # Same move-token shape as before: a letter followed by letters, digits,
    # '-', '+' or '='. Whitespace between tokens may be any run of spaces or
    # newlines; the look-behind keeps "1." from matching inside "11.".
    san = r'([A-Za-z][A-Za-z0-9\-\+\=]*)'
    pattern = r'(?<!\d)1\.\s*' + san
    if plies >= 2:
        pattern += r'\s+' + san
    if plies == 4:
        pattern += r'\s+2\.\s*' + san + r'\s+' + san

    match = re.search(pattern, movetext)
    if match:
        return ' '.join(match.groups())
    return 'Unknown'

# Apply preprocessing
def _result_for_row(row):
    """Outcome of one game row, from the default target player's side."""
    return extract_game_result(row['pgn'], row['player_white'], row['player_black'])

df['game_result'] = df.apply(_result_for_row, axis=1)
df['opening_move'] = df['pgn'].map(extract_opening_move)
df['opening_sequence'] = df['pgn'].map(lambda pgn: extract_opening_sequence(pgn, 2))

# Extract date components (all derived from the game's end_time)
end_dt = df['end_time'].dt
df['year'] = end_dt.year
df['month'] = end_dt.month
df['year_month'] = end_dt.to_period('M')
df['date'] = end_dt.date

print("Data preprocessing completed!")
print("Game results distribution:")
print(df['game_result'].value_counts())
print("\nTop opening moves:")
print(df['opening_move'].value_counts().head(10))

## 1. Games Per Month/Year Analysis

In [None]:
# Games per month/year visualization
# Four panels (monthly bars, yearly bars, monthly trend line, weekday-by-hour
# heatmap) followed by a printed summary of monthly activity.
fig, axes = plt.subplots(2, 2, figsize=(20, 15))
fig.suptitle('Chess Games Activity Analysis', fontsize=16, fontweight='bold')

# 1. Games per month
monthly_games = df.groupby('year_month').size().reset_index(name='game_count')
monthly_games['year_month_str'] = monthly_games['year_month'].astype(str)

ax1 = axes[0, 0]
bars = ax1.bar(range(len(monthly_games)), monthly_games['game_count'], color='skyblue', alpha=0.7)
ax1.set_xlabel('Month')
ax1.set_ylabel('Number of Games')
ax1.set_title('Games Played Per Month')
ax1.set_xticks(range(len(monthly_games)))
ax1.set_xticklabels(monthly_games['year_month_str'], rotation=45)
ax1.grid(axis='y', alpha=0.3)

# Add value labels on bars
for bar in bars:
    height = bar.get_height()
    ax1.text(bar.get_x() + bar.get_width()/2., height + 1,
             f'{int(height)}', ha='center', va='bottom', fontsize=9)

# 2. Games per year
yearly_games = df.groupby('year').size().reset_index(name='game_count')

ax2 = axes[0, 1]
bars2 = ax2.bar(yearly_games['year'], yearly_games['game_count'], color='lightcoral', alpha=0.7)
ax2.set_xlabel('Year')
ax2.set_ylabel('Number of Games')
ax2.set_title('Games Played Per Year')
ax2.grid(axis='y', alpha=0.3)

# Add value labels on bars
for bar in bars2:
    height = bar.get_height()
    ax2.text(bar.get_x() + bar.get_width()/2., height + 10,
             f'{int(height)}', ha='center', va='bottom', fontsize=10)

# 3. Monthly trend line
ax3 = axes[1, 0]
ax3.plot(range(len(monthly_games)), monthly_games['game_count'], marker='o', linewidth=2, markersize=6)
ax3.set_xlabel('Month')
ax3.set_ylabel('Number of Games')
ax3.set_title('Monthly Gaming Activity Trend')
ax3.set_xticks(range(len(monthly_games)))
ax3.set_xticklabels(monthly_games['year_month_str'], rotation=45)
ax3.grid(True, alpha=0.3)

# 4. Daily activity heatmap (sample)
df['day_of_week'] = df['end_time'].dt.day_name()
df['hour'] = df['end_time'].dt.hour
# groupby sorts day names alphabetically (Friday, Monday, ...); reindex so the
# rows run Monday -> Sunday. Days without any game become all-zero rows.
weekday_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
heatmap_data = (
    df.groupby(['day_of_week', 'hour']).size().unstack(fill_value=0)
    .reindex(weekday_order, fill_value=0)
)

ax4 = axes[1, 1]
sns.heatmap(heatmap_data, annot=False, cmap='YlOrRd', ax=ax4, cbar_kws={'label': 'Games Count'})
ax4.set_title('Gaming Activity Heatmap (Day vs Hour)')
ax4.set_xlabel('Hour of Day')
ax4.set_ylabel('Day of Week')

plt.tight_layout()
plt.show()

# Summary statistics
print("\n=== MONTHLY ACTIVITY SUMMARY ===")
print(f"Most active month: {monthly_games.loc[monthly_games['game_count'].idxmax(), 'year_month']} ({monthly_games['game_count'].max()} games)")
print(f"Least active month: {monthly_games.loc[monthly_games['game_count'].idxmin(), 'year_month']} ({monthly_games['game_count'].min()} games)")
print(f"Average games per month: {monthly_games['game_count'].mean():.1f}")
print(f"Total months with data: {len(monthly_games)}")

## 2. Win/Loss/Draw Statistics

In [None]:
# Win/Loss/Draw statistics visualization
# Four panels: overall result pie, results by time control, monthly
# win/loss/draw rates, and results by rating difference; then a text summary.
fig, axes = plt.subplots(2, 2, figsize=(20, 15))
fig.suptitle('Win/Loss/Draw Analysis', fontsize=16, fontweight='bold')

# Colours are looked up by result name. Assigning them by position paints the
# wrong outcome, because value_counts() orders by frequency and crosstab
# orders its columns alphabetically (Draw, Loss, Unknown, Win).
colors = ['#2E8B57', '#DC143C', '#FFD700', '#808080']  # Green, Red, Gold, Gray
result_colors = dict(zip(['Win', 'Loss', 'Draw', 'Unknown'], colors))
fallback_color = '#808080'

# 1. Overall Win/Loss/Draw pie chart
result_counts = df['game_result'].value_counts()

ax1 = axes[0, 0]
wedges, texts, autotexts = ax1.pie(result_counts.values, labels=result_counts.index, autopct='%1.1f%%',
                                   colors=[result_colors.get(r, fallback_color) for r in result_counts.index],
                                   startangle=90)
ax1.set_title('Overall Game Results Distribution')

# 2. Win/Loss/Draw by time control
time_control_results = pd.crosstab(df['time_control'], df['game_result'])
ax2 = axes[0, 1]
time_control_results.plot(kind='bar', ax=ax2,
                          color=[result_colors.get(r, fallback_color) for r in time_control_results.columns])
ax2.set_title('Results by Time Control')
ax2.set_xlabel('Time Control')
ax2.set_ylabel('Number of Games')
ax2.legend(title='Result')
ax2.tick_params(axis='x', rotation=45)

# 3. Win rate over time
monthly_results = df.groupby(['year_month', 'game_result']).size().unstack(fill_value=0)
monthly_results['total'] = monthly_results.sum(axis=1)
# .get(..., 0) keeps this working when an outcome never occurs in the data
monthly_results['win_rate'] = (monthly_results.get('Win', 0) / monthly_results['total']) * 100
monthly_results['loss_rate'] = (monthly_results.get('Loss', 0) / monthly_results['total']) * 100
monthly_results['draw_rate'] = (monthly_results.get('Draw', 0) / monthly_results['total']) * 100

ax3 = axes[1, 0]
months = range(len(monthly_results))
ax3.plot(months, monthly_results['win_rate'], marker='o', label='Win Rate', color='green', linewidth=2)
ax3.plot(months, monthly_results['loss_rate'], marker='s', label='Loss Rate', color='red', linewidth=2)
ax3.plot(months, monthly_results['draw_rate'], marker='^', label='Draw Rate', color='gold', linewidth=2)
ax3.set_xlabel('Month')
ax3.set_ylabel('Percentage (%)')
ax3.set_title('Win/Loss/Draw Rate Over Time')
ax3.legend()
ax3.grid(True, alpha=0.3)
ax3.set_xticks(months[::2])  # Show every other month
ax3.set_xticklabels([str(monthly_results.index[i]) for i in months[::2]], rotation=45)

# 4. Results by rating difference
# Player rating minus opponent rating, computed column-wise: keep
# white - black where the player had white, otherwise flip the sign.
white_minus_black = df['rating_white'] - df['rating_black']
df['rating_diff'] = white_minus_black.where(df['player_white'] == 'teoriat', -white_minus_black)

# Create rating difference bins. The outer edges are open-ended so games with
# a gap beyond +/-500 land in the outermost bins instead of becoming NaN and
# vanishing from the chart.
df['rating_diff_bin'] = pd.cut(df['rating_diff'], bins=[-np.inf, -100, -50, 0, 50, 100, np.inf],
                               labels=['<<-100', '-100 to -50', '-50 to 0', '0 to 50', '50 to 100', '>>100'])

rating_results = pd.crosstab(df['rating_diff_bin'], df['game_result'])
ax4 = axes[1, 1]
rating_results.plot(kind='bar', ax=ax4,
                    color=[result_colors.get(r, fallback_color) for r in rating_results.columns])
ax4.set_title('Results by Rating Difference')
ax4.set_xlabel('Rating Difference (Player - Opponent)')
ax4.set_ylabel('Number of Games')
ax4.legend(title='Result')
ax4.tick_params(axis='x', rotation=45)

plt.tight_layout()
plt.show()

# Summary statistics
print("\n=== WIN/LOSS/DRAW SUMMARY ===")
total_games = len(df)
for result, count in result_counts.items():
    percentage = (count / total_games) * 100
    print(f"{result}: {count} games ({percentage:.1f}%)")

decisive_games = result_counts.get('Win', 0) + result_counts.get('Loss', 0)
print(f"\nOverall win rate: {(result_counts.get('Win', 0) / total_games * 100):.1f}%")
print(f"Games with decisive results: {decisive_games} ({(decisive_games / total_games * 100):.1f}%)")
print(f"Average rating difference: {df['rating_diff'].mean():.1f}")

## 3. Opening Move Frequencies

In [None]:
# Opening move frequencies visualization
# Panels: top first moves, top two-ply sequences, first moves split by the
# colour 'teoriat' played, and win rate per first move; then a text summary.
fig, axes = plt.subplots(2, 2, figsize=(20, 15))
fig.suptitle('Opening Move Analysis', fontsize=16, fontweight='bold')
ax1, ax2, ax3, ax4 = axes.flat  # row-major: top-left, top-right, bottom-left, bottom-right

# 1. Most common opening moves
top_openings = df['opening_move'].value_counts().head(10)
bar_slots = range(len(top_openings))
bars = ax1.barh(bar_slots, top_openings.values, color='lightblue')
ax1.set_yticks(bar_slots)
ax1.set_yticklabels(top_openings.index)
ax1.set_xlabel('Number of Games')
ax1.set_title('Top 10 Opening Moves')
ax1.grid(axis='x', alpha=0.3)

# Add value labels just right of each bar
for bar in bars:
    bar_length = bar.get_width()
    label_y = bar.get_y() + bar.get_height()/2
    ax1.text(bar_length + 1, label_y, f'{int(bar_length)}', ha='left', va='center')

# 2. Opening sequences (first 2 moves)
top_sequences = df['opening_sequence'].value_counts().head(10)
ax2.pie(top_sequences.values, labels=top_sequences.index, autopct='%1.1f%%', startangle=90)
ax2.set_title('Top 10 Opening Sequences (2 moves)')

# 3. Opening moves as White vs Black
played_white = df['player_white'] == 'teoriat'
played_black = df['player_black'] == 'teoriat'
white_openings = df[played_white]['opening_move'].value_counts().head(8)
black_responses = df[played_black]['opening_move'].value_counts().head(8)

x_pos = np.arange(len(white_openings))
width = 0.35
# For each move the player opens with, how often it was faced as Black
# (0 when it is not among the top moves faced)
faced_counts = [black_responses.get(move, 0) for move in white_openings.index]

bars1 = ax3.bar(x_pos - width/2, white_openings.values, width, label='As White', color='lightcoral')
bars2 = ax3.bar(x_pos + width/2, faced_counts, width, label='Opponent as White', color='lightblue')

ax3.set_xlabel('Opening Moves')
ax3.set_ylabel('Number of Games')
ax3.set_title('Opening Moves: As White vs Facing as Black')
ax3.set_xticks(x_pos)
ax3.set_xticklabels(white_openings.index, rotation=45)
ax3.legend()
ax3.grid(axis='y', alpha=0.3)

# 4. Success rate by opening move
# Only games with a known outcome count; moves seen in fewer than 10 such
# games are dropped before ranking by win rate.
known_outcome = df['game_result'].isin(['Win', 'Loss', 'Draw'])
opening_success = df[known_outcome].groupby('opening_move').agg({
    'game_result': ['count', lambda x: (x == 'Win').sum()]
}).round(2)
opening_success.columns = ['total_games', 'wins']
opening_success['win_rate'] = (opening_success['wins'] / opening_success['total_games'] * 100).round(1)
enough_games = opening_success['total_games'] >= 10
opening_success = opening_success[enough_games].sort_values('win_rate', ascending=False).head(10)

success_slots = range(len(opening_success))
bars = ax4.bar(success_slots, opening_success['win_rate'], color='lightgreen')
ax4.set_xlabel('Opening Moves')
ax4.set_ylabel('Win Rate (%)')
ax4.set_title('Win Rate by Opening Move (≥10 games)')
ax4.set_xticks(success_slots)
ax4.set_xticklabels(opening_success.index, rotation=45)
ax4.grid(axis='y', alpha=0.3)

# Add value labels above each bar
for bar in bars:
    height = bar.get_height()
    label_x = bar.get_x() + bar.get_width()/2.
    ax4.text(label_x, height + 0.5, f'{height:.1f}%', ha='center', va='bottom')

plt.tight_layout()
plt.show()

# Summary statistics
games_total = len(df)
games_as_white = len(df[played_white])
games_as_black = len(df[played_black])

print("\n=== OPENING MOVES SUMMARY ===")
print(f"Most common opening move: {top_openings.index[0]} ({top_openings.iloc[0]} games, {top_openings.iloc[0]/games_total*100:.1f}%)")
print(f"Most common opening sequence: {top_sequences.index[0]} ({top_sequences.iloc[0]} games)")
print(f"Total unique opening moves: {df['opening_move'].nunique()}")
print(f"Total unique opening sequences: {df['opening_sequence'].nunique()}")
print(f"\nGames as White: {games_as_white} ({games_as_white/games_total*100:.1f}%)")
print(f"Games as Black: {games_as_black} ({games_as_black/games_total*100:.1f}%)")

if len(opening_success) > 0:
    print(f"\nBest performing opening (≥10 games): {opening_success.index[0]} ({opening_success.iloc[0]['win_rate']:.1f}% win rate)")

## 4. Time Controls Analysis

In [None]:
# Time controls analysis
# Panels: time-control share, time-class counts, results per time control and
# time-control usage per month; then a text summary with win rates.
fig, axes = plt.subplots(2, 2, figsize=(20, 15))
fig.suptitle('Time Controls Analysis', fontsize=16, fontweight='bold')

# 1. Time control distribution
time_control_counts = df['time_control'].value_counts()
ax1 = axes[0, 0]
wedges, texts, autotexts = ax1.pie(time_control_counts.values, labels=time_control_counts.index,
                                   autopct='%1.1f%%', startangle=90)
ax1.set_title('Time Control Distribution')

# 2. Time class distribution
time_class_counts = df['time_class'].value_counts()
ax2 = axes[0, 1]
bars = ax2.bar(time_class_counts.index, time_class_counts.values, color=['skyblue', 'lightcoral', 'lightgreen', 'gold'])
ax2.set_xlabel('Time Class')
ax2.set_ylabel('Number of Games')
ax2.set_title('Time Class Distribution')
ax2.grid(axis='y', alpha=0.3)

# Add value labels
for bar in bars:
    height = bar.get_height()
    ax2.text(bar.get_x() + bar.get_width()/2., height + 10,
             f'{int(height)}', ha='center', va='bottom')

# 3. Time control vs results
# Colours are looked up by result name: crosstab orders its columns
# alphabetically (Draw, Loss, Unknown, Win), so a positional colour list
# would paint draws red and losses gold.
result_colors = {'Win': '#2E8B57', 'Loss': '#DC143C', 'Draw': '#FFD700', 'Unknown': '#808080'}
time_control_results = pd.crosstab(df['time_control'], df['game_result'])
ax3 = axes[1, 0]
time_control_results.plot(kind='bar', ax=ax3,
                          color=[result_colors.get(r, '#808080') for r in time_control_results.columns])
ax3.set_title('Results by Time Control')
ax3.set_xlabel('Time Control')
ax3.set_ylabel('Number of Games')
ax3.legend(title='Result')
ax3.tick_params(axis='x', rotation=45)

# 4. Time control usage over time
time_control_monthly = df.groupby(['year_month', 'time_control']).size().unstack(fill_value=0)
ax4 = axes[1, 1]
time_control_monthly.plot(kind='area', ax=ax4, alpha=0.7)
ax4.set_title('Time Control Usage Over Time')
ax4.set_xlabel('Month')
ax4.set_ylabel('Number of Games')
ax4.legend(title='Time Control', bbox_to_anchor=(1.05, 1), loc='upper left')
ax4.tick_params(axis='x', rotation=45)

plt.tight_layout()
plt.show()

# Detailed time control analysis
print("\n=== TIME CONTROLS SUMMARY ===")
print("Time Control Distribution:")
for tc, count in time_control_counts.items():
    percentage = (count / len(df)) * 100
    print(f"  {tc}: {count} games ({percentage:.1f}%)")

print("\nTime Class Distribution:")
for tc, count in time_class_counts.items():
    percentage = (count / len(df)) * 100
    print(f"  {tc}: {count} games ({percentage:.1f}%)")

# Win rates by time control (games with an 'Unknown' result are excluded
# from the denominator; time controls with no known result are skipped)
print("\nWin Rates by Time Control:")
for tc in time_control_counts.index:
    tc_games = df[df['time_control'] == tc]
    wins = len(tc_games[tc_games['game_result'] == 'Win'])
    total = len(tc_games[tc_games['game_result'].isin(['Win', 'Loss', 'Draw'])])
    if total > 0:
        win_rate = (wins / total) * 100
        print(f"  {tc}: {win_rate:.1f}% ({wins}/{total} games)")

## 5. Additional Analysis & Insights

In [None]:
# Additional analysis and insights
# Panels built here: monthly average rating, opponent rating histogram and
# games per weekday (the fourth panel is filled in further down the cell).
fig, axes = plt.subplots(2, 2, figsize=(20, 15))
fig.suptitle('Additional Analysis & Insights', fontsize=16, fontweight='bold')

# 1. Rating progression over time
# The player's own rating in each game: the white rating where the player had
# white, otherwise the black rating (column-wise instead of a row-wise apply).
as_white = df['player_white'] == 'teoriat'
df['player_rating'] = df['rating_white'].where(as_white, df['rating_black'])

# Monthly average rating
monthly_rating = df.groupby('year_month')['player_rating'].mean()
ax1 = axes[0, 0]
ax1.plot(range(len(monthly_rating)), monthly_rating.values, marker='o', linewidth=2, markersize=6)
ax1.set_xlabel('Month')
ax1.set_ylabel('Average Rating')
ax1.set_title('Rating Progression Over Time')
ax1.grid(True, alpha=0.3)
ax1.set_xticks(range(len(monthly_rating)))
ax1.set_xticklabels([str(month) for month in monthly_rating.index], rotation=45)

# 2. Opponent rating distribution
# Mirror image of player_rating: the rating of the other side.
df['opponent_rating'] = df['rating_black'].where(as_white, df['rating_white'])

ax2 = axes[0, 1]
ax2.hist(df['opponent_rating'], bins=30, alpha=0.7, color='lightblue', edgecolor='black')
ax2.axvline(df['opponent_rating'].mean(), color='red', linestyle='--', linewidth=2, label=f'Mean: {df["opponent_rating"].mean():.0f}')
ax2.set_xlabel('Opponent Rating')
ax2.set_ylabel('Number of Games')
ax2.set_title('Opponent Rating Distribution')
ax2.legend()
ax2.grid(axis='y', alpha=0.3)

# 3. Games per day of week
day_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
# fill_value=0: a weekday with no games would otherwise be NaN, and the
# int(height) call in the label loop below raises on NaN.
daily_games = df['day_of_week'].value_counts().reindex(day_order, fill_value=0)
ax3 = axes[1, 0]
bars = ax3.bar(daily_games.index, daily_games.values, color='lightgreen')
ax3.set_xlabel('Day of Week')
ax3.set_ylabel('Number of Games')
ax3.set_title('Games by Day of Week')
ax3.tick_params(axis='x', rotation=45)
ax3.grid(axis='y', alpha=0.3)

# Add value labels
for bar in bars:
    height = bar.get_height()
    ax3.text(bar.get_x() + bar.get_width()/2., height + 5,
             f'{int(height)}', ha='center', va='bottom')
# 4. Game duration analysis (if available in PGN)
def extract_game_duration(pgn_text):
    """Estimate game length as the number of move numbers in the PGN movetext.

    Returns 0 for a missing PGN. Header tag lines and ``{...}`` comments are
    removed first, and black continuation markers ("12...") are not counted,
    so digits followed by a dot in dates or comment annotations no longer
    inflate the estimate.
    """
    if pd.isna(pgn_text):
        return 0

    # Comments go first because they may themselves contain square brackets.
    movetext = re.sub(r'\{[^}]*\}', ' ', pgn_text)
    movetext = re.sub(r'^\s*\[[^\]\n]*\]\s*$', ' ', movetext, flags=re.MULTILINE)

    # Count "N." markers only: not preceded by a digit or dot, and not
    # followed by another dot (which would make it a black "N..." marker).
    move_numbers = re.findall(r'(?<![\d.])\d+\.(?!\.)', movetext)
    return len(move_numbers)

# Estimated game length per game, then its histogram in the last panel
df['estimated_moves'] = df['pgn'].map(extract_game_duration)
mean_moves = df['estimated_moves'].mean()

ax4 = axes[1, 1]
ax4.hist(df['estimated_moves'], bins=30, alpha=0.7, color='gold', edgecolor='black')
# Dashed vertical line marking the mean
ax4.axvline(mean_moves, color='red', linestyle='--', linewidth=2,
            label=f'Mean: {mean_moves:.1f} moves')
ax4.set_xlabel('Estimated Number of Moves')
ax4.set_ylabel('Number of Games')
ax4.set_title('Game Length Distribution')
ax4.legend()
ax4.grid(axis='y', alpha=0.3)

plt.tight_layout()
plt.show()

# Summary insights
first_month_rating = monthly_rating.iloc[0]
last_month_rating = monthly_rating.iloc[-1]

print("\n=== ADDITIONAL INSIGHTS ===")
print(f"Rating progression: {first_month_rating:.0f} → {last_month_rating:.0f} ({last_month_rating - first_month_rating:+.0f})")
print(f"Average opponent rating: {df['opponent_rating'].mean():.0f}")
print(f"Most active day: {daily_games.idxmax()} ({daily_games.max()} games)")
print(f"Average game length: {mean_moves:.1f} moves")
print(f"Longest game: {df['estimated_moves'].max()} moves")
print(f"Shortest game: {df['estimated_moves'].min()} moves")

## 6. Summary & Observations

In [None]:
# Generate comprehensive summary
# Text-only recap. Relies on objects built by earlier cells: df (with the
# derived columns), time_control_counts, daily_games, top_openings and
# monthly_rating.
print("\n" + "="*60)
print("                   CHESS ANALYSIS SUMMARY")
print("="*60)

first_game = df['end_time'].min()
last_game = df['end_time'].max()
span_days = (last_game - first_game).days

print("\n📊 DATASET OVERVIEW:")
print(f"   • Total games analyzed: {len(df):,}")
print(f"   • Date range: {first_game.strftime('%Y-%m-%d')} to {last_game.strftime('%Y-%m-%d')}")
print(f"   • Analysis period: {span_days} days")
# When every game falls within a single day the span is 0 days; treat it as
# one day so the average does not raise ZeroDivisionError.
print(f"   • Average games per day: {len(df) / max(span_days, 1):.1f}")

print("\n🎯 PERFORMANCE METRICS:")
result_counts = df['game_result'].value_counts()
total_decisive = result_counts.get('Win', 0) + result_counts.get('Loss', 0)
if total_decisive > 0:
    win_percentage = (result_counts.get('Win', 0) / total_decisive) * 100
    print(f"   • Overall win rate: {win_percentage:.1f}% ({result_counts.get('Win', 0)}/{total_decisive} decisive games)")
draw_rate = (result_counts.get('Draw', 0) / len(df)) * 100
print(f"   • Draw rate: {draw_rate:.1f}%")
# time_control_counts is ordered by number of games, so its first entry is
# the most *played* time control, not the most successful one.
print(f"   • Most played time control: {time_control_counts.index[0]} ({time_control_counts.iloc[0]} games)")

# Most successful = highest share of wins among games with a known result,
# limited to time controls with at least 10 such games (the same threshold
# the opening analysis uses).
known_games = df[df['game_result'].isin(['Win', 'Loss', 'Draw'])]
tc_performance = (
    known_games['game_result'].eq('Win')
    .groupby(known_games['time_control'])
    .agg(['mean', 'size'])
)
tc_performance = tc_performance[tc_performance['size'] >= 10]
if not tc_performance.empty:
    best_tc = tc_performance['mean'].idxmax()
    best_rate = tc_performance.loc[best_tc, 'mean'] * 100
    best_games = tc_performance.loc[best_tc, 'size']
    print(f"   • Most successful time control: {best_tc} ({best_rate:.1f}% win rate over {best_games} games)")

print("\n🏆 ACTIVITY PATTERNS:")
monthly_games = df.groupby('year_month').size()
print(f"   • Most active month: {monthly_games.idxmax()} ({monthly_games.max()} games)")
print(f"   • Least active month: {monthly_games.idxmin()} ({monthly_games.min()} games)")
print(f"   • Average monthly activity: {monthly_games.mean():.1f} games")
print(f"   • Preferred day: {daily_games.idxmax()} ({daily_games.max()} games)")

print("\n♟️ PLAYING STYLE INSIGHTS:")
print(f"   • Favorite opening move: {top_openings.index[0]} ({top_openings.iloc[0]} games, {top_openings.iloc[0]/len(df)*100:.1f}%)")
print(f"   • Games as White: {len(df[df['player_white'] == 'teoriat'])} ({len(df[df['player_white'] == 'teoriat'])/len(df)*100:.1f}%)")
print(f"   • Games as Black: {len(df[df['player_black'] == 'teoriat'])} ({len(df[df['player_black'] == 'teoriat'])/len(df)*100:.1f}%)")
print(f"   • Average game length: {df['estimated_moves'].mean():.1f} moves")
print(f"   • Opening diversity: {df['opening_move'].nunique()} different opening moves")

print("\n📈 RATING & IMPROVEMENT:")
# "Starting" and "current" are the average ratings of the first and last month
rating_change = monthly_rating.iloc[-1] - monthly_rating.iloc[0]
print(f"   • Starting rating: {monthly_rating.iloc[0]:.0f}")
print(f"   • Current rating: {monthly_rating.iloc[-1]:.0f}")
print(f"   • Rating change: {rating_change:+.0f} points")
print(f"   • Average opponent rating: {df['opponent_rating'].mean():.0f}")
print(f"   • Rating difference trend: {df['rating_diff'].mean():+.1f} points")

print("\n🔍 KEY OBSERVATIONS:")
observations = []

# Time control preference
dominant_time_control = time_control_counts.index[0]
tc_percentage = (time_control_counts.iloc[0] / len(df)) * 100
observations.append(f"Strong preference for {dominant_time_control} time control ({tc_percentage:.1f}% of games)")

# Rating trend
if rating_change > 0:
    observations.append(f"Positive rating trend with {rating_change:+.0f} point improvement")
elif rating_change < 0:
    observations.append(f"Rating decline of {abs(rating_change):.0f} points needs attention")

# Activity level
if monthly_games.mean() > 100:
    observations.append("High activity level with consistent monthly play")

# Draw rate analysis
if draw_rate > 20:
    observations.append(f"High draw rate ({draw_rate:.1f}%) suggests solid positional play")

# Opening consistency
top_opening_rate = (top_openings.iloc[0] / len(df)) * 100
if top_opening_rate > 30:
    observations.append(f"High opening consistency with {top_opening_rate:.1f}% games using {top_openings.index[0]}")

for i, obs in enumerate(observations, 1):
    print(f"   {i}. {obs}")

print("\n" + "="*60)
print("Analysis completed successfully! 🎉")
print("="*60)