In [173]:
import pandas as pd
import os

# Load the data
matches_path = os.path.join('..', 'valorant', 'vct_2025', 'matches', 'overview.csv')
players_path = os.path.join('..', 'valorant', 'vct_2025', 'players_stats', 'players_stats.csv')

df_matches = pd.read_csv(matches_path)
df_players = pd.read_csv(players_path)

print(f"‚úÖ Loaded {len(df_matches):,} match records")
print(f"‚úÖ Loaded {len(df_players):,} player stat records")

‚úÖ Loaded 53,226 match records
‚úÖ Loaded 17,996 player stat records


In [174]:
print("="*60)
print("MATCHES DATASET")
print("="*60)
print(f"Shape: {df_matches.shape[0]} rows x {df_matches.shape[1]} columns\n")
print("Columns:")
for i, (col, dtype) in enumerate(df_matches.dtypes.items(), 1):
    print(f"  {i:2}. {col:<30} ({dtype})")

MATCHES DATASET
Shape: 53226 rows x 21 columns

Columns:
   1. Tournament                     (object)
   2. Stage                          (object)
   3. Match Type                     (object)
   4. Match Name                     (object)
   5. Map                            (object)
   6. Player                         (object)
   7. Team                           (object)
   8. Agents                         (object)
   9. Rating                         (float64)
  10. Average Combat Score           (float64)
  11. Kills                          (float64)
  12. Deaths                         (float64)
  13. Assists                        (float64)
  14. Kills - Deaths (KD)            (float64)
  15. Kill, Assist, Trade, Survive % (object)
  16. Average Damage Per Round       (float64)
  17. Headshot %                     (object)
  18. First Kills                    (float64)
  19. First Deaths                   (float64)
  20. Kills - Deaths (FKD)           (float64)
  21. Side   

In [175]:
print("="*60)
print("PLAYERS DATASET")
print("="*60)
print(f"Shape: {df_players.shape[0]} rows x {df_players.shape[1]} columns\n")
print("Columns:")
for i, (col, dtype) in enumerate(df_players.dtypes.items(), 1):
    print(f"  {i:2}. {col:<30} ({dtype})")

PLAYERS DATASET
Shape: 17996 rows x 25 columns

Columns:
   1. Tournament                     (object)
   2. Stage                          (object)
   3. Match Type                     (object)
   4. Player                         (object)
   5. Teams                          (object)
   6. Agents                         (object)
   7. Rounds Played                  (int64)
   8. Rating                         (float64)
   9. Average Combat Score           (float64)
  10. Kills:Deaths                   (float64)
  11. Kill, Assist, Trade, Survive % (object)
  12. Average Damage Per Round       (float64)
  13. Kills Per Round                (float64)
  14. Assists Per Round              (float64)
  15. First Kills Per Round          (float64)
  16. First Deaths Per Round         (float64)
  17. Headshot %                     (object)
  18. Clutch Success %               (object)
  19. Clutches (won/played)          (object)
  20. Maximum Kills in a Single Map  (int64)
  21. Kills      

In [176]:
print("Sample match record:")
df_matches.head(3)

Sample match record:


Unnamed: 0,Tournament,Stage,Match Type,Match Name,Map,Player,Team,Agents,Rating,Average Combat Score,...,Deaths,Assists,Kills - Deaths (KD),"Kill, Assist, Trade, Survive %",Average Damage Per Round,Headshot %,First Kills,First Deaths,Kills - Deaths (FKD),Side
0,Valorant Champions 2025,Group Stage,Opening (A),Paper Rex vs Xi Lai Gaming,Bind,something,Paper Rex,yoru,1.63,258.0,...,12.0,12.0,9.0,91%,167.0,15%,4.0,0.0,4.0,both
1,Valorant Champions 2025,Group Stage,Opening (A),Paper Rex vs Xi Lai Gaming,Bind,something,Paper Rex,yoru,1.33,199.0,...,5.0,6.0,2.0,100%,137.0,16%,1.0,0.0,1.0,attack
2,Valorant Champions 2025,Group Stage,Opening (A),Paper Rex vs Xi Lai Gaming,Bind,something,Paper Rex,yoru,1.88,308.0,...,7.0,6.0,7.0,83%,192.0,13%,3.0,0.0,3.0,defend


In [177]:
print("Sample player stats record:")
df_players.head(3)

Sample player stats record:


Unnamed: 0,Tournament,Stage,Match Type,Player,Teams,Agents,Rounds Played,Rating,Average Combat Score,Kills:Deaths,...,First Deaths Per Round,Headshot %,Clutch Success %,Clutches (won/played),Maximum Kills in a Single Map,Kills,Deaths,Assists,First Kills,First Deaths
0,Valorant Champions 2025,Playoffs,Upper Quarterfinals,Boo,Team Heretics,astra,26,0.68,129.0,0.61,...,0.12,44%,,0/4,11,11,18,4,3,3
1,Valorant Champions 2025,Playoffs,Upper Quarterfinals,Boo,Team Heretics,omen,19,0.46,98.0,0.4,...,0.26,37%,33%,1/3,6,6,15,5,1,5
2,Valorant Champions 2025,Playoffs,Upper Quarterfinals,Boo,Team Heretics,"astra, omen",45,0.59,114.0,0.52,...,0.18,41%,14%,1/7,11,17,33,9,4,8


In [178]:
# What maps are in the data?
map_col = 'Map' if 'Map' in df_matches.columns else 'map' if 'map' in df_matches.columns else None
if map_col:
    print(f"üó∫Ô∏è Maps ({df_matches[map_col].nunique()} unique):")
    print(df_matches[map_col].value_counts())
else:
    print("Map column not found. Available columns:")
    print(df_matches.columns.tolist())

üó∫Ô∏è Maps (12 unique):
All Maps    14946
Lotus        7050
Haven        5550
Ascent       3750
Icebox       3540
Bind         3240
Split        3120
Pearl        2910
Fracture     2640
Sunset       2520
Corrode      2190
Abyss        1770
Name: Map, dtype: int64


In [179]:
# What agents are played? (check players dataset)
agent_col = 'Agents' if 'Agents' in df_players.columns else 'agent' if 'agent' in df_players.columns else None
if agent_col:
    print(f"üé≠ Agents ({df_players[agent_col].nunique()} unique):")
    print(df_players[agent_col].value_counts().head(15))
else:
    print("Agent column not found. Available columns:")
    print(df_players.columns.tolist())

üé≠ Agents (1001 unique):
omen       1319
viper      1094
sova        942
fade        823
cypher      809
yoru        785
raze        722
vyse        708
breach      663
tejo        634
neon        612
jett        606
killjoy     582
astra       521
kayo        466
Name: Agents, dtype: int64


In [180]:
# How many unique players?
player_col = 'Player' if 'Player' in df_players.columns else 'player' if 'player' in df_players.columns else 'player_name' if 'player_name' in df_players.columns else None
if player_col:
    print(f"üë§ Unique Players: {df_players[player_col].nunique()}")
    print(f"\nTop 10 players by appearances:")
    print(df_players[player_col].value_counts().head(10))
else:
    print("Player column not found. Available columns:")
    print(df_players.columns.tolist())

üë§ Unique Players: 313

Top 10 players by appearances:
Alfajer      151
paTiTek      148
keiko        140
kamo         138
N4RRATE      132
Zellsis      130
kaajak       129
JonahP       128
iZu          126
Chronicle    125
Name: Player, dtype: int64


In [181]:
# What teams are in the data?
team_col = 'Teams' if 'Teams' in df_players.columns else 'team' if 'team' in df_players.columns else None
if team_col:
    print(f"üèÜ Unique Teams: {df_players[team_col].nunique()}")
    print(f"\nTop 15 teams by player appearances:")
    print(df_players[team_col].value_counts().head(15))
else:
    print("Team column not found. Checking matches dataset...")
    print(df_matches.columns.tolist())

üèÜ Unique Teams: 58

Top 15 teams by player appearances:
Team Liquid        701
FNATIC             623
Sentinels          593
Rex Regum Qeon     581
T1                 574
G2 Esports         573
Paper Rex          562
EDward Gaming      562
Bilibili Gaming    514
DRX                500
Xi Lai Gaming      485
MIBR               479
Team Heretics      478
Mega Minors        476
GIANTX             428
Name: Teams, dtype: int64


In [182]:
print("MATCHES - Missing Values:")
missing_matches = df_matches.isnull().sum()
missing_matches = missing_matches[missing_matches > 0]
if len(missing_matches) > 0:
    print(missing_matches)
else:
    print("  ‚úÖ No missing values!")

MATCHES - Missing Values:
Rating                            4080
Average Combat Score              2745
Kills                             2550
Deaths                            2550
Assists                           2550
Kills - Deaths (KD)               2556
Kill, Assist, Trade, Survive %    3882
Average Damage Per Round          4056
Headshot %                        3849
First Kills                       3960
First Deaths                      3957
Kills - Deaths (FKD)              3957
dtype: int64


In [183]:
print("PLAYERS - Missing Values:")
missing_players = df_players.isnull().sum()
missing_players = missing_players[missing_players > 0]
if len(missing_players) > 0:
    print(missing_players)
else:
    print("  ‚úÖ No missing values!")

PLAYERS - Missing Values:
Rating                             1392
Average Combat Score                 10
Kill, Assist, Trade, Survive %     1304
Average Damage Per Round           1370
First Kills Per Round              1351
First Deaths Per Round             1350
Headshot %                         1308
Clutch Success %                  11337
Clutches (won/played)              4733
dtype: int64


In [184]:
# Get numeric columns from players dataset
numeric_cols = df_players.select_dtypes(include=['int64', 'float64']).columns.tolist()
print(f"Numeric columns in players dataset: {numeric_cols}")

Numeric columns in players dataset: ['Rounds Played', 'Rating', 'Average Combat Score', 'Kills:Deaths', 'Average Damage Per Round', 'Kills Per Round', 'Assists Per Round', 'First Kills Per Round', 'First Deaths Per Round', 'Maximum Kills in a Single Map', 'Kills', 'Deaths', 'Assists', 'First Kills', 'First Deaths']


In [185]:
# Summary stats for key metrics
df_players.describe()

Unnamed: 0,Rounds Played,Rating,Average Combat Score,Kills:Deaths,Average Damage Per Round,Kills Per Round,Assists Per Round,First Kills Per Round,First Deaths Per Round,Maximum Kills in a Single Map,Kills,Deaths,Assists,First Kills,First Deaths
count,17996.0,16604.0,17986.0,17996.0,16626.0,17996.0,17996.0,16645.0,16646.0,17996.0,17996.0,17996.0,17996.0,17996.0,17996.0
mean,56.905979,0.988747,195.649283,1.036644,128.316312,0.688337,0.279657,0.099878,0.101812,16.917648,39.377195,39.402367,16.060736,5.273727,5.281118
std,73.473854,0.275065,49.065678,0.451347,30.629254,0.192783,0.14714,0.071383,0.068724,5.739903,52.7163,50.380248,23.28862,8.685712,8.120614
min,13.0,0.03,22.0,0.0,20.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
25%,22.0,0.82,163.0,0.77,108.0,0.56,0.17,0.05,0.05,13.0,15.0,15.0,5.0,1.0,1.0
50%,37.0,0.98,193.0,0.98,127.0,0.68,0.26,0.09,0.09,17.0,24.0,24.0,9.0,3.0,3.0
75%,61.0,1.15,225.0,1.21,146.0,0.8,0.36,0.14,0.14,21.0,41.0,41.0,17.0,6.0,6.0
max,696.0,2.66,509.0,18.0,307.0,1.95,1.57,0.5,0.53,42.0,579.0,485.0,345.0,137.0,111.0


In [186]:
# Find common columns between datasets
common_cols = set(df_matches.columns) & set(df_players.columns)
print(f"Common columns (potential join keys): {common_cols}")

Common columns (potential join keys): {'Deaths', 'Match Type', 'Average Combat Score', 'Rating', 'Headshot %', 'Average Damage Per Round', 'Stage', 'Assists', 'First Kills', 'Player', 'Agents', 'Kills', 'Kill, Assist, Trade, Survive %', 'Tournament', 'First Deaths'}


In [187]:
# Find the stat columns (ACS, K, D, A, etc.)
# Common stat column names in VCT data
possible_stats = ['ACS', 'K', 'D', 'A', 'Kills', 'Deaths', 'Assists', 'ADR', 'KAST', 'Rating']
found_stats = [col for col in possible_stats if col in df_players.columns]
print(f"Found stat columns: {found_stats}")

# Also check for lowercase versions
found_stats_lower = [col for col in df_players.columns if col.lower() in [s.lower() for s in possible_stats]]
print(f"All matching (case-insensitive): {found_stats_lower}")

Found stat columns: ['Kills', 'Deaths', 'Assists', 'Rating']
All matching (case-insensitive): ['Rating', 'Kills', 'Deaths', 'Assists']


In [188]:
# Try to find top players by ACS (or similar stat)
acs_col = 'Average Combat Score' if 'Average Combat Score' in df_players.columns else 'acs' if 'acs' in df_players.columns else None
player_col = 'Player' if 'Player' in df_players.columns else 'player' if 'player' in df_players.columns else None

if acs_col and player_col:
    # Average ACS per player (need at least 5 games)
    player_acs = df_players.groupby(player_col).agg({
        acs_col: 'mean',
        player_col: 'count'  # This will count games
    }).rename(columns={player_col: 'games'})
    
    # Filter to players with 5+ games
    qualified = player_acs[player_acs['games'] >= 5].sort_values(acs_col, ascending=False)
    print(f"üèÜ Top 10 Players by Avg ACS (min 5 games):")
    print(qualified.head(10))
else:
    print(f"Couldn't find ACS or Player columns")
    print(f"Available: {df_players.columns.tolist()}")

üèÜ Top 10 Players by Avg ACS (min 5 games):
            Average Combat Score  games
Player                                 
Sato                  252.935484     31
ZmjjKK                249.444444    117
OXY                   246.261538     65
whzy                  242.113402     97
lukxo                 238.777778     36
Derke                 238.573529     68
florescent            238.478261     23
slowly                236.344828     58
HYUNMIN               236.318182     88
aspas                 236.282609     92
