In [98]:
import pandas as pd
import os
import glob

In [99]:
# Define legend categories
legend_categories = {
    'Assault': ['Bangalore', 'Fuse', 'Ash', 'Mad Maggie', 'Ballistic'],
    'Skirmisher': ['Pathfinder', 'Wraith', 'Octane', 'Revenant', 'Horizon', 'Valkyrie','Alter'],
    'Recon': ['Bloodhound', 'Crypto', 'Seer', 'Vantage'],
    'Support': ['Gibraltar', 'Lifeline', 'Mirage', 'Loba', 'Newcastle', 'Conduit'],
    'Controller': ['Caustic', 'Wattson', 'Rampart', 'Catalyst']
}

In [100]:
# Flatten the dictionary to map legend to category
legend_to_category = {}
for category, legends in legend_categories.items():
    for legend in legends:
        legend_to_category[legend] = category


In [101]:
base_path = r'C:\Users\altaa\Documents\GitHub\Apex-Legends-Research\New Data For Legends'

damage_path = os.path.join(base_path, 'Legend Damage')
kills_path = os.path.join(base_path, 'Legend Kills')
matches_path = os.path.join(base_path, 'Legend Matches Played')
wins_path = os.path.join(base_path, 'Legend Wins')
# Function to list files in a directory
def list_files(directory):
    print(f"Files in {directory}:")
    for f in os.listdir(directory):
        print(f)

# List files in each directory
list_files(damage_path)
list_files(kills_path)
list_files(matches_path)
list_files(wins_path)

Files in C:\Users\altaa\Documents\GitHub\Apex-Legends-Research\New Data For Legends\Legend Damage:
Alter_damage.csv
Ash_damage.csv
Ballistic_damage.csv
Bangalore_damage.csv
Bloodhound_damage.csv
Catalyst_damage.csv
Caustic_damage.csv
Conduit_damage.csv
Crypto_damage.csv
Fuse_damage.csv
Gibraltar_damage.csv
Horizon_damage.csv
Lifeline_damage.csv
Loba_damage.csv
Mad Maggie_damage.csv
Mirage_damage.csv
Newcastle_damage.csv
Octane_damage.csv
Pathfinder_damage.csv
Rampart_damage.csv
Revenant_damage.csv
Seer_damage.csv
Valkyrie_damage.csv
Vantage_damage.csv
Wattson_damage.csv
Wraith_damage.csv
Files in C:\Users\altaa\Documents\GitHub\Apex-Legends-Research\New Data For Legends\Legend Kills:
Alter_kills.csv
Ash_kills.csv
Ballistic_kills.csv
Bangalore_kills.csv
Bloodhound_kills.csv
Catalyst_kills.csv
Caustic_kills.csv
Conduit_kills.csv
Crypto_kills.csv
Fuse_kills.csv
Gibraltar_kills.csv
Horizon_kills.csv
Lifeline_kills.csv
Loba_kills.csv
Mad Maggie_kills.csv
Mirage_kills.csv
Newcastle_kills.csv

In [102]:
legend_dataframes = []

# Get a unique list of legends from the categories
legends = []
for legends_list in legend_categories.values():
    legends.extend(legends_list)

# Remove duplicates, if any
legends = list(set(legends))

legends

['Mad Maggie',
 'Wattson',
 'Bangalore',
 'Valkyrie',
 'Alter',
 'Newcastle',
 'Seer',
 'Conduit',
 'Loba',
 'Ballistic',
 'Mirage',
 'Catalyst',
 'Lifeline',
 'Vantage',
 'Rampart',
 'Gibraltar',
 'Caustic',
 'Wraith',
 'Horizon',
 'Bloodhound',
 'Revenant',
 'Octane',
 'Crypto',
 'Fuse',
 'Pathfinder',
 'Ash']

In [103]:
# import pandas as pd
# import os

# legend_kills_file = os.path.join(kills_path, f"{legend}_kills.csv")

# # Custom converter to handle numeric values and ignore non-numeric ones
# def convert_number(s):
#     try:
#         # Remove commas and quotes, then convert to integer
#         return int(str(s).replace(',', '').replace('"', '').strip())
#     except ValueError:
#         # Return NaN if conversion fails
#         return pd.NA

# # Read the CSV file, skipping the header if needed, or using header=0 to infer
# df_kills = pd.read_csv(legend_kills_file, header=0, names=['Kills'], converters={0: convert_number})

# # You can inspect the dataframe
# print(df_kills)


In [104]:
import os
import pandas as pd

# Get list of legends
legends = list(legend_to_category.keys())

# Initialize an empty list to hold dataframes for each legend
legend_dataframes = []

# Loop over each legend
for legend in legends:
    try:
        # Build file paths for the legend
        legend_damage_file = os.path.join(damage_path, f"{legend}_damage.csv")
        legend_kills_file = os.path.join(kills_path, f"{legend}_kills.csv")
        legend_matches_file = os.path.join(matches_path, f"{legend}_games_played.csv")  # Updated suffix
        legend_wins_file = os.path.join(wins_path, f"{legend}_wins.csv")
        
        # Check if all files exist
        for file_path in [legend_damage_file, legend_kills_file, legend_matches_file, legend_wins_file]:
            if not os.path.exists(file_path):
                print(f"Data file missing for legend '{legend}': {file_path}. Skipping this legend.")
                continue  # Skip this legend if any file is missing
        
        # Read the data files
        df_damage = pd.read_csv(legend_damage_file, header=None, names=['Damage'])
        df_matches = pd.read_csv(legend_matches_file, header=None, names=['Games Played'])
        df_wins = pd.read_csv(legend_wins_file, header=None, names=['Wins'])

        # Drop the first row if it's a header (based on your approach)
        df_damage = df_damage.drop(df_damage.index[0])
        df_matches = df_matches.drop(df_matches.index[0])
        df_wins = df_wins.drop(df_wins.index[0])
        
        # Custom converter to handle numeric values and ignore non-numeric ones
        def convert_number(s):
            try:
                # Remove commas and quotes, then convert to integer
                return int(str(s).replace(',', '').replace('"', '').strip())
            except ValueError:
                # Return NaN if conversion fails
                return pd.NA

        # Read the kills CSV file with the custom converter
        df_kills = pd.read_csv(legend_kills_file, header=0, names=['Kills'], converters={0: convert_number})

        # Combine the data into a single DataFrame, aligning on index (axis=1)
        df_legend = pd.concat([df_damage.reset_index(drop=True), 
                               df_kills.reset_index(drop=True), 
                               df_matches.reset_index(drop=True), 
                               df_wins.reset_index(drop=True)], axis=1)

        # Add 'legend_name' column to identify the legend in the combined DataFrame
        df_legend['legend_name'] = legend

        # Append the processed DataFrame to the list
        legend_dataframes.append(df_legend)
    
    except Exception as e:
        # Catch any exceptions during processing and print the error
        print(f"An error occurred while processing legend '{legend}': {e}")

legend_dataframes

[         Damage   Kills Games Played   Wins legend_name
 0    1335473024  192425        47418  15355   Bangalore
 1      50500034  177520        35645  14088   Bangalore
 2      48186631  172382        32770  13041   Bangalore
 3      45909908  170902        30597  12675   Bangalore
 4      45231990  168326        28659  12637   Bangalore
 ..          ...     ...          ...    ...         ...
 495    12207812   42522         8325   2422   Bangalore
 496    12206189   42420         8313   2421   Bangalore
 497    12187408   42397         8307   2419   Bangalore
 498    12183304   42366         8274   2419   Bangalore
 499    12182735   42338         8264   2417   Bangalore
 
 [500 rows x 5 columns],
        Damage  Kills Games Played  Wins legend_name
 0    25978220  94621        21979  5705        Fuse
 1    22871307  75600        18028  4890        Fuse
 2    20068143  68322        17543  4889        Fuse
 3    19015432  67357        17463  4260        Fuse
 4    18106947  67347   

In [105]:
# Concatenate all legend dataframes
all_legends_df = pd.concat(legend_dataframes, ignore_index=True)


In [106]:
all_legends_df['Legend_Category'] = all_legends_df['legend_name'].map(legend_to_category)


In [107]:
# Before renaming, let's check the columns
print(all_legends_df.columns)
# Rename columns if necessary
# all_legends_df.rename(columns={
#     'damage': 'total_damage',
#     'kills': 'total_kills',
#     'matches_played': 'matches_played',
#     'wins': 'total_wins'
# }, inplace=True)


Index(['Damage', 'Kills', 'Games Played', 'Wins', 'legend_name',
       'Legend_Category'],
      dtype='object')


In [113]:
# Ensure that columns are numeric, forcing invalid values to NaN
all_legends_df['Kills'] = pd.to_numeric(all_legends_df['Kills'], errors='coerce')
all_legends_df['Wins'] = pd.to_numeric(all_legends_df['Wins'], errors='coerce')
all_legends_df['Games Played'] = pd.to_numeric(all_legends_df['Games Played'], errors='coerce')
all_legends_df['Damage'] = pd.to_numeric(all_legends_df['Damage'], errors='coerce')

# Group the dataframe by 'Legend Category'
grouped_legends_df = all_legends_df.groupby('Legend_Category')

# Define a function to apply calculations within each group
def calculate_stats(group):
    group['Kills_per_Win'] = group.apply(lambda row: row['Kills'] / row['Wins'] if pd.notna(row['Wins']) and row['Wins'] > 0 else 0, axis=1)
    group['Kills_per_Match'] = group.apply(lambda row: row['Kills'] / row['Games Played'] if pd.notna(row['Games Played']) and row['Games Played'] > 0 else 0, axis=1)
    group['Damage_per_Match'] = group.apply(lambda row: row['Damage'] / row['Games Played'] if pd.notna(row['Games Played']) and row['Games Played'] > 0 else 0, axis=1)
    return group

# Apply the function to each group and calculate stats
grouped_legends_df.apply(calculate_stats).reset_index(drop=True)


  grouped_legends_df.apply(calculate_stats).reset_index(drop=True)


Unnamed: 0,Damage,Kills,Games Played,Wins,legend_name,Legend_Category,Kills_per_Win,Kills_per_Match,Damage_per_Match
0,1335473024,192425,47418,15355.0,Bangalore,Assault,12.531749,4.058058,28163.841242
1,50500034,177520,35645,14088.0,Bangalore,Assault,12.600795,4.980222,1416.749446
2,48186631,172382,32770,13041.0,Bangalore,Assault,13.218465,5.260360,1470.449527
3,45909908,170902,30597,12675.0,Bangalore,Assault,13.483393,5.585580,1500.470896
4,45231990,168326,28659,12637.0,Bangalore,Assault,13.320092,5.873408,1578.282215
...,...,...,...,...,...,...,...,...,...
12995,1850434,9396,1329,,Conduit,Support,0.000000,7.069977,1392.350640
12996,1849847,9351,1328,,Conduit,Support,0.000000,7.041416,1392.957078
12997,1848894,9351,1328,,Conduit,Support,0.000000,7.041416,1392.239458
12998,1848094,9338,1326,,Conduit,Support,0.000000,7.042232,1393.736048


In [112]:
all_legends_with_stats_df

Unnamed: 0,Damage,Kills,Games Played,Wins,legend_name,Legend_Category,Kills_per_Win,Kills_per_Match,Damage_per_Match
0,1335473024,192425,47418,15355.0,Bangalore,Assault,12.531749,4.058058,28163.841242
1,50500034,177520,35645,14088.0,Bangalore,Assault,12.600795,4.980222,1416.749446
2,48186631,172382,32770,13041.0,Bangalore,Assault,13.218465,5.260360,1470.449527
3,45909908,170902,30597,12675.0,Bangalore,Assault,13.483393,5.585580,1500.470896
4,45231990,168326,28659,12637.0,Bangalore,Assault,13.320092,5.873408,1578.282215
...,...,...,...,...,...,...,...,...,...
12995,1850434,9396,1329,,Conduit,Support,0.000000,7.069977,1392.350640
12996,1849847,9351,1328,,Conduit,Support,0.000000,7.041416,1392.957078
12997,1848894,9351,1328,,Conduit,Support,0.000000,7.041416,1392.239458
12998,1848094,9338,1326,,Conduit,Support,0.000000,7.042232,1393.736048


In [111]:
all_legends_df.head(10)


Unnamed: 0,Damage,Kills,Games Played,Wins,legend_name,Legend_Category
0,1335473024,192425,47418,15355.0,Bangalore,Assault
1,50500034,177520,35645,14088.0,Bangalore,Assault
2,48186631,172382,32770,13041.0,Bangalore,Assault
3,45909908,170902,30597,12675.0,Bangalore,Assault
4,45231990,168326,28659,12637.0,Bangalore,Assault
5,44934583,166350,27171,12104.0,Bangalore,Assault
6,44317988,162274,27153,11816.0,Bangalore,Assault
7,42896122,157981,26569,11749.0,Bangalore,Assault
8,41978290,157477,25995,11548.0,Bangalore,Assault
9,37699061,143286,25267,11412.0,Bangalore,Assault


In [97]:
# # Assuming you have a column 'legend_category' in your DataFrame
# # Group by 'legend_category' and calculate the average for relevant stats
# average_stats_per_category = all_legends_df.groupby('Legend_Category').agg(
#     Average_Kills_per_Win=('Kills_per_Win', 'mean'),
#     Average_Kills_per_Match=('Kills_per_Match', 'mean'),
#     Average_Damage_per_Match=('Damage_per_Match', 'mean')
# ).reset_index()

# # Sort by any of the calculated stats, for example, by 'Average_Kills_per_Match'
# sorted_average_stats = average_stats_per_category.sort_values(by='Average_Kills_per_Match', ascending=False)

# sorted_average_stats


Unnamed: 0,Legend_Category,Average_Kills_per_Win,Average_Kills_per_Match,Average_Damage_per_Match
3,Skirmisher,15.870937,5.900894,1413.11174
0,Assault,16.145782,5.389439,1529.186712
4,Support,10.926549,5.381843,1375.002465
1,Controller,24.195288,5.117616,1467.854745
2,Recon,19.728249,4.704346,1444.803915
