# Import Required Libraries
Import pandas for data processing, matplotlib for visualization, glob for file handling, and os for path operations.

In [None]:
# Import Required Libraries
import pandas as pd  # For data processing
import matplotlib.pyplot as plt  # For visualization
import glob  # For file handling
import os  # For path operations
import numpy as np  # For numerical operations

# Load CSV Files
Use glob to get all CSV files in the root directory and create a function to read and validate each file's structure.

In [16]:
# Function to read and validate CSV files and split them into 8 DataFrames
def load_csv_files():
    csv_files = glob.glob(os.path.join(os.getcwd(), "*.csv"))  # Get all CSV files in the root directory
    data_frames = {f"df{i}": [] for i in range(1, 9)}  # Create a dictionary to hold 8 DataFrames
    
    for file in csv_files:
        df = pd.read_csv(file)
        
        # Validate the structure of the CSV file
        if 'Kills' in df.columns and 'Total Deaths' in df.columns:
            # Determine which DataFrame the file belongs to based on its prefix
            for i in range(1, 9):
                if os.path.basename(file).startswith(f"CA_Q{i}"):
                    data_frames[f"df{i}"].append(df)
                    print(f"File {file} has been added to df{i}.")
                    break
        else:
            print(f"File {file} does not have the required structure.")
    
    # Combine files for each prefix into a single DataFrame
    for i in range(1, 9):
        data_frames[f"df{i}"] = pd.concat(data_frames[f"df{i}"], ignore_index=True) if data_frames[f"df{i}"] else pd.DataFrame()
    
    return data_frames

# Load the CSV files and split them into 8 DataFrames
data_frames = load_csv_files()

# Access individual DataFrames
df1 = data_frames["df1"]
df2 = data_frames["df2"]
df3 = data_frames["df3"]
df4 = data_frames["df4"]
df5 = data_frames["df5"]
df6 = data_frames["df6"]
df7 = data_frames["df7"]
df8 = data_frames["df8"]

File d:\core runs\battle data apr 25, 13 hr agents, 4 hr run\testing fitnesses\CA_Q1_1.csv has been added to df1.
File d:\core runs\battle data apr 25, 13 hr agents, 4 hr run\testing fitnesses\CA_Q1_2.csv has been added to df1.
File d:\core runs\battle data apr 25, 13 hr agents, 4 hr run\testing fitnesses\CA_Q1_3.csv has been added to df1.
File d:\core runs\battle data apr 25, 13 hr agents, 4 hr run\testing fitnesses\CA_Q1_4.csv has been added to df1.
File d:\core runs\battle data apr 25, 13 hr agents, 4 hr run\testing fitnesses\CA_Q1_5.csv has been added to df1.
File d:\core runs\battle data apr 25, 13 hr agents, 4 hr run\testing fitnesses\CA_Q2_1.csv has been added to df2.
File d:\core runs\battle data apr 25, 13 hr agents, 4 hr run\testing fitnesses\CA_Q2_2.csv has been added to df2.
File d:\core runs\battle data apr 25, 13 hr agents, 4 hr run\testing fitnesses\CA_Q2_3.csv has been added to df2.
File d:\core runs\battle data apr 25, 13 hr agents, 4 hr run\testing fitnesses\CA_Q2_4.c

# Process Multiple Files
Create DataFrames from each CSV file, extract kills and deaths data, and combine them into a single dataset with proper labeling.

In [17]:
# Create a KD ratio comparison graph for specified DataFrame pairs

# Calculate KD ratios for each DataFrame
kd_ratios = {}
for i in range(1, 9):
    df_name = f"df{i}"
    df = eval(df_name)  # Get the DataFrame using its variable name
    
    if not df.empty:
        total_kills = df['Kills'].sum()
        total_deaths = df['Total Deaths'].sum() if 'Total Deaths' in df.columns else 0
        
        # Handle division by zero
        kd_ratio = total_kills / total_deaths if total_deaths > 0 else float('inf')
        kd_ratios[df_name] = kd_ratio
    else:
        kd_ratios[df_name] = 0

# Define the pairs to compare
pairs = [('df1', 'df5'), ('df2', 'df6'), ('df3', 'df7'), ('df4', 'df8')]
pair_labels = ['Pair 1-5', 'Pair 2-6', 'Pair 3-7', 'Pair 4-8']

# Create a comparison bar chart
plt.figure(figsize=(12, 8))
bar_width = 0.35
index = np.arange(len(pairs))

# Import numpy if needed
import numpy as np

# Create bars for each DataFrame in the pairs
for i, (df1_name, df2_name) in enumerate(pairs):
    plt.bar(index[i], kd_ratios[df1_name], bar_width, label=df1_name, color='blue')
    plt.bar(index[i] + bar_width, kd_ratios[df2_name], bar_width, label=df2_name, color='red')

# Add labels and title
plt.xlabel('DataFrame Pairs')
plt.ylabel('Kill/Death Ratio')
plt.title('Comparison of Kill/Death Ratios Between DataFrame Pairs')
plt.xticks(index + bar_width/2, pair_labels)
plt.grid(True, axis='y')

# Add a legend with a more organized layout
handles, labels = plt.gca().get_legend_handles_labels()
by_label = dict(zip(labels, handles))
plt.legend(by_label.values(), by_label.keys(), loc='upper right')

# Show values on top of each bar
for i, (df1_name, df2_name) in enumerate(pairs):
    plt.text(index[i], kd_ratios[df1_name] + 0.1, f"{kd_ratios[df1_name]:.2f}", 
             ha='center', va='bottom', fontweight='bold')
    plt.text(index[i] + bar_width, kd_ratios[df2_name] + 0.1, f"{kd_ratios[df2_name]:.2f}", 
             ha='center', va='bottom', fontweight='bold')

# Show the plot
plt.tight_layout()
plt.show()

# Print the exact values for reference
print("\nKill/Death Ratios:")
for df_name, ratio in kd_ratios.items():
    print(f"{df_name}: {ratio:.5f}")

print("\nPair Comparisons:")
for df1_name, df2_name in pairs:
    diff = kd_ratios[df1_name] - kd_ratios[df2_name]
    percent = (diff / kd_ratios[df2_name] * 100) if kd_ratios[df2_name] != 0 else float('inf')
    print(f"{df1_name} vs {df2_name}: Difference = {diff:.5f}, Percentage = {percent:.2f}%")

NameError: name 'np' is not defined

<Figure size 1200x800 with 0 Axes>

In [None]:
# Process Multiple Files

# Combine data from all CSV files into a single DataFrame
combined_df = pd.concat(data_frames, ignore_index=True)

# Extract kills and deaths data
kills_data = combined_df['Kills']
deaths_data = combined_df['Total Deaths']

# Add an entry number column to track the number of entries
combined_df['entry_number'] = combined_df.index + 1

# Plot kills and deaths over the number of entries
plt.figure(figsize=(10, 6))
plt.plot(combined_df['entry_number'], kills_data, label='Kills', color='blue')
plt.plot(combined_df['entry_number'], deaths_data, label='Deaths', color='red')
plt.xlabel('Entry Number')
plt.ylabel('Count')
plt.title('Kills and Deaths Over Time')
plt.legend()
plt.grid(True)
#plt.show()

In [None]:
# Create Visualization

# Initialize a figure for the plot
plt.figure(figsize=(12, 8))

# Iterate over each DataFrame to plot kills and deaths
for i, df in enumerate(data_frames):
    entry_number = df.index + 1  # Create an entry number column for the current DataFrame
    plt.plot(entry_number, df['Kills'], label=f'Kills (File {i+1})', linestyle='-', marker='o', color = 'blue')
    plt.plot(entry_number, df['Self Deaths'], label=f'Deaths (File {i+1})', linestyle='--', marker='x', color = 'red')

# Add labels and title
plt.xlabel('Entry Number')
plt.ylabel('Count')
plt.title('Kills and Deaths Over Time for Multiple CSV Files')
#plt.legend()
plt.grid(True)

# Show the plot
plt.show()

# Calculate totals and ratio
total_kills = sum(df['Kills'].sum() for df in data_frames)
total_deaths = sum(df['Total Deaths'].sum() for df in data_frames)
kd_ratio = total_kills / total_deaths if total_deaths > 0 else float('inf')

print(f"\nTotal Statistics:")
print(f"Total Kills: {total_kills}")
print(f"Total Self Deaths: {total_deaths}")
print(f"Kill/Self Death Ratio: {kd_ratio:.5f}")
