# Preprocess the raw game data and convert it into a proper dataset

In [3]:
import csv
import os

def preprocess_and_append_csv(current_file='game_data.csv', master_file='dataset_player2.csv'):
    """Clean the rows of ``current_file`` and append them to ``master_file``.

    For every row of ``current_file`` the function:

    * skips the row when none of the player-2 button columns is pressed
      (every such column is ``'0'``, ``'FALSE'`` or ``'False'``; a column that
      is absent from the file counts as not pressed);
    * drops the player-1 button columns;
    * maps ``TRUE``/``True`` to 1 and ``FALSE``/``False`` to 0;
    * maps ``fight_result`` values ``NOT_OVER``/``P1``/``P2`` to 0/1/2;
    * converts all-decimal-digit strings to ``int``.

    The surviving rows are appended to ``master_file`` (a header is written
    when that file is missing or empty). Afterwards ``current_file`` is
    overwritten so that it contains only its original header.

    When the input is completely empty or no row survives the filter, a
    message is printed and neither file is modified.

    Args:
        current_file: Path of the CSV holding freshly recorded rows.
        master_file: Path of the CSV the cleaned rows are appended to.

    Returns:
        None.

    Raises:
        FileNotFoundError: If ``current_file`` does not exist.
    """
    # Player-1 inputs are removed from the output.
    columns_to_remove = {
        'player1_buttons up',
        'player1_buttons down',
        'player1_buttons left',
        'player1_buttons right',
        'player1.player_buttons.A',
        'player1.player_buttons.B',
        'player1.player_buttons.Y',
        'player1.player_buttons.R',
        'player1.player_buttons.L',
    }
    # A row is kept only if at least one of these columns is pressed.
    player2_button_columns = (
        'player2_buttons up',
        'player2_buttons down',
        'player2_buttons right',
        'player2_buttons left',
        'player2.player_buttons.A',
        'player2.player_buttons.B',
        'player2.player_buttons.Y',
        'player2.player_buttons.R',
        'player2.player_buttons.L',
    )
    not_pressed_values = ('0', 'FALSE', 'False')
    boolean_codes = {'TRUE': 1, 'True': 1, 'FALSE': 0, 'False': 0}
    fight_result_codes = {'NOT_OVER': 0, 'P1': 1, 'P2': 2}

    processed_rows = []
    with open(current_file, mode='r', newline='') as infile:
        reader = csv.DictReader(infile)
        original_fieldnames = reader.fieldnames
        if original_fieldnames is None:
            # The file has no header line at all, so there is nothing to read
            # and no header to restore later.
            print("No new data to process.")
            return

        fieldnames = [col for col in original_fieldnames if col not in columns_to_remove]

        for row in reader:
            if all(row.get(col, '0') in not_pressed_values for col in player2_button_columns):
                continue

            new_row = {}
            for col in fieldnames:
                value = row[col]
                if col == 'fight_result' and value in fight_result_codes:
                    value = fight_result_codes[value]
                elif value in boolean_codes:
                    value = boolean_codes[value]
                elif isinstance(value, str) and value.isdecimal():
                    # isdecimal() (unlike isdigit()) only accepts characters
                    # that int() can actually parse.
                    value = int(value)
                new_row[col] = value

            processed_rows.append(new_row)

    if not processed_rows:
        print("No new data to process.")
        return

    # An existing but empty master file still needs its header row.
    needs_header = not os.path.isfile(master_file) or os.path.getsize(master_file) == 0
    with open(master_file, mode='a', newline='') as outfile:
        writer = csv.DictWriter(outfile, fieldnames=fieldnames)
        if needs_header:
            writer.writeheader()
        writer.writerows(processed_rows)

    # Reset the input file to just its header so the same rows are not
    # appended again on the next run.
    with open(current_file, mode='w', newline='') as clearfile:
        writer = csv.DictWriter(clearfile, fieldnames=original_fieldnames)
        writer.writeheader()

    print(f"Processed and moved {len(processed_rows)} rows to {master_file}.")

# Run the preprocessing with the default paths: reads 'game_data.csv' and
# appends the cleaned rows to 'dataset_player2.csv'.
preprocess_and_append_csv()

Processed and moved 6973 rows to dataset_player2.csv.


In [28]:
import pandas as pd

# Read the recorded dataset (adjust the file name to your own recording).
df = pd.read_csv("dataset.csv")

# Player-1 button columns; a row is kept when at least one of them is set.
button_targets = [
    'player1_buttons up',
    'player1_buttons down',
    'player1_buttons left',
    'player1_buttons right',
    'player1.player_buttons.A',
    'player1.player_buttons.B',
    'player1.player_buttons.Y',
    'player1.player_buttons.R',
    'player1.player_buttons.L',
]

# Discard every column whose name starts with 'player2'.
keep_column = [not col.startswith('player2') for col in df.columns]
df = df.loc[:, keep_column]

# Retain only the rows with at least one non-zero player-1 button.
any_button_pressed = df[button_targets].any(axis=1)
df = df.loc[any_button_pressed]

# Per-button totals over the remaining rows.
button_press_counts = df.loc[:, button_targets].sum()
print("Button press counts:", button_press_counts, sep="\n")

# Persist the filtered rows without the index column.
df.to_csv("filtered_dataset.csv", index=False)
print("Filtered dataset saved to filtered_dataset.csv")


Button press counts:
player1_buttons up           3540
player1_buttons down         9418
player1_buttons left        17878
player1_buttons right       21248
player1.player_buttons.A     1692
player1.player_buttons.B     3310
player1.player_buttons.Y      340
player1.player_buttons.R     2932
player1.player_buttons.L     5130
dtype: int64


PermissionError: [Errno 13] Permission denied: 'filtered_dataset.csv'

In [None]:
import pandas as pd
from sklearn.utils import resample

# Load the preprocessed Player 2 dataset
df = pd.read_csv("dataset_player2.csv")

# Target button columns
button_targets = [
    'player2_buttons up',
    'player2_buttons down',
    'player2_buttons right',
    'player2_buttons left',
    'player2.player_buttons.A',
    'player2.player_buttons.B',
    'player2.player_buttons.Y',
    'player2.player_buttons.R',
    'player2.player_buttons.L',
]

# Number of positive (pressed) rows wanted per button.
TARGET_COUNT = 10000
synth_dfs = []

# Step 1: For each button, draw TARGET_COUNT rows where that button == 1.
# Buttons with enough positives are subsampled without replacement; the rest
# are oversampled with replacement.
for button in button_targets:
    pos_rows = df[df[button] == 1]
    if pos_rows.empty:
        # There is nothing to draw from, so oversampling is impossible.
        print(f"Warning: no rows with {button} == 1; skipping this button.")
        continue
    if len(pos_rows) >= TARGET_COUNT:
        sampled = pos_rows.sample(n=TARGET_COUNT, random_state=42)
    else:
        sampled = resample(pos_rows, replace=True, n_samples=TARGET_COUNT, random_state=42)
    synth_dfs.append(sampled)

if not synth_dfs:
    raise ValueError("No button column has any pressed rows; cannot build a synthetic dataset.")

# Step 2: Combine all sampled rows (the same source row may appear many times)
synthetic_df = pd.concat(synth_dfs).reset_index(drop=True)

# Step 3: Cap each column at TARGET_COUNT presses.
# NOTE(review): surplus presses are overwritten with 0, i.e. real presses are
# relabelled as "not pressed" in those rows - confirm this is intended.
for button in button_targets:
    idx = synthetic_df[synthetic_df[button] == 1].index
    if len(idx) > TARGET_COUNT:
        to_zero = idx[TARGET_COUNT:]  # keep only the first TARGET_COUNT presses
        synthetic_df.loc[to_zero, button] = 0

# Step 4: Shuffle, drop rows left without any pressed button by step 3, and save
synthetic_df = synthetic_df.sample(frac=1, random_state=42).reset_index(drop=True)
synthetic_df = synthetic_df[synthetic_df[button_targets].any(axis=1)]
synthetic_df.to_csv("player2_synthetic data.csv", index=False)

# Confirm
print(f"Final counts (each should be {TARGET_COUNT}):")
print(synthetic_df[button_targets].sum())
print("Final dataset shape:", synthetic_df.shape)


Final counts (each should be 5000):
player2_buttons up          10000
player2_buttons down        10000
player2_buttons right       10000
player2_buttons left        10000
player2.player_buttons.A    10000
player2.player_buttons.B    10000
player2.player_buttons.Y    10000
player2.player_buttons.R    10000
player2.player_buttons.L    10000
dtype: int64
Final dataset shape: (70377, 29)


In [10]:
# # 1. Import Libraries
# import pandas as pd
# import numpy as np
# from sklearn.model_selection import train_test_split
# from sklearn.ensemble import RandomForestClassifier
# from sklearn.metrics import classification_report
# import joblib
# import os

# # 2. Load Dataset
# df = pd.read_csv("player2_synthetic data.csv")  # Replace with your filename

# # 4. List of Player2 Button Columns to Predict
# button_targets = [
#                 'player2_buttons up',
#                 'player2_buttons down',
#                 'player2_buttons right',
#                 'player2_buttons left',
#                 'player2.player_buttons.A',
#                 'player2.player_buttons.B',
#                 'player2.player_buttons.Y',
#                 'player2.player_buttons.R',
#                 'player2.player_buttons.L'
#             ]

# df = df[df[button_targets].any(axis=1)]
# # 5. Make Directory to Save Models
# os.makedirs("saved_modelsp2", exist_ok=True)

# # 6. Train & Save Model for Each Button
# for target in button_targets:
#     print(f"\nTraining model for: {target}")

#     # Define features: drop only the current target (the other button columns stay in X)
#     X = df.drop(columns=target)
#     y = df[target]

#     # Train-test split
#     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

#     # Train classifier
#     clf = RandomForestClassifier(n_estimators=100, random_state=42)
#     clf.fit(X_train, y_train)

#     # Evaluate
#     y_pred = clf.predict(X_test)
#     print(classification_report(y_test, y_pred))

#     # Save model
#     model_filename = f"saved_modelsp2/{target.replace(' ', '_').replace('.', '')}_model.pkl"
#     joblib.dump(clf, model_filename)
#     print(f"Saved model to: {model_filename}")


In [11]:
# print(X_train.columns)

In [15]:
import lightgbm as lgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import pandas as pd 
import os 
import joblib


# Load the balanced Player 2 dataset (adjust the file name if yours differs).
df = pd.read_csv("player2_synthetic data.csv")

# Player 2 button columns; one binary classifier is trained per column.
button_targets = [
    'player2_buttons up',
    'player2_buttons down',
    'player2_buttons right',
    'player2_buttons left',
    'player2.player_buttons.A',
    'player2.player_buttons.B',
    'player2.player_buttons.Y',
    'player2.player_buttons.R',
    'player2.player_buttons.L',
]

# Keep only rows in which at least one target button is pressed.
has_any_press = df[button_targets].any(axis=1)
df = df.loc[has_any_press]

# Output directory for the fitted models.
os.makedirs("saved_modelsp2", exist_ok=True)

for target in button_targets:
    print(f"\nTraining LightGBM model for: {target}")

    # NOTE(review): only the current target is removed, so the other eight
    # button columns remain as features. Because of the row filter above, a row
    # whose other buttons are all 0 always has this target == 1, which leaks
    # the label. Dropping all of button_targets would avoid that, but any code
    # that loads these models must then be given the same reduced feature set.
    y = df[target]
    X = df.drop(columns=[target])
    print(X.columns)

    # Hold out 20% of the rows for evaluation.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    clf = lgb.LGBMClassifier(n_estimators=100, random_state=42)
    clf.fit(X_train, y_train)

    y_pred = clf.predict(X_test)
    print(classification_report(y_test, y_pred))

    # File name: spaces become underscores and dots are removed.
    safe_name = target.replace(' ', '_').replace('.', '')
    joblib.dump(clf, f"saved_modelsp2/{safe_name}_model.pkl")



Training LightGBM model for: player2_buttons up
Index(['timer', 'fight_result', 'has_round_started', 'is_round_over',
       'Player1_ID', 'health', 'x_coord', 'y_coord', 'is_jumping',
       'is_crouching', 'is_player_in_move', 'move_id', 'Player2_ID',
       'Player2 health', 'Player2 x_coord', 'Player2 y_coord',
       'Player2 is_jumping', 'Player2 is_crouching',
       'Player2 is_player_in_move', 'Player2 move_id', 'player2_buttons down',
       'player2_buttons right', 'player2_buttons left',
       'player2.player_buttons.A', 'player2.player_buttons.B',
       'player2.player_buttons.Y', 'player2.player_buttons.R',
       'player2.player_buttons.L'],
      dtype='object')
[LightGBM] [Info] Number of positive: 8009, number of negative: 48292
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.012636 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [In

In [23]:
print(X_train.columns)

Index(['timer', 'fight_result', 'has_round_started', 'is_round_over',
       'Player1_ID', 'health', 'x_coord', 'y_coord', 'is_jumping',
       'is_crouching', 'is_player_in_move', 'move_id', 'Player2_ID',
       'Player2 health', 'Player2 x_coord', 'Player2 y_coord',
       'Player2 is_jumping', 'Player2 is_crouching',
       'Player2 is_player_in_move', 'Player2 move_id', 'player2_buttons up',
       'player2_buttons down', 'player2_buttons right', 'player2_buttons left',
       'player2.player_buttons.A', 'player2.player_buttons.B',
       'player2.player_buttons.Y', 'player2.player_buttons.R'],
      dtype='object')


In [25]:
import pandas as pd
import joblib

# A single hand-written game state used to smoke-test the saved models.
input_data = {
    'timer': 60,
    'fight_result': 0,
    'has_round_started': 1,
    'is_round_over': 0,

    'Player1_ID': 1,
    'health': 120,
    'x_coord': 150,
    'y_coord': 50,
    'is_jumping': 1,
    'is_crouching': 0,
    'is_player_in_move': 1,
    'move_id': 2,

    'Player2_ID': 6,
    'Player2 health': 176,
    'Player2 x_coord': 200,
    'Player2 y_coord': 100,

    'Player2 is_jumping': 0,
    'Player2 is_crouching': 0,
    'Player2 is_player_in_move': 0,
    'Player2 move_id': 0,

    'player2_buttons up': 0,
    'player2_buttons down': 0,
    'player2_buttons right': 0,
    'player2_buttons left': 0,

    'player2.player_buttons.A': 0,
    'player2.player_buttons.B': 0,
    'player2.player_buttons.Y': 0,
    'player2.player_buttons.R': 0,
    'player2.player_buttons.L': 0
}

# Player 2 button columns; one saved model exists per column.
button_targets = [
    'player2_buttons up',
    'player2_buttons down',
    'player2_buttons right',
    'player2_buttons left',
    'player2.player_buttons.A',
    'player2.player_buttons.B',
    'player2.player_buttons.Y',
    'player2.player_buttons.R',
    'player2.player_buttons.L'
]

# One-row frame holding the game state.
original_df = pd.DataFrame([input_data])

predicted_buttons = {}

for target in button_targets:
    # Each model receives every column except the one it predicts, so the
    # other button columns (all 0 here) are passed in as features.
    input_features = original_df.drop(columns=[target], errors='ignore')

    # joblib.load unpickles the file: only load model files you created yourself.
    model_path = f"saved_modelsp2/{target.replace(' ', '_').replace('.', '')}_model.pkl"
    model = joblib.load(model_path)

    prediction = model.predict(input_features)[0]
    predicted_buttons[target] = prediction

# The predictions are for the player2 button columns, so label them as such.
print("\nPredicted Player2 button states:")
for key, value in predicted_buttons.items():
    print(f"{key}: {value}")



Predicted Player1 button states:
player2_buttons up: 0
player2_buttons down: 0
player2_buttons right: 1
player2_buttons left: 1
player2.player_buttons.A: 0
player2.player_buttons.B: 0
player2.player_buttons.Y: 0
player2.player_buttons.R: 0
player2.player_buttons.L: 0
