# preprocess and convert it to proper dataset

In [2]:
import csv
import os
# def preprocess_and_append_csv(current_file='game_data.csv', master_file='dataset.csv'):
def preprocess_and_append_csv(current_file='game_data.csv', master_file='datasetp2.csv'):
    processed_rows = []

    with open(current_file, mode='r', newline='') as infile:
        reader = csv.DictReader(infile)
        fieldnames = reader.fieldnames
        for row in reader:
            
            for key in row:
                print(row[key])
                if row[key] == "TRUE" or row[key] == "True" :
                    row[key] = 1
                elif row[key] == "FALSE" or row[key] == "False":
                    row[key] = 0

            if row['fight_result'] == "NOT_OVER":
                row['fight_result'] = 0
            elif row['fight_result'] == "P1":
                row['fight_result'] = 1
            elif row['fight_result'] == "P2":
                row['fight_result'] = 2

            for key in row:
                if isinstance(row[key], str) and row[key].isdigit():
                    row[key] = int(row[key])

            processed_rows.append(row)

    if not processed_rows:
        print("No new data to process.")
        return


    file_exists = os.path.isfile(master_file)
    with open(master_file, mode='a', newline='') as outfile:
        writer = csv.DictWriter(outfile, fieldnames=fieldnames)
        if not file_exists:
            writer.writeheader()
        writer.writerows(processed_rows)

    with open(current_file, mode='w', newline='') as clearfile:
        writer = csv.DictWriter(clearfile, fieldnames=fieldnames)
        writer.writeheader()

    print(f"Processed and moved {len(processed_rows)} rows to {master_file}.")
preprocess_and_append_csv()

No new data to process.


In [None]:
import pandas as pd

df = pd.read_csv("datasetp2.csv")
df = df[[col for col in df.columns if not col.startswith('player2')]]
print(df.columns)

Index(['timer', 'fight_result', 'has_round_started', 'is_round_over',
       'Player1_ID', 'health', 'x_coord', 'y_coord', 'is_jumping',
       'is_crouching', 'is_player_in_move', 'move_id', 'player1_buttons up',
       'player1_buttons down', 'player1_buttons right', 'player1_buttons left',
       'player1.player_buttons.A', 'player1.player_buttons.B',
       'player1.player_buttons.Y', 'player1.player_buttons.R',
       'player1.player_buttons.L', 'Player2_ID', 'Player2 health',
       'Player2 is_jumping', 'Player2 is_crouching',
       'Player2 is_player_in_move', 'Player2 move_id', 'Player2_button_up',
       'Player2_button_down', 'Player2_button_right', 'Player2_button_left',
       'Player2_button_A', 'Player2_button_B', 'Player2_button_Y',
       'Player2_button_R', 'Player2_button_L', 'combo_id'],
      dtype='object')


In [None]:
column_rename_map = {
    'Player1_health': 'health',
    'X_distance': 'x_coord',
    'Y_distance': 'y_coord',
    'Player1_is_jumping': 'is_jumping',
    'Player1_is_crouching': 'is_crouching',
    'Player1_is_player_in_move': 'is_player_in_move',
    'Player1_move_id': 'move_id',
    'Player1_button_up': 'player1_buttons up',
    'Player1_button_down': 'player1_buttons down',
    'Player1_button_right': 'player1_buttons right',
    'Player1_button_left': 'player1_buttons left',
    'Player1_button_A': 'player1.player_buttons.A',
    'Player1_button_B': 'player1.player_buttons.B',
    'Player1_button_Y': 'player1.player_buttons.Y',
    'Player1_button_R': 'player1.player_buttons.R',
    'Player1_button_L': 'player1.player_buttons.L',
    
    'Player2_health': 'Player2 health',
    'Player2_is_jumping': 'Player2 is_jumping',
    'Player2_is_crouching': 'Player2 is_crouching',
    'Player2_is_player_in_move': 'Player2 is_player_in_move',
    'Player2_move_id': 'Player2 move_id'
}


df = df.rename(columns=column_rename_map)
df.to_csv("datasetp2.csv", index=False)



In [None]:
import pandas as pd
from sklearn.utils import resample


df = pd.read_csv("datasetp2.csv")

df['combo_present'] = df['combo_id'].apply(lambda x: 0 if x == -1 else 1)

button_columns = [
    'player1_buttons up', 'player1_buttons down', 'player1_buttons right', 'player1_buttons left',
    'player1.player_buttons.A', 'player1.player_buttons.B', 'player1.player_buttons.Y',
    'player1.player_buttons.R', 'player1.player_buttons.L'
]
df = df[[col for col in df.columns if not col.startswith('Player2_but')]]

df_combo = df[df['combo_present'] == 1]
df_no_combo = df[df['combo_present'] == 0]

df_combo_balanced = resample(df_combo, replace=True, n_samples=15000, random_state=42)
df_no_combo_balanced = resample(df_no_combo, replace=True, n_samples=15000, random_state=42)

df_balanced = pd.concat([df_combo_balanced, df_no_combo_balanced]).sample(frac=1, random_state=42).reset_index(drop=True)

df_balanced.to_csv("balanced_dataset.csv", index=False)

print("Balanced dataset created with shape:", df_balanced.shape)


Balanced dataset created with shape: (30000, 29)


In [None]:
import pandas as pd
import lightgbm as lgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import joblib


df_balanced = df_balanced.drop(columns=['timer'])
X = df_balanced.drop(columns=button_columns + ['combo_id', 'combo_present'])
y = df_balanced['combo_present']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = lgb.LGBMClassifier(n_estimators=500, max_depth=10, num_leaves=64, random_state=42)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred))

joblib.dump(model, "saved_modelsp2/combo_presence_model.pkl")


[LightGBM] [Info] Number of positive: 11957, number of negative: 12043
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.002698 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 681
[LightGBM] [Info] Number of data points in the train set: 24000, number of used features: 14
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.498208 -> initscore=-0.007167
[LightGBM] [Info] Start training from score -0.007167
              precision    recall  f1-score   support

           0       1.00      0.95      0.97      2957
           1       0.95      1.00      0.98      3043

    accuracy                           0.97      6000
   macro avg       0.98      0.97      0.97      6000
weighted avg       0.98      0.97      0.97      6000



['saved_modelsp2/combo_presence_model.pkl']

In [None]:
proba = model.predict_proba(X_test)
print(proba[:10])

y_pred_custom = (proba[:, 1] > 0.5).astype(int)
y_pred_custom


[[5.95336543e-02 9.40466346e-01]
 [4.23469636e-03 9.95765304e-01]
 [2.36217450e-03 9.97637826e-01]
 [2.86991385e-02 9.71300862e-01]
 [3.75937676e-01 6.24062324e-01]
 [9.99999299e-01 7.01097513e-07]
 [9.07428003e-01 9.25719965e-02]
 [3.28535268e-04 9.99671465e-01]
 [9.99998871e-01 1.12866157e-06]
 [7.16358059e-02 9.28364194e-01]]


array([1, 1, 1, ..., 0, 1, 1])

In [None]:
from sklearn.metrics import classification_report
import numpy as np

proba = model.predict_proba(X_test)[:, 1]

threshold = 0.05
y_pred_custom = (proba > threshold).astype(int)

unique, counts = np.unique(y_pred_custom, return_counts=True)
print("Prediction counts:", dict(zip(unique, counts)))

print(classification_report(y_test, y_pred_custom))


Prediction counts: {0: 2695, 1: 3305}
              precision    recall  f1-score   support

           0       1.00      0.91      0.95      2957
           1       0.92      1.00      0.96      3043

    accuracy                           0.96      6000
   macro avg       0.96      0.96      0.96      6000
weighted avg       0.96      0.96      0.96      6000



In [None]:
import pandas as pd
import joblib

model = joblib.load("saved_modelsp2/combo_presence_model.pkl")

input_data = {
    # 'timer': [100],
    'fight_result': [0],
    'has_round_started': [1],
    'is_round_over': [0],
    'Player1_ID': [3],
    'health': [120],
    'x_coord': [200],
    'y_coord': [192],
    'is_jumping': [0],
    'is_crouching': [0],
    'is_player_in_move': [0],
    'move_id': [0],
    'Player2_ID': [6],
    'Player2 health': [120],
    'Player2 is_jumping': [1],
    'Player2 is_crouching': [0],
    'Player2 is_player_in_move': [0],
    'Player2 move_id': [0]
}

input_df = pd.DataFrame(input_data)

button_columns = [
    'player1_buttons up', 'player1_buttons down', 'player1_buttons right', 'player1_buttons left',
    'player1.player_buttons.A', 'player1.player_buttons.B', 'player1.player_buttons.Y',
    'player1.player_buttons.R', 'player1.player_buttons.L'
]
input_df = input_df.drop(columns=button_columns, errors='ignore')

prediction = model.predict(input_df)

print(f"Combo Present: {prediction[0]}")


Combo Present: 0


In [None]:
combo_df = df[df['combo_id'] != -1]  
key_df = df[df['combo_id'] == -1]    

combo_df.to_csv("combop2.csv", index=False)
key_df.to_csv("keyp2.csv", index=False)

print(f"Saved {len(combo_df)} rows to combo.csv and {len(key_df)} rows to key.csv")


Saved 2115 rows to combo.csv and 28521 rows to key.csv


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report
import joblib

df = pd.read_csv("combop2.csv")

button_cols = [col for col in df.columns if 'button' in col]

df = df.drop(columns=button_cols)
df = df.drop(columns=['timer'])

X = df.drop(columns=['combo_id','combo_present'])
y = df['combo_id']

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

mlp = MLPClassifier(hidden_layer_sizes=(128, 64, 32, 16), max_iter=500, random_state=42)
mlp.fit(X_train, y_train)

y_pred = mlp.predict(X_test)
print(classification_report(y_test, y_pred))

joblib.dump(mlp, "saved_modelsp2/combo_mlp_model.pkl")
joblib.dump(scaler, "saved_modelsp2/combo_scaler.pkl")


              precision    recall  f1-score   support

           2       0.92      0.87      0.89        83
           3       0.91      0.96      0.93       102
           4       0.99      0.99      0.99       144
           5       0.89      0.91      0.90        35
           6       0.87      0.87      0.87        23
           7       0.94      0.89      0.91        36

    accuracy                           0.94       423
   macro avg       0.92      0.92      0.92       423
weighted avg       0.94      0.94      0.94       423





['saved_modelsp2/combo_scaler.pkl']

In [16]:
print(X.columns)

Index(['fight_result', 'has_round_started', 'is_round_over', 'Player1_ID',
       'health', 'x_coord', 'y_coord', 'is_jumping', 'is_crouching',
       'is_player_in_move', 'move_id', 'Player2_ID', 'Player2 health',
       'Player2 is_jumping', 'Player2 is_crouching',
       'Player2 is_player_in_move', 'Player2 move_id'],
      dtype='object')


In [None]:
import pandas as pd
import joblib
import numpy as np

mlp_model = joblib.load("saved_modelsp2/combo_mlp_model.pkl")
scaler = joblib.load("saved_modelsp2/combo_scaler.pkl")

example_input = {
    # 'timer': 100,
    'fight_result': 0,
    'has_round_started': 1,
    'is_round_over': 0,
    'Player1_ID': 3,
    'health': 120,
    'x_coord': 200,
    'y_coord': 192,
    'is_jumping': 0,
    'is_crouching': 0,
    'is_player_in_move': 0,
    'move_id': 0,
    'Player2_ID': 6,
    'Player2 health': 120,
    'Player2 is_jumping': 1,
    'Player2 is_crouching': 0,
    'Player2 is_player_in_move': 0,
    'Player2 move_id': 0
}

# Convert the example input to a DataFrame
example_df = pd.DataFrame([example_input])
print(example_df.columns)
example_scaled = scaler.transform(example_df)
prediction = mlp_model.predict(example_scaled)

print(f"Predicted combo_id: {prediction[0]}")


Index(['fight_result', 'has_round_started', 'is_round_over', 'Player1_ID',
       'health', 'x_coord', 'y_coord', 'is_jumping', 'is_crouching',
       'is_player_in_move', 'move_id', 'Player2_ID', 'Player2 health',
       'Player2 is_jumping', 'Player2 is_crouching',
       'Player2 is_player_in_move', 'Player2 move_id'],
      dtype='object')
Predicted combo_id: 3


In [27]:
# button_targets = [
#     'player1_buttons up',
#     'player1_buttons down',
#     'player1_buttons left',
#     'player1_buttons right',    
#     'player1.player_buttons.A',
#     'player1.player_buttons.B',
#     'player1.player_buttons.Y',
#     'player1.player_buttons.R',
#     'player1.player_buttons.L'
# ]

# df = df[df[button_targets].any(axis=1) | (df['combo_id'] != -1)]

In [16]:
print(df.columns)

Index(['timer', 'fight_result', 'has_round_started', 'is_round_over',
       'Player1_ID', 'health', 'x_coord', 'y_coord', 'is_jumping',
       'is_crouching', 'is_player_in_move', 'move_id', 'player1_buttons up',
       'player1_buttons down', 'player1_buttons right', 'player1_buttons left',
       'player1.player_buttons.A', 'player1.player_buttons.B',
       'player1.player_buttons.Y', 'player1.player_buttons.R',
       'player1.player_buttons.L', 'Player2_ID', 'Player2 health',
       'Player2 is_jumping', 'Player2 is_crouching',
       'Player2 is_player_in_move', 'Player2 move_id', 'Player2_button_up',
       'Player2_button_down', 'Player2_button_right', 'Player2_button_left',
       'Player2_button_A', 'Player2_button_B', 'Player2_button_Y',
       'Player2_button_R', 'Player2_button_L', 'combo_id'],
      dtype='object')


In [None]:
# import pandas as pd

# # 1. Load the key dataframe
# df_key = pd.read_csv("key.csv")

# # 2. Drop the 'combo_id' column
# df_key = df_key.drop(columns=['combo_id'])

# print(df_key.head())  

   timer  fight_result  has_round_started  is_round_over  Player1_ID  health  \
0    153             0                  0              0           0     176   
1    153             0                  0              0           0     176   
2    153             0                  0              0           0     176   
3    153             0                  0              0           0     176   
4    153             0                  0              0           0     176   

   x_coord  y_coord  is_jumping  is_crouching  ...  Player2 move_id  \
0     -102        0           0             0  ...                0   
1     -102        0           0             0  ...                0   
2     -102        0           0             0  ...                0   
3     -102        0           0             0  ...                0   
4     -102        0           0             0  ...                0   

   Player2_button_up  Player2_button_down  Player2_button_right  \
0                  0     

In [41]:
# 4. List of Player1 Button Columns to Predict
button_targets = [
    'player1_buttons up',
    'player1_buttons down',
    'player1_buttons left',
    'player1_buttons right',
    'player1.player_buttons.A',
    'player1.player_buttons.B',
    'player1.player_buttons.Y',
    'player1.player_buttons.R',
    'player1.player_buttons.L'
]

# Keep only rows where at least one button is pressed
df = df[df[button_targets].any(axis=1)]

# Downsample 'left' and 'right' to 800 each (only from rows where those buttons are 1)
for button in ['player1_buttons left', 'player1_buttons right']:
    count = df[button].sum()
    if count > 800:
        # Get rows where this button is pressed
        pressed = df[df[button] == 1]
        not_pressed = df[df[button] == 0]

        # Sample only 800 pressed rows
        pressed_downsampled = pressed.sample(n=800, random_state=42)

        # Combine with the rest
        df = pd.concat([pressed_downsampled, not_pressed], ignore_index=True)

# Shuffle the dataset
df = df.sample(frac=1, random_state=42).reset_index(drop=True)

# Recount to confirm
print("Updated button press counts:\n", df[button_targets].sum())
df.head()
df.to_csv("keyp2.csv", index=False)


Updated button press counts:
 player1_buttons up          106
player1_buttons down        171
player1_buttons left        800
player1_buttons right       800
player1.player_buttons.A      0
player1.player_buttons.B    505
player1.player_buttons.Y    623
player1.player_buttons.R    362
player1.player_buttons.L    185
dtype: int64


In [None]:
# import pandas as pd

# # Load dataset
# df = pd.read_csv("keyp2.csv")  # Replace with your actual file name
# # 2. Drop the 'combo_id' column
# df = df.drop(columns=['combo_id'])
# # Define button targets to keep rows with any button press
# button_targets = [
#     'player1_buttons up',
#     'player1_buttons down',
#     'player1_buttons left',
#     'player1_buttons right',    
#     'player1.player_buttons.A',
#     'player1.player_buttons.B',
#     'player1.player_buttons.Y',
#     'player1.player_buttons.R',
#     'player1.player_buttons.L'
# ]

# df = df[[col for col in df.columns if not col.startswith('player2')]]

# # Keep only rows where at least one player1 button is active (non-zero)
# df = df[df[button_targets].any(axis=1)]

# # Count the number of times each button is pressed (i.e., value == 1)
# button_press_counts = df[button_targets].sum()

# # Print counts
# print("Button press counts:")
# print(button_press_counts)


# # # Save cleaned data
# df.to_csv("filtered_datasetp2.csv", index=False)
# print("Filtered dataset saved to filtered_dataset.csv")


Button press counts:
player1_buttons up           755
player1_buttons down         256
player1_buttons left        5030
player1_buttons right       9251
player1.player_buttons.A       0
player1.player_buttons.B     659
player1.player_buttons.Y     946
player1.player_buttons.R     688
player1.player_buttons.L     292
dtype: int64
Filtered dataset saved to filtered_dataset.csv


In [None]:
# import pandas as pd

# # Load the filtered dataset
# df = pd.read_csv("filtered_dataset.csv")

# # Define target button columns
# button_targets = [
#     'player1_buttons up',
#     'player1_buttons down',
#     'player1_buttons left',
#     'player1_buttons right',
#     'player1.player_buttons.A',
#     'player1.player_buttons.B',
#     'player1.player_buttons.Y',
#     'player1.player_buttons.R',
#     'player1.player_buttons.L'
# ]
# drop_columns = [
#     'timer', 'fight_result', 'has_round_started', 'is_round_over',
#     'Player1_ID', 'health',
#     'is_jumping', 'is_crouching', 'is_player_in_move', 'move_id'
# ]

# # Define new target counts for left and right
# TARGET_LEFT = 5000
# TARGET_RIGHT = 5000

# # Reduce 'player1_buttons left'
# left_indices = df[df['player1_buttons left'] == 1].index
# if len(left_indices) > TARGET_LEFT:
#     drop_indices = left_indices[TARGET_LEFT:]
#     df.loc[drop_indices, 'player1_buttons left'] = 0

# # Reduce 'player1_buttons right'
# right_indices = df[df['player1_buttons right'] == 1].index
# if len(right_indices) > TARGET_RIGHT:
#     drop_indices = right_indices[TARGET_RIGHT:]
#     df.loc[drop_indices, 'player1_buttons right'] = 0

# # Optional: Remove rows that now have no button pressed
# df = df[df[button_targets].any(axis=1)]

# # Save the adjusted dataset
# df.to_csv("filtered_dataset_reduced_lr.csv", index=False)
# print("Adjusted dataset saved to filtered_dataset_reduced_lr.csv")

# # Confirm new counts
# print("Updated button press counts:")
# print(df[button_targets].sum())
# df = df.drop(columns=drop_columns, errors='ignore')
# df.to_csv("filtered_dataset.csv", index=False)

Adjusted dataset saved to filtered_dataset_reduced_lr.csv
Updated button press counts:
player1_buttons up          3540
player1_buttons down        9418
player1_buttons left        5000
player1_buttons right       5000
player1.player_buttons.A    1692
player1.player_buttons.B    3310
player1.player_buttons.Y     340
player1.player_buttons.R    2932
player1.player_buttons.L    5130
dtype: int64


In [None]:
# df =df.drop_duplicates()
# print(df[button_targets].sum())
# df.to_csv("filtered_dataset.csv", index=False)


player1_buttons up          1916
player1_buttons down        2906
player1_buttons right       3403
player1_buttons left        3066
player1.player_buttons.A     469
player1.player_buttons.B    1089
player1.player_buttons.Y     103
player1.player_buttons.R     912
player1.player_buttons.L    1303
dtype: int64


In [None]:
# import pandas as pd
# from sklearn.utils import resample

# # Load your filtered dataset
# df = pd.read_csv("filtered_dataset.csv")

# # Target button columns
# button_targets = [
#     'player1_buttons up',
#     'player1_buttons down',
#     'player1_buttons left',
#     'player1_buttons right',
#     'player1.player_buttons.A',
#     'player1.player_buttons.B',
#     'player1.player_buttons.Y',
#     'player1.player_buttons.R',
#     'player1.player_buttons.L'
# ]

# # Columns to remove
# drop_columns = [
#     'timer', 'fight_result', 'has_round_started', 'is_round_over',
#     'Player1_ID', 'health',
#     'is_jumping', 'is_crouching', 'is_player_in_move', 'move_id'
# ]

# TARGET_COUNT = 20000
# synth_dfs = []

# # Step 1: Collect rows where button==1 for each button
# for button in button_targets:
#     pos_rows = df[df[button] == 1]
#     if len(pos_rows) >= TARGET_COUNT:
#         sampled = pos_rows.sample(n=TARGET_COUNT, random_state=42)
#     else:
#         sampled = resample(pos_rows, replace=True, n_samples=TARGET_COUNT, random_state=42)
#     synth_dfs.append(sampled)

# # Step 2: Combine all rows (reused allowed)
# synthetic_df = pd.concat(synth_dfs).reset_index(drop=True)

# # Step 3: Trim each column to have exactly TARGET_COUNT presses
# for button in button_targets:
#     idx = synthetic_df[synthetic_df[button] == 1].index
#     if len(idx) > TARGET_COUNT:
#         to_zero = idx[TARGET_COUNT:]  # keep only first TARGET_COUNT
#         synthetic_df.loc[to_zero, button] = 0

# # Step 4: Drop unwanted columns
# synthetic_df = synthetic_df.drop(columns=drop_columns, errors='ignore')

# # Step 5: Shuffle and filter
# synthetic_df = synthetic_df.sample(frac=1, random_state=42).reset_index(drop=True)
# synthetic_df = synthetic_df[synthetic_df[button_targets].any(axis=1)]

# # Step 6: Save to CSV
# synthetic_df.to_csv("synthetic_balanced_fixed.csv", index=False)

# # Confirm
# print("Final counts (each should be 20000):")
# print(synthetic_df[button_targets].sum())
# print("Final dataset shape:", synthetic_df.shape)


Final counts (each should be 20000):
player1_buttons up          20000
player1_buttons down        20000
player1_buttons left        20000
player1_buttons right       20000
player1.player_buttons.A    20000
player1.player_buttons.B    20000
player1.player_buttons.Y    20000
player1.player_buttons.R    20000
player1.player_buttons.L    20000
dtype: int64
Final dataset shape: (139172, 19)


In [None]:
# print(synthetic_df.columns)


Index(['x_coord', 'y_coord', 'player1_buttons up', 'player1_buttons down',
       'player1_buttons right', 'player1_buttons left',
       'player1.player_buttons.A', 'player1.player_buttons.B',
       'player1.player_buttons.Y', 'player1.player_buttons.R',
       'player1.player_buttons.L', 'Player2_ID', 'Player2 health',
       'Player2 x_coord', 'Player2 y_coord', 'Player2 is_jumping',
       'Player2 is_crouching', 'Player2 is_player_in_move', 'Player2 move_id'],
      dtype='object')


In [None]:
# import pandas as pd
# import numpy as np
# from sklearn.model_selection import train_test_split
# from sklearn.ensemble import RandomForestClassifier
# from sklearn.metrics import classification_report


In [4]:
# # 1. Import Libraries
# import pandas as pd
# import numpy as np
# from sklearn.model_selection import train_test_split
# from sklearn.ensemble import RandomForestClassifier
# from sklearn.metrics import classification_report
# import joblib
# import os

# # 2. Load Dataset
# df = pd.read_csv("synthetic_balanced_fixed.csv")  # Replace with your filename

# # 3. Drop all Player2 Columns
# df = df[[col for col in df.columns if not col.startswith('player2')]]

# # 4. List of Player1 Button Columns to Predict
# button_targets = [
#     'player1_buttons up',
#     'player1_buttons down',
#     'player1_buttons left',
#     'player1_buttons right',
#     'player1.player_buttons.A',
#     'player1.player_buttons.B',
#     'player1.player_buttons.Y',
#     'player1.player_buttons.R',
#     'player1.player_buttons.L'
# ]

# df = df[df[button_targets].any(axis=1)]
# # 5. Make Directory to Save Models
# os.makedirs("saved_models", exist_ok=True)

# # 6. Train & Save Model for Each Button
# for target in button_targets:
#     print(f"\nTraining model for: {target}")

#     # Define features: drop all targets
#     X = df.drop(columns=target)
#     y = df[target]

#     # Train-test split
#     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

#     # Train classifier
#     clf = RandomForestClassifier(n_estimators=100, random_state=42)
#     clf.fit(X_train, y_train)

#     # Evaluate
#     y_pred = clf.predict(X_test)
#     print(classification_report(y_test, y_pred))

#     # Save model
#     model_filename = f"saved_models/{target.replace(' ', '_').replace('.', '')}_model.pkl"
#     joblib.dump(clf, model_filename)
#     print(f"Saved model to: {model_filename}")


In [None]:
# print(X_train.columns)

Index(['timer', 'fight_result', 'has_round_started', 'is_round_over',
       'Player1_ID', 'health', 'x_coord', 'y_coord', 'is_jumping',
       'is_crouching', 'is_player_in_move', 'move_id', 'player1_buttons up',
       'player1_buttons down', 'player1_buttons right', 'player1_buttons left',
       'player1.player_buttons.A', 'player1.player_buttons.B',
       'player1.player_buttons.Y', 'player1.player_buttons.R', 'Player2_ID',
       'Player2 health', 'Player2 x_coord', 'Player2 y_coord',
       'Player2 is_jumping', 'Player2 is_crouching',
       'Player2 is_player_in_move', 'Player2 move_id'],
      dtype='object')


In [3]:
# input_data = {
#     'timer': 60,
#     'fight_result': 0,
#     'has_round_started': 1,
#     'is_round_over': 0,
#     'Player1_ID': 1,
#     'health': 120,
#     'x_coord': 150,
#     'y_coord': 50,
#     'is_jumping': 1,
#     'is_crouching': 0,
#     'is_player_in_move': 1,
#     'move_id': 2,
#     'player1_buttons up': 0,
#     'player1_buttons down': 0,
#     'player1_buttons right': 0,
#     'player1_buttons left': 0,
#     'player1.player_buttons.A': 1,
#     'player1.player_buttons.B': 0,
#     'player1.player_buttons.Y': 0,
#     'player1.player_buttons.R': 0,
#     'player1.player_buttons.L': 0,
#     "Player2_ID":6,
#     "Player2 health":176,
#     "Player2 x_coord":200,
#     "Player2 y_coord":100,
#     "Player2 is_jumping":0,
#     "Player2 is_crouching":0,
#     "Player2 is_player_in_move":0,
#     "Player2 move_id":0
# }

# input_df = pd.DataFrame([input_data])

# model = joblib.load("saved_models/player1_buttons_up_model.pkl")

# # Predict
# prediction = model.predict(input_df)

# print(f"Predicted value for 'player1_buttons up': {prediction[0]}")

In [None]:
import lightgbm as lgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import pandas as pd 
import os 
import joblib

# 2. Load Dataset
df = pd.read_csv("keyp2.csv")  # Replace with your filename

# 3. Drop all Player2 Columns
df = df[[col for col in df.columns if not col.startswith('Player2_but')]]
print(df.columns)
# 4. List of Player1 Button Columns to Predict
button_targets = [
    'player1_buttons up',
    'player1_buttons down',
    'player1_buttons left',
    'player1_buttons right',
    'player1.player_buttons.A',
    'player1.player_buttons.B',
    'player1.player_buttons.Y',
    'player1.player_buttons.R',
    'player1.player_buttons.L'
]

df = df[df[button_targets].any(axis=1)]

os.makedirs("saved_models", exist_ok=True)

for target in button_targets:
    print(f"\nTraining LightGBM model for: {target}")

    X = df.drop(columns=target)
    X = X.drop(columns=['timer','combo_present'])
    y = df[target]

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    clf = lgb.LGBMClassifier(n_estimators=100, random_state=42)
    clf.fit(X_train, y_train)

    y_pred = clf.predict(X_test)
    print(classification_report(y_test, y_pred))
    
    joblib.dump(clf, f"saved_modelsp2/{target.replace(' ', '_').replace('.', '')}_model.pkl")


Index(['timer', 'fight_result', 'has_round_started', 'is_round_over',
       'Player1_ID', 'health', 'x_coord', 'y_coord', 'is_jumping',
       'is_crouching', 'is_player_in_move', 'move_id', 'player1_buttons up',
       'player1_buttons down', 'player1_buttons right', 'player1_buttons left',
       'player1.player_buttons.A', 'player1.player_buttons.B',
       'player1.player_buttons.Y', 'player1.player_buttons.R',
       'player1.player_buttons.L', 'Player2_ID', 'Player2 health',
       'Player2 is_jumping', 'Player2 is_crouching',
       'Player2 is_player_in_move', 'Player2 move_id', 'combo_present'],
      dtype='object')

Training LightGBM model for: player1_buttons up
[LightGBM] [Info] Number of positive: 88, number of negative: 2536
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000476 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total

In [62]:
print(X.columns)

Index(['fight_result', 'has_round_started', 'is_round_over', 'Player1_ID',
       'health', 'x_coord', 'y_coord', 'is_jumping', 'is_crouching',
       'is_player_in_move', 'move_id', 'player1_buttons up',
       'player1_buttons down', 'player1_buttons right', 'player1_buttons left',
       'player1.player_buttons.A', 'player1.player_buttons.B',
       'player1.player_buttons.Y', 'player1.player_buttons.R', 'Player2_ID',
       'Player2 health', 'Player2 is_jumping', 'Player2 is_crouching',
       'Player2 is_player_in_move', 'Player2 move_id'],
      dtype='object')


In [51]:
button_targets = [
    'player1_buttons up',
    'player1_buttons down',
    'player1_buttons left',
    'player1_buttons right',
    'player1.player_buttons.A',
    'player1.player_buttons.B',
    'player1.player_buttons.Y',
    'player1.player_buttons.R',
    'player1.player_buttons.L'
]
# X_train[button_targets].sum()

In [60]:
print(X.columns)

Index(['fight_result', 'has_round_started', 'is_round_over', 'Player1_ID',
       'health', 'x_coord', 'y_coord', 'is_jumping', 'is_crouching',
       'is_player_in_move', 'move_id', 'player1_buttons up',
       'player1_buttons down', 'player1_buttons right', 'player1_buttons left',
       'player1.player_buttons.A', 'player1.player_buttons.B',
       'player1.player_buttons.Y', 'player1.player_buttons.R', 'Player2_ID',
       'Player2 health', 'Player2 is_jumping', 'Player2 is_crouching',
       'Player2 is_player_in_move', 'Player2 move_id', 'combo_present'],
      dtype='object')


In [63]:
import pandas as pd
import joblib

input_data = {
    # 'timer': 100,
    'fight_result': 0,
    'has_round_started': 1,
    'is_round_over': 0,
    'Player1_ID': 3,
    'health': 120,
    'x_coord': 200,
    'y_coord': 192,
    'is_jumping': 0,
    'is_crouching': 0,
    'is_player_in_move': 0,
    'move_id': 0,
    'player1_buttons up': 0,
    'player1_buttons down': 0,
    'player1_buttons right': 0,
    'player1_buttons left': 0,
    'player1.player_buttons.A': 0,
    'player1.player_buttons.B': 0,
    'player1.player_buttons.Y': 0,
    'player1.player_buttons.R': 0,
    'player1.player_buttons.L': 0,
    "Player2_ID": 6,
    "Player2 health": 120,
    "Player2 is_jumping": 1,
    "Player2 is_crouching": 0,
    "Player2 is_player_in_move": 0,
    "Player2 move_id": 0
}
print(len(input_data))
button_targets = [
    'player1_buttons up',
    'player1_buttons down',
    'player1_buttons right',
    'player1_buttons left',
    'player1.player_buttons.A',
    'player1.player_buttons.B',
    'player1.player_buttons.Y',
    'player1.player_buttons.R',
    'player1.player_buttons.L'
]

original_df = pd.DataFrame([input_data])

predicted_buttons = {}

for target in button_targets:
    input_features = original_df.drop(columns=[target], errors='ignore')
    model_path = f"saved_modelsp2/{target.replace(' ', '_').replace('.', '')}_model.pkl"
    model = joblib.load(model_path)

    prediction = model.predict(input_features)[0]
    predicted_buttons[target] = prediction

print("\nPredicted Player1 button states:")
for key, value in predicted_buttons.items():
    print(f"{key}: {value}")


26

Predicted Player1 button states:
player1_buttons up: 0
player1_buttons down: 0
player1_buttons right: 1
player1_buttons left: 1
player1.player_buttons.A: 0
player1.player_buttons.B: 1
player1.player_buttons.Y: 1
player1.player_buttons.R: 0
player1.player_buttons.L: 0


In [64]:
print(len(input_data))

26
