In [None]:
import numpy as np
import pandas as pd
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import LSTM, Dense, Dropout, Input
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam

In [None]:
# Read the reduced LSTM dataset (CSV on the mounted Drive folder) into a DataFrame.
dataset_path = 'drive/MyDrive/lstm_dataset_reduced.csv'
df = pd.read_csv(dataset_path)


In [None]:
# Separate the 'cheater' label column from the remaining feature columns,
# keeping both as plain NumPy arrays.
y = df['cheater'].values
X = df.drop(columns='cheater').values

In [None]:
# Sanity check: dimensions of the flat feature matrix as (rows, feature columns).
X.shape

(69120000, 4)

In [None]:
# Dimensions used to fold the flat feature matrix into a 4-D array of shape
# (players, engagements, timesteps, features).
# 12000 * 30 * 192 = 69,120,000 rows, which is the row count the reshape requires.
n_players = 12000  # first axis of the reshaped array (one entry per player)
n_engagements = 30  # engagements per player
n_timesteps = 192  # timesteps per engagement; also the LSTM sequence length
n_features = 4  # feature columns per timestep

In [None]:
# Fold the flat rows into (player, engagement, timestep, feature); this relies
# on the rows being stored contiguously in exactly that nesting order.
X_reshaped = np.reshape(
    X,
    (n_players, n_engagements, n_timesteps, n_features),
)


In [None]:
# Sanity check: expect (n_players, n_engagements, n_timesteps, n_features).
X_reshaped.shape

(12000, 30, 192, 4)

In [None]:
# Player-level labels. Because of the ordering of the original combined
# dataset, the labels are simply 2000 ones followed by 10000 zeros.
y_reshaped = np.repeat([1.0, 0.0], [2000, 10000])
y_reshaped.shape

(12000,)

In [None]:
# Stratified 80/20 split at the player level with a fixed seed, so both splits
# keep the same class ratio and the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X_reshaped,
    y_reshaped,
    test_size=0.2,
    random_state=31,
    stratify=y_reshaped,
)

In [None]:
# Engagement encoder: maps one engagement of shape (n_timesteps, n_features)
# to a 64-dimensional vector (the LSTM's final hidden state, then dropout).
# The input shape is declared with an explicit Input layer rather than the
# `input_shape=` layer argument, which Keras warns about.
# NOTE(review): no fit() call for lstm_model is visible in this notebook
# section, so predict() would run with the initial weights - confirm intended.
lstm_model = Sequential()
lstm_model.add(Input(shape=(n_timesteps, n_features)))
lstm_model.add(LSTM(64, return_sequences=False))  # only the final hidden state is returned
lstm_model.add(Dropout(0.2))

  super().__init__(**kwargs)


In [None]:
# Configure the encoder with Adam (lr = 0.001) and binary cross-entropy.
# NOTE(review): the encoder ends in a 64-unit LSTM + Dropout, not a single
# sigmoid unit - confirm this loss is what is intended for it.
encoder_optimizer = Adam(learning_rate=0.001)
lstm_model.compile(loss='binary_crossentropy', optimizer=encoder_optimizer)

In [None]:
# Balanced class weights: weight_c = total_samples / (n_classes * count_c),
# so the rarer class receives a proportionally larger weight.
total_samples = len(y_reshaped)
# class_counts has to exist before n_classes is derived from it; the previous
# ordering only worked when a stale `class_counts` was left in the kernel.
class_counts = np.bincount(y_reshaped.astype(int))
n_classes = len(class_counts)
weights = total_samples / (n_classes * class_counts)
# Map integer class index -> plain Python float weight.
class_weights = {label: float(weight) for label, weight in enumerate(weights)}

In [None]:
# Display the class-index -> weight mapping.
# NOTE(review): the mapping is built with enumerate(), which yields integer
# keys; a recorded output showing float keys would come from an earlier
# version of the weights cell - re-run to refresh it.
class_weights

{0.0: 1.2, 1.0: 6.0}

In [None]:
def get_engagement_representations(X_train, lstm_model):
    """Encode every engagement and average the encodings per player.

    Parameters
    ----------
    X_train : array-like, shape (players, engagements, timesteps, features)
        Per-player stack of engagement sequences.
    lstm_model : object with a Keras-style ``predict(x, verbose=...)`` method
        Encoder mapping a batch of shape (batch, timesteps, features) to
        (batch, units).

    Returns
    -------
    numpy.ndarray, shape (players, units)
        Mean engagement representation of each player.

    Raises
    ------
    ValueError
        If ``X_train`` is not 4-dimensional.

    Notes
    -----
    All engagements are encoded with a single batched ``predict`` call instead
    of one call per engagement, which avoids per-call overhead and the flood of
    progress-bar output. The result is 2-D, so it can be fed directly to a
    model whose input shape is ``(units,)``.
    """
    X_train = np.asarray(X_train)
    if X_train.ndim != 4:
        raise ValueError(
            "expected input of shape (players, engagements, timesteps, features), "
            f"got {X_train.shape}"
        )

    # Read the sizes from the data itself rather than from module-level globals.
    players, engagements, timesteps, features = X_train.shape

    # Flatten players x engagements into one batch axis for the encoder.
    flat_engagements = X_train.reshape(players * engagements, timesteps, features)
    flat_reps = np.asarray(lstm_model.predict(flat_engagements, verbose=0))

    # Restore the player axis and average over each player's engagements.
    per_player_reps = flat_reps.reshape(players, engagements, -1)
    return per_player_reps.mean(axis=1)


In [None]:
# Encode both splits with the same encoder (train first, then test) to obtain
# one aggregated representation per player.
X_train_aggregated, X_test_aggregated = (
    get_engagement_representations(split, lstm_model)
    for split in (X_train, X_test)
)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 179ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3

Exception ignored in: <function _xla_gc_callback at 0x7bae735bb910>
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/jax/_src/lib/__init__.py", line 96, in _xla_gc_callback
    def _xla_gc_callback(*args):
KeyboardInterrupt: 


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 76ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 79ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 95ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 87ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 61ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 59ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 94ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 95ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 62ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 94ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 88ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 60ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step

In [None]:
# Final player-level classifier: a single sigmoid unit on top of the
# aggregated 64-dimensional player representation (64 = LSTM units).
# The input shape is declared with an explicit Input layer rather than the
# `input_shape=` layer argument, which Keras warns about.
final_model = Sequential()
final_model.add(Input(shape=(64,)))  # same size as the aggregated representation
final_model.add(Dense(1, activation='sigmoid'))

# compile the final player-level model
final_model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [None]:
# Fit the final model on the aggregated player representations.
# Keras' fit() takes `class_weight` (singular); the misspelled `class_weights`
# keyword is rejected as an unexpected argument.
final_model.fit(
    X_train_aggregated,
    y_train,
    epochs=10,
    batch_size=32,
    validation_split=0.2,
    class_weight=class_weights,
    verbose=1,
)
