In [7]:
import pandas as pd

# Raise pandas' display limits so large frames are rendered in full
# (up to 15000 rows and 1000 columns) rather than being elided.
pd.options.display.max_rows = 15000
pd.options.display.max_columns = 1000
import boto3
import awswrangler as wr
import numpy as np
# Region used for the default boto3 session created here.
AWS_REGION = "il-central-1"
boto3.setup_default_session(region_name=AWS_REGION)

In [None]:
# Athena database queried below.
database = "winner-db"

# Read every row and column of the `gold` table into a DataFrame.
GOLD_QUERY = "SELECT * FROM gold"
gold = wr.athena.read_sql_query(sql=GOLD_QUERY, database=database)

In [3]:
# Order rows by run_time, then collect one (key, frame) pair per
# (id, option1) combination; rows inside each frame keep the run_time order.
grouped_gold = [
    (group_key, group_frame)
    for group_key, group_frame in gold.sort_values(by="run_time").groupby(
        ["id", "option1"]
    )
]

In [None]:
# Preview the first five rows of the loaded table.
gold.head(n=5)

In [None]:
import tensorflow as tf
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout


def prepare_data(grouped_data, MAX_SEQ_LENGTH=1000, seed=None):
    """Build fixed-length sequences from grouped frames and split them into train/test sets.

    For every ``(key, DataFrame)`` pair the ``ratio1``/``ratio2``/``ratio3``
    columns become the feature sequence (missing values replaced by 0.0) and
    the last row of ``bet1_won``/``bet2_won``/``tie_won`` becomes the target.
    Each sequence is forced to exactly ``MAX_SEQ_LENGTH`` rows: shorter
    sequences are zero-padded at the front, longer ones keep only their last
    ``MAX_SEQ_LENGTH`` rows. Each group is then assigned at random to the
    training set (probability 0.8) or the test set.

    Args:
        grouped_data: Iterable of ``(key, DataFrame)`` pairs, e.g. the result
            of ``list(df.groupby(...))``. Empty frames are skipped.
        MAX_SEQ_LENGTH: Number of time steps in every output sequence.
        seed: Optional seed for the train/test assignment. ``None`` (default)
            draws from NumPy's global random state, as before.

    Returns:
        Tuple ``(X_train, y_train, X_test, y_test)`` of float arrays. The ``X``
        arrays have shape ``(n_groups, MAX_SEQ_LENGTH, 3)`` and the ``y``
        arrays have shape ``(n_groups, 3)``; an empty split keeps those
        trailing dimensions with ``n_groups == 0``.

    Raises:
        ValueError: If ``MAX_SEQ_LENGTH`` is smaller than 1.
    """
    if MAX_SEQ_LENGTH < 1:
        raise ValueError("MAX_SEQ_LENGTH must be a positive integer")

    feature_cols = ["ratio1", "ratio2", "ratio3"]
    target_cols = ["bet1_won", "bet2_won", "tie_won"]

    # Both the numpy.random module and a RandomState instance expose .rand(),
    # so seed=None keeps using the global generator.
    rng = np.random if seed is None else np.random.RandomState(seed)

    X_train, y_train, X_test, y_test = [], [], [], []
    for _, group_df in grouped_data:
        if len(group_df) == 0:
            # No last row to take a target from.
            continue

        features = group_df[feature_cols].astype(float).fillna(0.0).to_numpy()
        target = group_df[target_cols].values[-1].astype(float)

        # Keep at most the last MAX_SEQ_LENGTH rows so every sample has the
        # same shape, then right-align them in a zero-filled buffer.
        window = features[-MAX_SEQ_LENGTH:]
        padded_features = np.zeros((MAX_SEQ_LENGTH, len(feature_cols)))
        padded_features[MAX_SEQ_LENGTH - window.shape[0]:, :] = window

        # Randomly assign to training or testing set (80% train, 20% test)
        if rng.rand() < 0.8:
            X_train.append(padded_features)
            y_train.append(target)
        else:
            X_test.append(padded_features)
            y_test.append(target)

    # The explicit reshape keeps the trailing dimensions even when a split is empty.
    x_shape = (-1, MAX_SEQ_LENGTH, len(feature_cols))
    y_shape = (-1, len(target_cols))
    X_train = np.asarray(X_train, dtype=float).reshape(x_shape)
    X_test = np.asarray(X_test, dtype=float).reshape(x_shape)
    y_train = np.asarray(y_train, dtype=float).reshape(y_shape)
    y_test = np.asarray(y_test, dtype=float).reshape(y_shape)

    return X_train, y_train, X_test, y_test

# Seed NumPy and TensorFlow so the random train/test split made by
# prepare_data, the weight initialisation and dropout are repeatable.
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Prepare training and testing data
X_train, y_train, X_test, y_test = prepare_data(grouped_gold)

# The split is random per group, so a small dataset can leave one side empty.
# Stop here with a clear message instead of failing inside fit()/evaluate().
if len(X_train) == 0 or len(X_test) == 0:
    raise ValueError(
        f"Train/test split produced an empty set "
        f"(train={len(X_train)}, test={len(X_test)}); need more groups."
    )

# Define and build the LSTM model: three stacked LSTM layers followed by
# dropout and a 3-unit sigmoid output (one unit per target column).
model = Sequential()
# Declare the (time steps, features) input explicitly rather than through the
# deprecated `input_shape=` layer argument.
model.add(tf.keras.Input(shape=(X_train.shape[1], X_train.shape[2])))
model.add(LSTM(64, return_sequences=True))
model.add(LSTM(32, activation="sigmoid", return_sequences=True))

model.add(LSTM(32, activation="sigmoid"))
model.add(Dropout(0.2))
model.add(Dense(3, activation="sigmoid"))

# clipvalue bounds each gradient element to [-1, 1].
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001, clipvalue=1.0)
model.compile(loss="binary_crossentropy", optimizer=optimizer, metrics=["accuracy"])

# Train the model; the held-out split is also used as per-epoch validation data.
model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_test, y_test))

# Evaluate the model (optional)
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Loss: {loss:.4f}, Test Accuracy: {accuracy:.4f}")

# Persist the trained model in HDF5 format.
model.save("model_prototype.h5")