In [None]:
import pandas as pd

In [None]:
# Read the raw records and order them by `period`, largest value first.
data = pd.read_csv('data.csv').sort_values(by=['period'], ascending=False)

data.head()

In [None]:
# Discard `price`, then split the string form of each raw `period` value:
# the first 8 characters stay in `period`, whatever follows them becomes
# `count_of_the_day`. Both parts are stored as integers.
data = (
    data
    .drop(columns='price')
    .assign(
        # Evaluated first, while `period` still holds the full raw identifier.
        count_of_the_day=lambda frame: frame['period'].astype(str).str[8:].astype('int64'),
        period=lambda frame: frame['period'].astype(str).str[:8].astype('int64'),
    )
)

data.head()

In [None]:
# Shift every colour label down by one.
# NOTE(review): presumably the raw labels start at 1 and this makes them
# 0-based for one-hot encoding - confirm against data.csv.
data['target_colour'] = data['target_colour'] - 1

# Snapshot of the frame at this stage; `predictor` looks records up in it.
og_data = data.copy()

# Kept as the last expression of the cell so the preview is actually rendered.
data.head()


In [None]:
def previous_counts(row, df, window=25):
    """Collect the targets of the records that precede ``row`` on the same day.

    Written to be used with ``DataFrame.apply(..., axis=1)``.

    Parameters
    ----------
    row : pd.Series or mapping
        Must provide ``'period'`` and ``'count_of_the_day'``.
    df : pd.DataFrame
        Frame that is searched. Needs the columns ``period``,
        ``count_of_the_day``, ``target_number`` and ``target_colour``.
    window : int, default 25
        How far back to look: records whose ``count_of_the_day`` lies in
        ``[count - window, count - 1]`` are selected.

    Returns
    -------
    pd.Series
        Two entries, ``previous_25_numbers`` and ``previous_25_colours``,
        each a NumPy array (empty when nothing matches). The key names keep
        the "25" regardless of ``window`` so existing column names still work.

    Notes
    -----
    The values are returned in ``df``'s row order; they are not re-sorted
    here. Every call scans the whole of ``df``.
    """
    current_count = row['count_of_the_day']
    # A single mask replaces the "earlier on the same day" pre-filter and the
    # min/max clamp: the upper bound already excludes the current record.
    in_window = (df['period'] == row['period']) & df['count_of_the_day'].between(
        current_count - window, current_count - 1
    )
    matches = df.loc[in_window]
    return pd.Series({
        'previous_25_numbers': matches['target_number'].values,
        'previous_25_colours': matches['target_colour'].values,
    })

# Build the two history columns row by row; every row is matched against the
# full frame.
data[['previous_25_numbers', 'previous_25_colours']] = data.apply(
    lambda row: previous_counts(row, data),
    axis=1,
)

data.head()

In [None]:
# Quick statistical overview of `data` (pandas' default `describe()` output).
data.describe()

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Masking, Bidirectional, LSTM, Dense, concatenate, Flatten, Dropout, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping

from tensorflow.keras.preprocessing.sequence import pad_sequences

import numpy as np

In [None]:
# Make sure every stored history is a NumPy array (some rows may hold lists).
for history_column in ('previous_25_numbers', 'previous_25_colours'):
    data[history_column] = data[history_column].map(np.array)

In [None]:
# Left-pad each history with -1 up to a fixed length of 25 steps.
X_numbers, X_colours = (
    pad_sequences(data[column], maxlen=25, padding='pre', value=-1)
    for column in ('previous_25_numbers', 'previous_25_colours')
)

# Auxiliary input: only `count_of_the_day` is fed to the model; the `period`
# column is not included.
X_period_count = data[['count_of_the_day']].values

In [None]:
# Targets for the two output heads.
y_number = data['target_number']
y_colour = data['target_colour']

# Size each one-hot encoding by the largest label rather than by the number
# of distinct labels, so a label value that never occurs in the data does not
# make the encoding too narrow for the labels above it.
num_classes_number = int(y_number.max()) + 1
num_classes_colour = int(y_colour.max()) + 1

# `to_categorical` is not imported by name anywhere in the notebook, so it is
# reached through the already-imported `tf` namespace.
y_encoded_number = tf.keras.utils.to_categorical(y_number, num_classes=num_classes_number, dtype="int32")
y_encoded_colour = tf.keras.utils.to_categorical(y_colour, num_classes=num_classes_colour, dtype="int32")

In [None]:
# Three inputs: two 25-step sequences with one feature per step (previous
# numbers and previous colours) and one scalar (count of the day).
input_numbers = Input(shape=(25, 1))
input_colours = Input(shape=(25, 1))
input_period_count = Input(shape=(1,))

# -1 is the value the sequences are padded with, so those steps are masked.
masked_numbers = Masking(mask_value=-1)(input_numbers)
masked_colours = Masking(mask_value=-1)(input_colours)

# One bidirectional LSTM per sequence, keeping the output of every step.
lstm_numbers = Bidirectional(LSTM(128, return_sequences=True))(masked_numbers)
lstm_colours = Bidirectional(LSTM(128, return_sequences=True))(masked_colours)

lstm_numbers_flat = Flatten()(lstm_numbers)
lstm_colours_flat = Flatten()(lstm_colours)

# Shared trunk on top of both sequence encodings plus the scalar input.
concatenated_layers = concatenate([lstm_numbers_flat, lstm_colours_flat, input_period_count])
dense = Dense(64, activation='relu')(concatenated_layers)
dropout = Dropout(0.5)(dense)
dense = Dense(64, activation='elu')(dropout)
batch = BatchNormalization()(dense)
dense = Dense(64, activation='relu')(batch)

# Both heads are fed by the final Dense layer, so every layer defined above
# is part of the model graph.
output_number = Dense(10, activation='softmax')(dense)
output_colour = Dense(2, activation='softmax')(dense)

model = Model(inputs=[input_numbers, input_colours, input_period_count], outputs=[output_number, output_colour])

early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
# The single loss string applies to both heads. Metrics are spelled out per
# output (the documented list-of-lists form) so the spec is unambiguous for a
# two-output model.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=[['accuracy'], ['accuracy']],
)

In [None]:
# NOTE(review): Keras takes the `validation_split` slice from the end of the
# arrays, before shuffling. With the frame sorted by `period` in descending
# order that would be the rows with the lowest periods - confirm this is the
# intended hold-out.
history = model.fit(
    [X_numbers, X_colours, X_period_count],
    [y_encoded_number, y_encoded_colour],
    epochs=100,
    batch_size=32,
    validation_split=0.2,
    callbacks=[early_stopping],
)

In [None]:
# Print the layer-by-layer overview of the model.
model.summary()
# Write the model to 'model.h5' in the current working directory.
model.save('model.h5')

In [None]:
def get_previous_records(period, count_of_the_day, df, window=25):
    """Return the targets of the records just before a given position in a day.

    Parameters
    ----------
    period : int
        Value to match against ``df['period']``.
    count_of_the_day : int
        Position within the day; only records strictly before it are used.
    df : pd.DataFrame
        Frame that is searched. Needs the columns ``period``,
        ``count_of_the_day``, ``target_number`` and ``target_colour``.
    window : int, default 25
        How far back to look: records whose ``count_of_the_day`` lies in
        ``[count_of_the_day - window, count_of_the_day - 1]`` are selected.

    Returns
    -------
    tuple of np.ndarray
        ``(numbers, colours)`` taken from ``target_number`` and
        ``target_colour``. Both are empty arrays when nothing matches, so the
        return type does not depend on whether history exists.

    Notes
    -----
    The values are returned in ``df``'s row order; they are not re-sorted here.
    """
    # A single mask replaces the "earlier on the same day" pre-filter and the
    # min/max clamp: the upper bound already excludes the current position.
    in_window = (df['period'] == period) & df['count_of_the_day'].between(
        count_of_the_day - window, count_of_the_day - 1
    )
    matches = df.loc[in_window]
    return matches['target_number'].values, matches['target_colour'].values

def predictor(model, value, history=None, verbose=False):
    """Predict the number and colour classes for one period identifier.

    Parameters
    ----------
    model : object with a Keras-style ``predict`` method
        Called as ``model.predict([numbers, colours, period_count])``. The
        first returned output is read as the number head, the second as the
        colour head.
    value : str or int
        Identifier whose first 8 characters are the period and whose
        remaining characters are the count of the day. It is converted with
        ``str()`` first, so an integer works as well as a string.
    history : pd.DataFrame, optional
        Records to draw the preceding results from. Defaults to the
        notebook-level ``og_data`` snapshot.
    verbose : bool, default False
        When true, print the arrays that are passed to the model.

    Returns
    -------
    tuple
        ``(number_class, colour_class)``: the argmax index of each output.
        NOTE(review): the notebook subtracts 1 from ``target_colour`` before
        training, so the colour index is presumably one below the raw label -
        confirm before reporting it.

    Raises
    ------
    ValueError
        If either part of ``value`` cannot be parsed as an integer, e.g. when
        nothing follows the first 8 characters.
    """
    identifier = str(value)
    period = int(identifier[:8])
    count_of_the_day = int(identifier[8:])

    if history is None:
        # Looked up at call time so a refreshed snapshot is picked up.
        history = og_data

    # Only the count of the day is fed as the auxiliary input.
    period_count = np.array([count_of_the_day]).reshape(1, -1)

    previous_numbers, previous_colours = get_previous_records(period, count_of_the_day, history)

    # A batch of one, left-padded with -1 to 25 steps.
    previous_numbers = pad_sequences([previous_numbers], maxlen=25, padding='pre', value=-1)
    previous_colours = pad_sequences([previous_colours], maxlen=25, padding='pre', value=-1)

    if verbose:
        print(period_count, previous_numbers, previous_colours)

    predicted_val = model.predict([previous_numbers, previous_colours, period_count])

    return np.argmax(predicted_val[0]), np.argmax(predicted_val[1])

# Example call: '20231116' (the first 8 characters) is parsed as the period
# and '387' (the remainder) as the count of the day.
predictor(model, '20231116387')