In [1]:
import pandas as pd 


In [2]:
# Load the raw results workbook, using the "Period" column as the row index.
df = pd.read_excel("K3_data.xlsx", index_col="Period")

In [4]:
# List the column labels of the loaded frame.
df.columns

Index(['Sum', 'Big_Small', 'Odd_Even'], dtype='object')

In [3]:
# Preview the first rows of the loaded frame.
df.head()

Unnamed: 0_level_0,Sum,Big_Small,Odd_Even
Period,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
20240804090727,16,Big,Even
20240804090726,6,Small,Even
20240804090725,10,Small,Even
20240804090724,9,Small,Odd
20240804090723,6,Small,Even


In [5]:
# Inspect the row index (the "Period" identifiers chosen as index when loading).
df.index

Index([20240804090727, 20240804090726, 20240804090725, 20240804090724,
       20240804090723, 20240804090722, 20240804090721, 20240804090720,
       20240804090719, 20240804090718,
       ...
       20240724090010, 20240724090009, 20240724090008, 20240724090007,
       20240724090006, 20240724090005, 20240724090004, 20240724090003,
       20240724090002, 20240724090001],
      dtype='int64', name='Period', length=16567)

For every result, arrange the results of the preceding `look_back` draws in the same row.

In [3]:


def create_datasheet(df, look_back=10):
    """Build a lag-feature table with one output row per row of ``df``.

    Each output row holds the result at position ``i`` plus, for every lag
    ``n`` in ``1..look_back``, the values found at position ``i + n`` and the
    running number of ``'Even'`` / ``'Big'`` results over positions
    ``i+1 .. i+n``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``'Sum'``, ``'Big_Small'`` and ``'Odd_Even'``.
        The index values are converted with ``str()``; the first eight
        characters are parsed as ``YYYYMMDD``.
        NOTE(review): the function does not sort; it assumes rows are ordered
        newest-first so that ``i + n`` is the draw ``n`` steps earlier - confirm.
    look_back : int, default 10
        Number of lagged positions to attach to every row.

    Returns
    -------
    pandas.DataFrame
        Columns, in order: ``Big_small_Result, Odd_Even_Result, Sum_Result,
        Time, Date, Day_of_Week``, then ``Big_small_n, Odd_Even_n, Sum_n`` for
        each lag, then ``Even_count_n, Big_count_n`` for each lag.  Lags that
        reach past the end of ``df`` are filled with missing values, so callers
        can ``dropna()`` to keep only complete windows.
    """
    # Fixed result/calendar columns first, then the per-lag blocks.
    columns = ['Big_small_Result', 'Odd_Even_Result', 'Sum_Result', 'Time', 'Date', 'Day_of_Week']
    for n in range(1, look_back + 1):
        columns.extend([f"Big_small_{n}", f"Odd_Even_{n}", f"Sum_{n}"])
    for n in range(1, look_back + 1):
        columns.extend([f"Even_count_{n}", f"Big_count_{n}"])

    # Pull the columns out once; plain lists make positional look-ups cheap.
    n_rows = len(df)
    big_small = df['Big_Small'].tolist()
    odd_even = df['Odd_Even'].tolist()
    sums = df['Sum'].tolist()
    periods = [str(period) for period in df.index]

    # Many rows share a date, so parse each distinct YYYYMMDD string only once.
    calendar_cache = {}

    # Collect plain dicts and build the frame once at the end; concatenating a
    # one-row frame per iteration copies the whole table every time.
    records = []
    for i in range(n_rows):
        period_str = periods[i]
        date_str = period_str[:8]  # YYYYMMDD
        # Last four characters of the period id.  The original comment called
        # this HHMM; NOTE(review): the sample ids look like a per-day draw
        # counter rather than a clock time - confirm before interpreting it.
        time_str = period_str[-4:]

        if date_str not in calendar_cache:
            date_obj = pd.to_datetime(date_str, format='%Y%m%d')
            calendar_cache[date_str] = (date_obj.strftime('%d'), date_obj.day_name())
        day_of_month, day_name = calendar_cache[date_str]

        row = {
            'Big_small_Result': big_small[i],
            'Odd_Even_Result': odd_even[i],
            'Sum_Result': sums[i],
            'Time': time_str,
            'Date': day_of_month,  # two-digit day of the month, as a string
            'Day_of_Week': day_name,
        }

        # Running totals over positions i+1 .. i+n, updated one lag at a time
        # instead of re-slicing the column for every lag.
        even_count = 0
        big_count = 0
        for n in range(1, look_back + 1):
            j = i + n
            if j < n_rows:
                row[f"Big_small_{n}"] = big_small[j]
                row[f"Odd_Even_{n}"] = odd_even[j]
                row[f"Sum_{n}"] = sums[j]

                if odd_even[j] == 'Even':
                    even_count += 1
                if big_small[j] == 'Big':
                    big_count += 1
                row[f"Even_count_{n}"] = even_count
                row[f"Big_count_{n}"] = big_count
            else:
                # Not enough history for this lag: leave the cells missing.
                row[f"Big_small_{n}"] = None
                row[f"Odd_Even_{n}"] = None
                row[f"Sum_{n}"] = None
                row[f"Even_count_{n}"] = None
                row[f"Big_count_{n}"] = None

        records.append(row)

    # `columns=` fixes the column order and still yields the full header when
    # `df` is empty.
    return pd.DataFrame(records, columns=columns)


In [5]:
def generate_dummies_column_names(n):
    """Return the names of the categorical columns to one-hot encode.

    The list starts with ``'Odd_Even_Result'`` and ``'Day_of_Week'`` and is
    followed, for every lag ``1..n``, by ``'Big_small_<lag>'`` and
    ``'Odd_Even_<lag>'`` (in that order).
    """
    lagged_names = [
        name
        for lag in range(1, n + 1)
        for name in (f'Big_small_{lag}', f'Odd_Even_{lag}')
    ]
    return ['Odd_Even_Result', 'Day_of_Week'] + lagged_names

In [6]:
def generate_drop_column_names(n):
    """Return the redundant indicator columns to remove after one-hot encoding.

    The list starts with ``'Odd_Even_Result_Odd'`` and is followed, for every
    lag ``1..n``, by ``'Big_small_<lag>_Small'`` and ``'Odd_Even_<lag>_Odd'``
    (in that order).
    """
    redundant = ['Odd_Even_Result_Odd']
    for lag in range(1, n + 1):
        redundant += [f'Big_small_{lag}_Small', f'Odd_Even_{lag}_Odd']
    return redundant

In [29]:
look_back = 30

# Build the lag table, keep only the rows whose whole look-back window is
# populated, and drop the two targets that are not used in this odd/even study.
k3_eda = (
    create_datasheet(df, look_back)
    .dropna()
    .drop(columns=['Big_small_Result', 'Sum_Result'])
)



In [30]:
# Names of the categorical columns to encode and of the redundant indicator
# columns to discard afterwards.
dummies_columns = generate_dummies_column_names(look_back)
drop_columns = generate_drop_column_names(look_back)

# One-hot encode, then keep a single indicator per binary feature:
# the surviving column is set for Even (unset for Odd) and for Big (unset for Small).
k3_eda = pd.get_dummies(k3_eda, columns=dummies_columns).drop(columns=drop_columns)

In [8]:
import pandas as pd
from pandas.plotting import scatter_matrix
import matplotlib.pyplot as plt
from sklearn import model_selection
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC


In [31]:
# Splitting features and target
# Separate the target indicator from the feature columns.
target_column = 'Odd_Even_Result_Even'
y = k3_eda[target_column]
X = k3_eda.drop(columns=[target_column])

In [28]:
# List the feature column names that will be fed to the models.
X.columns

Index(['Time', 'Date', 'Sum_1', 'Sum_2', 'Sum_3', 'Sum_4', 'Sum_5', 'Sum_6',
       'Sum_7', 'Sum_8', 'Sum_9', 'Sum_10', 'Even_count_1', 'Big_count_1',
       'Even_count_2', 'Big_count_2', 'Even_count_3', 'Big_count_3',
       'Even_count_4', 'Big_count_4', 'Even_count_5', 'Big_count_5',
       'Even_count_6', 'Big_count_6', 'Even_count_7', 'Big_count_7',
       'Even_count_8', 'Big_count_8', 'Even_count_9', 'Big_count_9',
       'Even_count_10', 'Big_count_10', 'Day_of_Week_Friday',
       'Day_of_Week_Monday', 'Day_of_Week_Saturday', 'Day_of_Week_Sunday',
       'Day_of_Week_Thursday', 'Day_of_Week_Tuesday', 'Day_of_Week_Wednesday',
       'Big_small_1_Big', 'Odd_Even_1_Even', 'Big_small_2_Big',
       'Odd_Even_2_Even', 'Big_small_3_Big', 'Odd_Even_3_Even',
       'Big_small_4_Big', 'Odd_Even_4_Even', 'Big_small_5_Big',
       'Odd_Even_5_Even', 'Big_small_6_Big', 'Odd_Even_6_Even',
       'Big_small_7_Big', 'Odd_Even_7_Even', 'Big_small_8_Big',
       'Odd_Even_8_Even', 'Big_small

In [37]:
# Import locally so this cell also runs on a fresh kernel: joblib is otherwise
# first imported in a later cell of the notebook.
from joblib import dump

# Persist the feature column labels so the same layout can be rebuilt later.
columns = X.columns
dump(columns, 'k3_Odd_Even_columns(30).joblib')

['k3_Odd_Even_columns(30).joblib']

In [32]:
from sklearn.model_selection import train_test_split
# Hold out 30% of the rows as a test set; random_state=6 makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=6)

In [33]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training rows only, then apply the
# same transformation to both splits.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [36]:
from joblib import dump, load
# Persist the fitted scaler to disk so the identical scaling can be reapplied later.
dump(scaler, 'k3_Odd_Even_scaler(30).joblib')

['k3_Odd_Even_scaler(30).joblib']

Checking the performance of some popular ML models; the best-performing ones will be selected for the ensemble method.

In [27]:
from sklearn.model_selection import train_test_split, KFold, cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score

# Candidate classifiers, all with library defaults.  The decision tree is
# seeded because it permutes features while searching for splits; without a
# seed its scores change from run to run.
models = [
    ('LR', LogisticRegression()),
    ('LDA', LinearDiscriminantAnalysis()),
    ('KNN', KNeighborsClassifier()),
    ('CART', DecisionTreeClassifier(random_state=6)),
    ('NB', GaussianNB()),
    ('SVM', SVC()),
]

# The splitter does not depend on the model, so build it once for all of them.
kfold = KFold(n_splits=10)

# Evaluate each model in turn and display detailed performance
results = []  # per-model arrays of cross-validation accuracies
names = []    # model labels, aligned with `results`
for name, model in models:
    # 10-fold cross-validated accuracy on the training split
    cv_results = cross_val_score(model, X_train, y_train, cv=kfold, scoring='accuracy')
    results.append(cv_results)
    names.append(name)

    msg = "%s: %f (%f)" % (name, cv_results.mean(), cv_results.std())
    print(msg)

    # Refit on the whole training split and score on the held-out test split
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)

    # Display detailed performance metrics
    print(f"\n{name} Model Performance:")
    print("Confusion Matrix:")
    print(confusion_matrix(y_test, predictions))
    print("\nClassification Report:")
    print(classification_report(y_test, predictions))
    print("Accuracy Score: %f" % accuracy_score(y_test, predictions))
    print("\n" + "-"*60 + "\n")


LR: 0.499135 (0.017231)

LR Model Performance:
Confusion Matrix:
[[1202 1261]
 [1214 1288]]

Classification Report:
              precision    recall  f1-score   support

       False       0.50      0.49      0.49      2463
        True       0.51      0.51      0.51      2502

    accuracy                           0.50      4965
   macro avg       0.50      0.50      0.50      4965
weighted avg       0.50      0.50      0.50      4965

Accuracy Score: 0.501511

------------------------------------------------------------

LDA: 0.499048 (0.017296)

LDA Model Performance:
Confusion Matrix:
[[1203 1260]
 [1216 1286]]

Classification Report:
              precision    recall  f1-score   support

       False       0.50      0.49      0.49      2463
        True       0.51      0.51      0.51      2502

    accuracy                           0.50      4965
   macro avg       0.50      0.50      0.50      4965
weighted avg       0.50      0.50      0.50      4965

Accuracy Score: 0.501309

Logistic Regression, Linear Discriminant Analysis and KNN were observed to produce the best results, so these three have been selected for the ensemble method.

In [34]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.metrics import accuracy_score



# Base learners, all with default hyper-parameters
lr = LogisticRegression()    #max_iter=200
lda = LinearDiscriminantAnalysis()
knn = KNeighborsClassifier()    #n_neighbors=5

# Majority-vote ensemble over the three base learners
ensemble_model = VotingClassifier(
    estimators=[('lr', lr), ('lda', lda), ('knn', knn)],
    voting='hard',
)
ensemble_model.fit(X_train, y_train)

# Score the ensemble on the held-out test split
y_pred = ensemble_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Ensemble Model Accuracy: {accuracy:.2f}')

# The ensemble trains its own clones of the estimators, so the standalone
# objects still have to be fitted before they can predict.
for base_model in (lr, lda, knn):
    base_model.fit(X_train, y_train)

lr_pred = lr.predict(X_test)
lda_pred = lda.predict(X_test)
knn_pred = knn.predict(X_test)

# Report each base learner's test accuracy for comparison with the ensemble
for label, preds in (
    ('Logistic Regression', lr_pred),
    ('LDA', lda_pred),
    ('KNN', knn_pred),
):
    print(f'{label} Accuracy: {accuracy_score(y_test, preds):.2f}')


Ensemble Model Accuracy: 0.51
Logistic Regression Accuracy: 0.51
LDA Accuracy: 0.51
KNN Accuracy: 0.51


In [35]:
from joblib import dump, load

# Persist the fitted voting ensemble to disk for later reuse
dump(ensemble_model, 'k3_Odd_Even_ensemble_model(30).joblib')


['k3_Odd_Even_ensemble_model(30).joblib']

Checking the performance of LSTM

In [12]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.optimizers import Adam


# Build the LSTM model
def build_model(input_shape):
    """Create and compile a single-layer LSTM binary classifier.

    Parameters
    ----------
    input_shape : tuple
        Shape of one sample, passed to the LSTM layer as ``input_shape``.

    Returns
    -------
    A compiled ``Sequential`` model: a 50-unit ReLU LSTM followed by a single
    sigmoid output unit, trained with Adam (learning rate 0.001) on binary
    cross-entropy and reporting accuracy.
    """
    network = Sequential([
        LSTM(50, input_shape=input_shape, activation='relu'),
        Dense(1, activation='sigmoid'),
    ])
    network.compile(
        loss='binary_crossentropy',
        optimizer=Adam(learning_rate=0.001),
        metrics=['accuracy'],
    )
    return network

# Train the model
def train_model(model, X_train, y_train, X_test, y_test, epochs=50, batch_size=1):
    """Fit ``model`` on the training data and return whatever ``model.fit`` returns.

    ``(X_test, y_test)`` is handed to ``fit`` as ``validation_data``.  Note that
    the default ``batch_size`` of 1 means one update per sample.
    """
    fit_options = {
        'epochs': epochs,
        'batch_size': batch_size,
        'validation_data': (X_test, y_test),
    }
    return model.fit(X_train, y_train, **fit_options)

# Evaluate the model
def evaluate_model(model, X_test, y_test):
    """Print the model's accuracy on ``(X_test, y_test)`` as a percentage.

    ``model.evaluate`` must return a ``(loss, accuracy)`` pair; the loss is
    ignored.  Nothing is returned.
    """
    _loss, accuracy = model.evaluate(X_test, y_test, verbose=0)
    percent = accuracy * 100
    print(f"Model Accuracy: {percent:.2f}%")


# The model is declared with per-sample shape (n_features, 1), i.e. every
# feature is treated as one time step carrying a single value.  Add that
# trailing axis explicitly instead of relying on Keras to adjust the rank of
# the 2-D feature matrix implicitly.
X_train_seq = np.expand_dims(np.asarray(X_train), axis=-1)
X_test_seq = np.expand_dims(np.asarray(X_test), axis=-1)

# Build and train the model
model = build_model(X_train_seq.shape[1:])
history = train_model(model, X_train_seq, y_train, X_test_seq, y_test)

# Evaluate the model
evaluate_model(model, X_test_seq, y_test)






Epoch 1/50


Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Model Accuracy: 50.85%


# Final Verdict

An accuracy of around 51% has been achieved using the ensemble method, and 50.85% using the LSTM. However, proceeding with only a 1% edge is not advisable, as the risk:reward ratio is only 1:1. It can also be said that the casino results are borderline random, and that gamblers who claim to be able to predict the pattern are victims of the gambler's fallacy.