In [58]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

In [60]:
# Load the draw history from a CSV file; the path is relative to the working directory.
# NOTE(review): the preview below shows an "Unnamed: 0" column, which usually means the
# file was written together with its index - confirm whether index_col=0 is wanted.
lottery_data = pd.read_csv("Mahajana Sampatha.csv")

In [62]:
# Display the first rows as a quick sanity check of the columns that were loaded.
lottery_data.head()

Unnamed: 0,ID,Day,Letter,1st num,2nd num,3rd num,4th num,5th num,6th num
0,6111,Friday,z,8,9,1,0,7,6
1,6110,Thursday,G,4,1,1,3,0,8
2,6109,Wednesday,J,0,4,2,5,0,7
3,6108,Tuesday,K,7,2,1,3,0,9
4,6107,Monday,U,7,1,1,7,6,7


# Data Preprocessing

In [65]:
# Order the draws by ascending ID and renumber the rows 0..n-1, so that positional
# slicing in later cells walks through the draws in ID order.
lottery_data = lottery_data.sort_values('ID', ignore_index=True)

In [67]:
# Encoder used to turn the 'Letter' column into integer class labels.
# It is kept as a named object because the prediction cell calls
# inverse_transform on it to map a predicted label back to a letter.
letter_encoder = LabelEncoder()

In [69]:
# Encode each draw's letter as an integer class label.
# The data preview shows a lowercase 'z' next to uppercase letters; LabelEncoder
# would treat 'z' and 'Z' as two different classes, so surrounding whitespace is
# stripped and the case is normalised before fitting. The raw 'Letter' column is
# left untouched; decoded predictions therefore always come back in uppercase.
lottery_data['Letter_encoded'] = letter_encoder.fit_transform(
    lottery_data['Letter'].str.strip().str.upper()
)

In [71]:
# Translate weekday names into integer codes, Monday=0 through Sunday=6.
weekday_names = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
day_map = {name: code for code, name in enumerate(weekday_names)}

# Series.map leaves NaN for any value that is not a key of day_map
# (e.g. a differently-cased or misspelled day name).
lottery_data['Day_encoded'] = lottery_data['Day'].map(day_map)

In [73]:
# Feature engineering (sliding window)
# Each sample is built from the `lag` draws that precede draw i (letter code and
# six digits per draw, flattened into one row) plus the weekday code of draw i.
# The target for that sample is draw i's own letter code and six digits.
lag = 5
cols_to_use = ['Letter_encoded','1st num', '2nd num', '3rd num', '4th num', '5th num','6th num']

# With `lag` rows or fewer no window can be formed; x and y would end up empty
# and the training cells would fail later with an opaque indexing error.
if len(lottery_data) <= lag:
    raise ValueError(
        f"Need more than {lag} draws to build a sliding window, got {len(lottery_data)}"
    )

# Pull the needed columns out of pandas once instead of running iloc plus a column
# selection on every iteration; the resulting numbers are the same.
draw_values = lottery_data[cols_to_use].to_numpy().astype(int)
day_codes = lottery_data['Day_encoded'].to_numpy()

x=[]
y=[]

for i in range(lag, len(draw_values)):
    # Features: previous `lag` draws flattened + the day code of the current draw
    features = np.append(draw_values[i-lag:i].flatten(), day_codes[i])
    x.append(features)

    # Target: the current draw's letter code and numbers
    y.append(draw_values[i])

# x has one row per sample with lag * len(cols_to_use) + 1 columns;
# y has one row per sample with len(cols_to_use) columns.
x = np.array(x)
y = np.array(y)

# Model Training

In [76]:
# Train one separate random forest per target column: one for the letter code
# and one for each of the six number positions. Every model is fitted on the
# complete x / y arrays (no hold-out at this point).
# NOTE(review): no cell visible in this notebook reads `models` afterwards; the
# prediction cell fits its own `final_models` - confirm whether this cell is still needed.
models = {}
for target_pos, target_name in enumerate(cols_to_use):
    models[target_name] = RandomForestClassifier(
        n_estimators=100,
        random_state=42,
    ).fit(x, y[:, target_pos])

# Model Evaluation

In [79]:
# Chronological hold-out: shuffle=False keeps the row order, so the final 20% of
# the samples form the test set. Lottery draws are a time series, so the models
# have to be scored on draws that come after the ones they were trained on.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
    shuffle=False,
)

print("------Model Evaluation Report------")
evaluation_metrics = {}

# Fit a fresh classifier per target on the training part only and record its
# accuracy on the held-out part.
for target_pos, col_name in enumerate(cols_to_use):
    holdout_model = RandomForestClassifier(n_estimators=100, random_state=42)
    holdout_model.fit(x_train, y_train[:, target_pos])

    # Compare the predictions for the unseen rows against the true values
    acc = accuracy_score(y_test[:, target_pos], holdout_model.predict(x_test))
    evaluation_metrics[col_name] = acc
    print(f"Accuracy for {col_name}: {acc: .2%}")


------Model Evaluation Report------
Accuracy for Letter_encoded:  0.00%
Accuracy for 1st num:  4.65%
Accuracy for 2nd num:  11.63%
Accuracy for 3rd num:  11.63%
Accuracy for 4th num:  11.63%
Accuracy for 5th num:  13.95%
Accuracy for 6th num:  11.63%


In [85]:
# Refit one classifier per target column on every available sample; these are
# the models used for the actual forecast below.
final_models = {}
for idx, col_name in enumerate(cols_to_use):
    final_models[col_name] = RandomForestClassifier(
        n_estimators=100,
        random_state=42,
    ).fit(x, y[:, idx])

# Determine values for the next draw
last_id = lottery_data['ID'].max()
next_id = last_id + 1
# The frame is sorted by ID, so the last row is the most recent draw.
last_day_num = lottery_data['Day_encoded'].iloc[-1]
# NOTE(review): assumes the next draw falls on the calendar day right after the
# last recorded one - confirm against the real draw schedule.
next_day_num = (last_day_num + 1) % 7

# Input for prediction: the most recent `lag` draws flattened + next day number,
# shaped as a single-row 2-D array as expected by predict().
last_5_draws = lottery_data[cols_to_use].tail(lag).values.astype(int).flatten()
X_next = np.append(last_5_draws, next_day_num)[np.newaxis, :]

# Generate predictions: one value per target column
pred_vals = {
    target_name: final_models[target_name].predict(X_next)[0]
    for target_name in cols_to_use
}

# Decode the letter back from its integer label
letter_code = int(pred_vals['Letter_encoded'])
pred_letter = letter_encoder.inverse_transform([letter_code])[0]

print(f"\n--- PREDICTION FOR DRAW ID {next_id} ---")
print(f"Predicted Letter: {pred_letter}")
print(f"Predicted Numbers: {pred_vals['1st num']}, {pred_vals['2nd num']}, {pred_vals['3rd num']}, {pred_vals['4th num']}, {pred_vals['5th num']}, {pred_vals['6th num']}")



--- PREDICTION FOR DRAW ID 6112 ---
Predicted Letter: K
Predicted Numbers: 3, 9, 1, 3, 7, 4
