In [29]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import time
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from torch.utils.data import Dataset, DataLoader
import pickle



FEATURE ENGINEERING
 

In [30]:
# Load dataset
def load_data(file_path):
    df = pd.read_csv(file_path)  
    df['code'] = df['code'].apply(eval)  # Convert string to list
    df['num1'] = df['code'].apply(lambda x: int(x[0]))
    df['num2'] = df['code'].apply(lambda x: int(x[1]))
    df['num3'] = df['code'].apply(lambda x: int(x[2]))
    df['sum'] = df['num1'] + df['num2'] + df['num3']
    df['odd_even'] = df['sum'] % 2  # 0 = Even, 1 = Odd
    df['big_small'] = (df['sum'] >= 14).astype(int)  # 0 = Small, 1 = Big
    
    # Feature Engineering (Rolling Mean and Lag Features)
    df['rolling_sum_mean'] = df['sum'].rolling(window=3, min_periods=1).mean()
    df['lag1_sum'] = df['sum'].shift(1)
    df['lag1_odd_even'] = df['odd_even'].shift(1)
    df['lag1_big_small'] = df['big_small'].shift(1)

    return df[['num1', 'num2', 'num3', 'sum', 'rolling_sum_mean', 'lag1_sum', 'odd_even', 'big_small', 'lag1_odd_even', 'lag1_big_small']]


In [31]:
data_file = "newlucky28.csv" 
df = load_data(data_file)

HANDLLING  MISSSING VALUES


In [32]:
print("Missing Values Before Handling:\n", df.isnull().sum())


Missing Values Before Handling:
 num1                0
num2                0
num3                0
sum                 0
rolling_sum_mean    0
lag1_sum            1
odd_even            0
big_small           0
lag1_odd_even       1
lag1_big_small      1
dtype: int64


In [33]:
missing_columns = ['rolling_sum_mean', 'lag1_sum', 'lag1_odd_even', 'lag1_big_small']


In [34]:
df[missing_columns] = df[missing_columns].fillna(df[missing_columns].median())


In [35]:
print("Missing Values after Handling:\n", df.isnull().sum())


Missing Values after Handling:
 num1                0
num2                0
num3                0
sum                 0
rolling_sum_mean    0
lag1_sum            0
odd_even            0
big_small           0
lag1_odd_even       0
lag1_big_small      0
dtype: int64


TRAIN AND TEST DATA SPLIT

In [36]:
# Split dataset (80% Train, 20% Test ensuring 100 draws in test set)
test_size = max(0.2, 100 / len(df)) 
train_df, test_df = train_test_split(df, test_size=test_size, random_state=42, shuffle=False)

FEATURE SCALLING AND ENCODING

In [37]:
# Scale numerical features
scaler = StandardScaler()
numerical_features = ['sum', 'rolling_sum_mean', 'lag1_sum']
train_df[numerical_features] = scaler.fit_transform(train_df[numerical_features])
test_df[numerical_features] = scaler.transform(test_df[numerical_features])


In [38]:
# One-Hot Encode categorical features
categorical_features = ['odd_even', 'big_small', 'lag1_odd_even', 'lag1_big_small']

# Combine train and test before encoding
combined_df = pd.concat([train_df[categorical_features], test_df[categorical_features]], axis=0)

# Fit encoder on the combined dataset
encoder = OneHotEncoder(drop='first', sparse_output=False)
encoded_combined = encoder.fit_transform(combined_df)

# Convert back to DataFrame
encoded_combined_df = pd.DataFrame(encoded_combined, columns=encoder.get_feature_names_out(categorical_features))

# Split back into train and test
train_encoded_df = encoded_combined_df.iloc[:len(train_df)].reset_index(drop=True)
test_encoded_df = encoded_combined_df.iloc[len(train_df):].reset_index(drop=True)

# Drop original categorical features
train_df = train_df.drop(columns=categorical_features).reset_index(drop=True)
test_df = test_df.drop(columns=categorical_features).reset_index(drop=True)

# Concatenate encoded categorical features with numerical features
train_df = pd.concat([train_df, train_encoded_df], axis=1)
test_df = pd.concat([test_df, test_encoded_df], axis=1)


In [39]:
# Verify Encoding of Target Labels
print("Unique values in odd_even:", df['odd_even'].unique())
print("Unique values in big_small:", df['big_small'].unique())

Unique values in odd_even: [1 0]
Unique values in big_small: [1 0]


SAVING PROCESSED DATA


In [40]:

# Store processed datasets
train_df.to_csv("train.csv", index=False)
test_df.to_csv("test.csv", index=False)


PYTORCH DATASET LOADER


In [41]:
class LotteryDataset(Dataset):
    def __init__(self, file_path, target_column):
        data = pd.read_csv(file_path)
        self.X = torch.tensor(data.drop(columns=[target_column]).values, dtype=torch.float32)
        self.y = torch.tensor(data[target_column].values, dtype=torch.long)
    
    def __len__(self):
        return len(self.X)
    
    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]


In [42]:
# Create DataLoaders
def get_dataloaders(train_file, test_file, target_column, batch_size=64):
    train_dataset = LotteryDataset(train_file, target_column)
    test_dataset = LotteryDataset(test_file, target_column)
    
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=2, pin_memory=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=2, pin_memory=True)
    
    return train_loader, test_loader

In [43]:

# Create DataLoaders for Odd/Even and Big/Small prediction tasks
train_loader_odd_even, test_loader_odd_even = get_dataloaders("train.csv", "test.csv", "odd_even_1")
train_loader_big_small, test_loader_big_small = get_dataloaders("train.csv", "test.csv", "big_small_1")



In [44]:
train_df = pd.read_csv("train.csv")
print("Columns in train.csv:", train_df.columns)


Columns in train.csv: Index(['num1', 'num2', 'num3', 'sum', 'rolling_sum_mean', 'lag1_sum',
       'odd_even_1', 'big_small_1', 'lag1_odd_even_1.0', 'lag1_big_small_1.0'],
      dtype='object')


TRAIN


In [45]:
 #Define MLP Model with Improvements
class MLP(nn.Module):
    def __init__(self, input_size):
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(input_size, 128)
        self.bn1 = nn.BatchNorm1d(128)
        self.fc2 = nn.Linear(128, 64)
        self.bn2 = nn.BatchNorm1d(64)
        self.fc3 = nn.Linear(64, 32)
        self.bn3 = nn.BatchNorm1d(32)
        self.fc4 = nn.Linear(32, 1)
        self.dropout = nn.Dropout(0.5)
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()
    
    def forward(self, x):
        x = self.relu(self.bn1(self.fc1(x)))
        x = self.dropout(x)
        x = self.relu(self.bn2(self.fc2(x)))
        x = self.dropout(x)
        x = self.relu(self.bn3(self.fc3(x)))
        x = self.dropout(x)
        x = self.sigmoid(self.fc4(x))
        return x


In [None]:
def train_model(train_file, test_file, target_column, model_name, epochs=100, batch_size=256, lr=0.0003):
    train_dataset = LotteryDataset(train_file, target_column)
    test_dataset = LotteryDataset(test_file, target_column)
    
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
    
    model = MLP(input_size=train_dataset.X.shape[1])
    criterion = nn.BCEWithLogitsLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    
    for epoch in range(epochs):
        model.train()
        epoch_loss = 0
        correct, total = 0, 0
        
        for X_batch, y_batch in train_loader:
            optimizer.zero_grad()
            output = model(X_batch).squeeze()
            loss = criterion(output, y_batch.float())  # Convert target to Float
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()
            correct += ((output > 0.5) == y_batch).sum().item()
            total += y_batch.size(0)
        
        accuracy = (correct / total) * 100
        print(f"Epoch {epoch+1}: Loss = {epoch_loss:.4f}, Accuracy = {accuracy:.2f}%")
    
    with open(f"{model_name}.pkl", "wb") as f:
        pickle.dump(model, f)
    print(f"Model {model_name} trained and saved as .pkl.")



# Train Models
train_model("train.csv", "test.csv", "odd_even_1", "model_odd_even_trained")


Epoch 1: Loss = 659.1778, Accuracy = 50.02%
Epoch 2: Loss = 652.8330, Accuracy = 50.02%
Epoch 3: Loss = 651.0371, Accuracy = 50.02%
Epoch 4: Loss = 649.1899, Accuracy = 50.02%
Model model_odd_even_trained trained and saved as .pkl.


In [47]:
train_model("train.csv", "test.csv", "big_small_1", "model_big_small_trained")


Epoch 1: Loss = 526.5229, Accuracy = 95.10%
Epoch 2: Loss = 480.1131, Accuracy = 99.57%
Epoch 3: Loss = 476.6630, Accuracy = 99.75%
Epoch 4: Loss = 475.5859, Accuracy = 99.80%
Model model_big_small_trained trained and saved as .pkl.


In [48]:
# Evaluation Function
def evaluate_model(model, test_loader, criterion):
    model.eval()
    total_loss, correct, total = 0, 0, 0
    batch_accuracies = []
    with torch.no_grad():
        for i, (X_batch, y_batch) in enumerate(test_loader):
            output = model(X_batch).squeeze()
            loss = criterion(output, y_batch.float())  # Convert target to Float
            total_loss += loss.item()
            correct += ((output > 0.5) == y_batch).sum().item()
            total += y_batch.size(0)
            
            # Compute accuracy for every 10 draws
            if (i + 1) % 10 == 0:
                batch_accuracy = (correct / total) * 100
                batch_accuracies.append(batch_accuracy)
                correct, total = 0, 0  # Reset for next batch
    
    overall_accuracy = sum(batch_accuracies) / len(batch_accuracies)
    print(f"Overall Test Accuracy (100 draws): {overall_accuracy:.2f}%")
    for i, acc in enumerate(batch_accuracies):
        print(f"Accuracy for rounds {i*10+1}-{(i+1)*10}: {acc:.2f}%")

# Load trained models and evaluate
def load_and_evaluate(model_path, test_file, target_column):
    test_dataset = LotteryDataset(test_file, target_column)
    test_loader = DataLoader(test_dataset, batch_size=10, shuffle=False)
    
    with open(model_path, "rb") as f:
        model = pickle.load(f)
    
    criterion = nn.BCELoss()
    evaluate_model(model, test_loader, criterion)

# Evaluate models
load_and_evaluate("model_odd_even_trained.pkl", "test.csv", "odd_even_1")
load_and_evaluate("model_big_small_trained.pkl", "test.csv", "big_small_1")

Overall Test Accuracy (100 draws): 50.04%
Accuracy for rounds 1-10: 56.00%
Accuracy for rounds 11-20: 48.00%
Accuracy for rounds 21-30: 51.00%
Accuracy for rounds 31-40: 47.00%
Accuracy for rounds 41-50: 55.00%
Accuracy for rounds 51-60: 46.00%
Accuracy for rounds 61-70: 51.00%
Accuracy for rounds 71-80: 50.00%
Accuracy for rounds 81-90: 56.00%
Accuracy for rounds 91-100: 45.00%
Accuracy for rounds 101-110: 55.00%
Accuracy for rounds 111-120: 53.00%
Accuracy for rounds 121-130: 42.00%
Accuracy for rounds 131-140: 41.00%
Accuracy for rounds 141-150: 53.00%
Accuracy for rounds 151-160: 55.00%
Accuracy for rounds 161-170: 63.00%
Accuracy for rounds 171-180: 49.00%
Accuracy for rounds 181-190: 44.00%
Accuracy for rounds 191-200: 58.00%
Accuracy for rounds 201-210: 53.00%
Accuracy for rounds 211-220: 51.00%
Accuracy for rounds 221-230: 43.00%
Accuracy for rounds 231-240: 48.00%
Accuracy for rounds 241-250: 46.00%
Accuracy for rounds 251-260: 53.00%
Accuracy for rounds 261-270: 51.00%
Accura