In [9]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import time
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder



FEATURE ENGINEERING
 

In [2]:
# Load dataset
def load_data(file_path):
    """Load draw records from a CSV file and derive per-draw features.

    The CSV must contain a ``code`` column whose cells are the textual form of
    a Python sequence literal with at least three entries (each entry must be
    convertible with ``int``).

    Parameters
    ----------
    file_path : str or path-like
        Location of the CSV file to read.

    Returns
    -------
    pandas.DataFrame
        One row per input row, with the columns ``num1``, ``num2``, ``num3``,
        ``sum``, ``rolling_sum_mean``, ``lag1_sum``, ``odd_even``,
        ``big_small``, ``lag1_odd_even`` and ``lag1_big_small``.
        The three ``lag1_*`` columns are NaN in the first row because there is
        no previous draw to shift from.

    Raises
    ------
    ValueError, SyntaxError
        If a ``code`` cell is not a plain Python literal.
    """
    # Local import keeps this function self-contained within the notebook.
    import ast

    # Read the file the caller asked for (previously a hard-coded name was used
    # and the argument was silently ignored).
    df = pd.read_csv(file_path)

    # Parse the stored literal without executing it: unlike eval(), literal_eval
    # only accepts literals, so a crafted CSV cell cannot run code.
    df['code'] = df['code'].apply(ast.literal_eval)

    df['num1'] = df['code'].apply(lambda x: int(x[0]))
    df['num2'] = df['code'].apply(lambda x: int(x[1]))
    df['num3'] = df['code'].apply(lambda x: int(x[2]))
    df['sum'] = df['num1'] + df['num2'] + df['num3']
    df['odd_even'] = df['sum'] % 2  # 0 = Even, 1 = Odd
    df['big_small'] = (df['sum'] >= 14).astype(int)  # 0 = Small, 1 = Big

    # Rolling mean over the current and up to two preceding sums; min_periods=1
    # means the first rows average over however many draws exist so far.
    df['rolling_sum_mean'] = df['sum'].rolling(window=3, min_periods=1).mean()

    # Previous-draw (lag 1) features; the first row has no predecessor -> NaN.
    df['lag1_sum'] = df['sum'].shift(1)
    df['lag1_odd_even'] = df['odd_even'].shift(1)
    df['lag1_big_small'] = df['big_small'].shift(1)

    return df[['num1', 'num2', 'num3', 'sum', 'rolling_sum_mean', 'lag1_sum', 'odd_even', 'big_small', 'lag1_odd_even', 'lag1_big_small']]


In [3]:
# Path of the draws CSV that is handed to load_data below.
# NOTE(review): confirm load_data actually reads from its file_path argument
# before relying on this value to switch datasets.
data_file = "newlucky28.csv"  # Change this to your actual CSV file
# df holds the engineered feature table returned by load_data.
df = load_data(data_file)

In [4]:
# Chronological split: the last rows become the test set (shuffle=False keeps
# draw order). The test set is 20% of the draws, but never fewer than
# MIN_TEST_DRAWS rows.
MIN_TEST_DRAWS = 100
n_draws = len(df)
if n_draws <= MIN_TEST_DRAWS:
    # Without this guard the split fails with an unrelated-looking error
    # (or a ZeroDivisionError on an empty frame).
    raise ValueError(
        f"Need more than {MIN_TEST_DRAWS} draws to hold out {MIN_TEST_DRAWS} "
        f"for testing; got {n_draws}."
    )

# An integer test_size is an absolute row count, so the minimum is exact rather
# than subject to float rounding. (n + 4) // 5 is ceil(20% of n).
test_size = max((n_draws + 4) // 5, MIN_TEST_DRAWS)

# random_state is omitted on purpose: it has no effect when shuffle=False.
train_df, test_df = train_test_split(df, test_size=test_size, shuffle=False)

# Work on independent copies so later column assignments do not act on (or
# warn about) slices of df.
train_df, test_df = train_df.copy(), test_df.copy()

In [5]:
# Standardise the continuous columns. The scaler's statistics are learned from
# the training split only and then applied unchanged to both splits, so no
# information from the test rows influences the scaling.
numerical_features = ['sum', 'rolling_sum_mean', 'lag1_sum']
scaler = StandardScaler()
scaler.fit(train_df[numerical_features])
train_df[numerical_features] = scaler.transform(train_df[numerical_features])
test_df[numerical_features] = scaler.transform(test_df[numerical_features])


In [10]:
# Integer-encode the indicator columns, fitting on the training split only.
categorical_features = ['odd_even', 'big_small', 'lag1_odd_even', 'lag1_big_small']

# The lag columns come from shift(1) in load_data, so the very first draw has
# NaN there. Drop such rows first: otherwise the missing value is either encoded
# as a spurious extra class or rejected, depending on the scikit-learn version.
train_df = train_df.dropna(subset=categorical_features).copy()
test_df = test_df.dropna(subset=categorical_features).copy()

# One fitted encoder per column, so each column's class mapping stays available
# (e.g. for inverse_transform). `encoder` remains bound to the last column's
# encoder after the loop, as it was before.
encoders = {}
for col in categorical_features:
    encoder = LabelEncoder()
    train_df[col] = encoder.fit_transform(train_df[col])
    test_df[col] = encoder.transform(test_df[col])
    encoders[col] = encoder


In [11]:

# Persist both processed splits as CSV files in the working directory; the
# DataFrame index is not written.
for csv_name, split_df in (("train.csv", train_df), ("test.csv", test_df)):
    split_df.to_csv(csv_name, index=False)
