<a href="https://colab.research.google.com/github/Snowdenstyll/Lottery/blob/main/server/jupyterNotebooks/Keno_Lottery_Claude.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
from tensorflow import keras
from tensorflow.keras import layers, Input
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Bidirectional
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import seaborn as sns
from collections import Counter

# Data Preprocessing
def prepare_data(csv_file, window_length=5):
    """Load draw history from a CSV and build sliding-window training arrays.

    The CSV must contain an ``AP`` column (rows marked ``'A'`` or ``'P'`` are
    used) and a ``PlayDate`` column; every remaining column is converted to
    float and treated as a feature. The two game types are processed
    independently, each with its own scaler.

    Args:
        csv_file: Path (or other object accepted by ``pd.read_csv``) of the data.
        window_length: Number of consecutive rows that form one input
            sequence. Defaults to 5, the value that was previously hard-coded.

    Returns:
        ``((X_A, y_A, scaler_A), (X_P, y_P, scaler_P))``. For each game type
        ``X`` has shape ``(rows - window_length, window_length, features)``,
        ``y`` has shape ``(rows - window_length, features)`` and holds the row
        that follows each window, and ``scaler`` is the fitted
        ``MinMaxScaler`` needed to map values back to the original range.

    Raises:
        ValueError: If ``window_length`` is smaller than 1, or a game type
            does not have more than ``window_length`` rows.
    """
    if window_length < 1:
        raise ValueError(f"window_length must be at least 1, got {window_length}")

    # Read the data
    df = pd.read_csv(csv_file)

    # Process each game type separately
    def process_game_data(game_df):
        values = game_df.astype(float).values
        number_of_rows = len(values)
        if number_of_rows <= window_length:
            # Fail early with a readable message instead of letting array
            # allocation or model training fail later on an empty data set.
            raise ValueError(
                f"Need more than {window_length} rows to build a sequence, "
                f"got {number_of_rows}"
            )

        # Normalize numbers to [0,1] range.
        # NOTE: the scaler is fitted on every row of this game type, so any
        # train/test split done by callers happens after scaling.
        scaler = MinMaxScaler(feature_range=(0, 1))
        scaled = scaler.fit_transform(values)

        # Window i covers rows i .. i+window_length-1; its target is the
        # row immediately after the window.
        number_of_samples = number_of_rows - window_length
        X = np.stack(
            [scaled[i:i + window_length] for i in range(number_of_samples)]
        )
        y = scaled[window_length:].copy()

        return X, y, scaler

    # Separate A and P games
    df_A = df[df['AP'] == 'A'].drop(['PlayDate', 'AP'], axis=1)
    df_P = df[df['AP'] == 'P'].drop(['PlayDate', 'AP'], axis=1)

    return process_game_data(df_A), process_game_data(df_P)

# Create the model
def create_model(window_length, number_of_features):
    """Build and compile the bidirectional-LSTM regression network.

    Args:
        window_length: Number of time steps in each input sequence.
        number_of_features: Number of values per time step; also the size of
            the output layer.

    Returns:
        A compiled ``Sequential`` model (Adam optimizer, MSE loss, MAE metric).
    """
    dropout_rate = 0.3

    # Layers are created in network order: input, two recurrent stages, then
    # the dense head ending in a sigmoid output.
    stack = [
        Input(shape=(window_length, number_of_features)),
        Bidirectional(LSTM(128, return_sequences=True)),
        Dropout(dropout_rate),
        Bidirectional(LSTM(64)),
        Dropout(dropout_rate),
        Dense(256, activation='relu'),
        Dropout(dropout_rate),
        Dense(number_of_features, activation='sigmoid'),
    ]

    network = Sequential(stack)
    network.compile(optimizer='adam', loss='mse', metrics=['mae'])
    return network

# Train separate models for A and P games
def train_model(X, y, game_type):
    """Fit a fresh model from ``create_model`` on a training split of ``X``/``y``.

    Args:
        X: Input sequences; ``X.shape[1]`` and ``X.shape[2]`` size the model.
        y: Targets aligned with ``X``.
        game_type: Label used in the checkpoint file name and error message.

    Returns:
        ``(model, history)`` where ``history`` is the object returned by
        ``model.fit``.

    Raises:
        Exception: Any error raised during splitting or training is printed
            and then re-raised unchanged.
    """
    try:
        # 80/20 split; the held-out 20% is not used inside this function.
        X_train, _X_test, y_train, _y_test = train_test_split(
            X, y, test_size=0.2, random_state=42
        )

        window_length, number_of_features = X.shape[1], X.shape[2]
        model = create_model(window_length, number_of_features)

        # Both callbacks watch the validation loss produced by
        # validation_split below.
        early_stop = EarlyStopping(
            monitor='val_loss', patience=20, restore_best_weights=True
        )
        checkpoint = ModelCheckpoint(
            f'best_model_{game_type}.keras',
            save_best_only=True,
            monitor='val_loss',
        )

        history = model.fit(
            X_train,
            y_train,
            epochs=200,
            batch_size=32,
            validation_split=0.2,
            callbacks=[early_stop, checkpoint],
            verbose=1,
        )
        return model, history
    except Exception as e:
        print(f"Error training model for game type {game_type}: {str(e)}")
        raise

# Function to convert predictions back to actual numbers
''' def process_predictions(predictions, scaler, original_features_count=20):
    try:
        # Inverse transform the normalized predictions
        predictions_original = scaler.inverse_transform(predictions)

        # Round to nearest integers and ensure unique numbers
        rounded_predictions = []
        for pred in predictions_original:
            # Sort numbers and round them
            sorted_numbers = np.sort(np.round(pred))
            # Ensure no duplicates and numbers are within range
            unique_numbers = np.unique(np.clip(sorted_numbers, 1, 69))
            # If we don't have exactly 20 numbers, adjust
            while len(unique_numbers) < original_features_count:
                # Add missing numbers
                available_numbers = set(range(1, 70)) - set(unique_numbers)
                unique_numbers = np.append(unique_numbers, np.random.choice(list(available_numbers)))
                unique_numbers = np.sort(unique_numbers)
            rounded_predictions.append(unique_numbers[:20])

        return rounded_predictions
    except Exception as e:
        print(f"Error processing predictions: {str(e)}")
        raise '''

def process_predictions(predictions, scaler, original_features_count=20, top_n=10):
    """Turn scaled model output into ranked sets of ``top_n`` integers.

    Each row of ``predictions`` is mapped back through
    ``scaler.inverse_transform``. The ``top_n`` largest restored values of the
    row are rounded, clipped to 1..69 and de-duplicated; if fewer than
    ``top_n`` numbers remain, random unused numbers from 1..69 are added.

    The "confidence" of a row is the mean of those ``top_n`` restored values
    (not a probability). Rows are ordered by it, highest first, and at most
    ``top_n`` rows are returned.

    Args:
        predictions: 2-D array of scaled model outputs, one row per sample.
        scaler: Object providing ``inverse_transform``.
        original_features_count: Accepted for compatibility; not used.
        top_n: Count of numbers per result and maximum number of results.

    Returns:
        A list of dicts with keys ``'rank'`` (starting at 1), ``'numbers'``
        (sorted list of ints) and ``'confidence_score'`` (float rounded to two
        decimals), or ``None`` if any exception occurred (the message is
        printed).
    """
    try:
        restored = scaler.inverse_transform(predictions)
        candidates = []

        for row in restored:
            # (column index, restored value) pairs, largest value first
            sorted_pairs = sorted(
                enumerate(row), key=lambda item: item[1], reverse=True
            )

            # DEBUG: show the leading pairs
            print(f"\nSorted pairs (number, confidence): {sorted_pairs[:15]}")  # Just print top 15 for debugging

            leading = sorted_pairs[:top_n]
            top_indices = [index for index, _ in leading]

            # DEBUG: show which columns were selected
            print(f"Top {top_n} indices: {top_indices}")

            # Round the selected values to integers, keep them in 1..69 and
            # drop duplicates.
            rounded = np.sort(np.round(row[top_indices])).astype(int)
            picks = np.unique(np.clip(rounded, 1, 69))

            # Top up with random unused numbers, one at a time.
            while len(picks) < top_n:
                unused = set(range(1, 70)) - set(picks)
                picks = np.append(picks, np.random.choice(list(unused)))
                picks = np.sort(picks)

            # DEBUG: show the final selection
            print(f"Final top {top_n} numbers: {picks[:top_n]}")

            candidates.append({
                'numbers': picks[:top_n].astype(int),
                'confidence': np.mean([value for _, value in leading]),  # Average confidence of top 10
            })

        # Highest mean first; ties keep their original relative order.
        candidates.sort(key=lambda entry: entry['confidence'], reverse=True)

        return [
            {
                'rank': rank,
                'numbers': entry['numbers'].tolist(),
                'confidence_score': round(float(entry['confidence']), 2),
            }
            for rank, entry in enumerate(candidates[:top_n], 1)
        ]

    except Exception as e:
        print(f"Error processing predictions: {str(e)}")
        return None


def display_predictions(predictions, game_type):
    """Print each ranked prediction for ``game_type`` in a readable block.

    Args:
        predictions: Iterable of dicts with keys ``'rank'``, ``'numbers'`` and
            ``'confidence_score'``. A falsy value (``None`` or an empty list)
            prints nothing.
        game_type: Label printed with every prediction.

    Returns:
        None. Any exception raised while printing is caught and reported on
        stdout instead of propagating.
    """
    try:
        if not predictions:
            return

        divider = "-" * 50
        for entry in predictions:
            joined = ', '.join(str(number) for number in entry['numbers'])
            print(f"\nGame Type: {game_type}")
            print(f"Rank: {entry['rank']}")
            print(f"Numbers: {joined}")
            print(f"Confidence Score: {entry['confidence_score']:.2f}")
            print(divider)
    except Exception as e:
        print(f"Error in display_predictions: {str(e)}")

def analyze_predictions(model, X, scaler, game_type, actual_data):
    """Print and plot a summary of the model's predictions for ``X``.

    The raw model output is mapped back through ``scaler.inverse_transform``
    and rounded to integers, giving one row per input window and one column
    per feature. The function then prints the last row, prints the most
    frequent predicted numbers, and shows three plots: a correlation heatmap
    of the last prediction against the last actual row, a frequency bar chart
    and an absolute-error histogram.

    Args:
        model: Object providing ``predict(X)``.
        X: Input sequences passed to ``model.predict``.
        scaler: Fitted scaler providing ``inverse_transform``.
        game_type: Label used in printed messages and plot titles.
        actual_data: Scaled target rows. When it has the same shape as the
            predictions, errors are computed row by row; otherwise every
            prediction is compared with the last actual row.

    Returns:
        Integer ``numpy`` array of rounded predictions, one row per sample.
    """
    # process_predictions() returns a short, re-ordered list of summary dicts,
    # which cannot be cast to an integer matrix. Restore the raw output here
    # so every input window keeps its own row in the original order.
    raw_predictions = model.predict(X)
    predictions = np.round(scaler.inverse_transform(raw_predictions)).astype(int)

    actual_all = np.round(scaler.inverse_transform(actual_data)).astype(int)
    actual = actual_all[-1]

    # 1. Show the prediction made from the last window in X.
    # NOTE: when X comes from prepare_data, the target of that window is the
    # final row of actual_data, not a draw that is missing from the data set.
    print(f"\nPredicted numbers for next {game_type} game:")
    print(sorted(predictions[-1].tolist()))

    # 1.1 Get top 10 and top 5 predicted numbers (plain ints print cleanly)
    number_freq = Counter(predictions.flatten().tolist())

    top_10_numbers = [number for number, _ in number_freq.most_common(10)]
    top_5_numbers = top_10_numbers[:5]  # Top 5 are the first 5 of top 10

    print(f"\nTop 10 most frequently predicted numbers ({game_type} Game):", top_10_numbers)
    print(f"\nTop 5 most frequently predicted numbers ({game_type} Game):", top_5_numbers)

    # 2. Visualization of prediction accuracy
    plt.figure(figsize=(15, 5))

    # 2.1 Prediction vs Actual heatmap (last prediction vs last actual row)
    plt.subplot(1, 3, 1)
    comparison_data = pd.DataFrame({
        'Predicted': sorted(predictions[-1].tolist()),
        'Actual': sorted(actual.tolist())
    })

    sns.heatmap(comparison_data.corr(), annot=True, cmap='coolwarm')
    plt.title(f'Prediction vs Actual Correlation ({game_type} Game)')

    # 2.2 Number frequency distribution
    plt.subplot(1, 3, 2)
    plt.bar(list(number_freq.keys()), list(number_freq.values()))
    plt.title(f'Number Frequency in Predictions ({game_type} Game)')
    plt.xlabel('Number')
    plt.ylabel('Frequency')

    # 2.3 Error distribution: compare each prediction with its own actual row
    # when the shapes line up; otherwise fall back to the last actual row.
    plt.subplot(1, 3, 3)
    if actual_all.shape == predictions.shape:
        errors = np.abs(predictions - actual_all).flatten()
    else:
        errors = np.abs(predictions - actual).flatten()
    plt.hist(errors, bins=20)
    plt.title(f'Prediction Error Distribution ({game_type} Game)')
    plt.xlabel('Absolute Error')
    plt.ylabel('Frequency')

    plt.tight_layout()
    plt.show()

    # 3. Most frequently predicted numbers as a table
    top_numbers = pd.DataFrame.from_dict(number_freq, orient='index', columns=['frequency'])
    top_numbers = top_numbers.sort_values('frequency', ascending=False).head(10)
    print(f"\nTop 10 most frequently predicted numbers ({game_type} Game):")
    print(top_numbers)

    return predictions

  # Call this function for both models
  #analyze_predictions(model_A, X_A, scaler_A, 'A', y_A)
  #analyze_predictions(model_P, X_P, scaler_P, 'P', y_P)


In [None]:
# Main execution: train one model per game type on 2024.csv, predict the draw
# that follows the recorded data and report loss metrics.
if __name__ == "__main__":
    try:
        # Process both game types
        print("Processing data...")
        (X_A, y_A, scaler_A), (X_P, y_P, scaler_P) = prepare_data("2024.csv")

        print("Training model A...")
        # Train models for both game types
        model_A, history_A = train_model(X_A, y_A, 'A')

        print("Training model P...")
        model_P, history_P = train_model(X_P, y_P, 'P')

        # Make predictions for next games.
        # X[-1] is the window whose target is y[-1], the last recorded draw,
        # so feeding X[-1:] would only re-predict a known result. The window
        # for the next draw is X[-1] without its oldest row, followed by
        # y[-1]; the leading axis added at the end is the batch dimension.
        print("Making predictions...")
        last_sequence_A = np.concatenate([X_A[-1, 1:], y_A[-1:]])[np.newaxis]
        last_sequence_P = np.concatenate([X_P[-1, 1:], y_P[-1:]])[np.newaxis]

        print("Predicting next 'A' game numbers...")
        print("Predicting next 'P' game numbers...")

        pred_A = model_A.predict(last_sequence_A)
        pred_P = model_P.predict(last_sequence_P)

        # Convert predictions to actual numbers. process_predictions returns
        # None on failure; the indexing below then raises and is reported by
        # the handler at the bottom.
        numbers_A = process_predictions(pred_A, scaler_A)
        numbers_P = process_predictions(pred_P, scaler_P)

        print("\nPredicted numbers for next 'A' game:", numbers_A[0])
        print("Predicted numbers for next 'P' game:", numbers_P[0])

        # Evaluate models.
        # NOTE: this uses the complete X/y arrays, which include the samples
        # train_model fitted on.
        print("\nModel A Evaluation:")
        loss_A = model_A.evaluate(X_A, y_A)
        print(f"MSE: {loss_A[0]:.4f}, MAE: {loss_A[1]:.4f}")

        print("\nModel P Evaluation:")
        loss_P = model_P.evaluate(X_P, y_P)
        print(f"MSE: {loss_P[0]:.4f}, MAE: {loss_P[1]:.4f}")

    except Exception as e:
        # Any failure above is reported as one line; it is not re-raised.
        print(f"An error occurred: {str(e)}")

Processing data...
Training model A...
Epoch 1/200
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 108ms/step - loss: 0.0614 - mae: 0.2083 - val_loss: 0.0507 - val_mae: 0.1856
Epoch 2/200
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step - loss: 0.0390 - mae: 0.1614 - val_loss: 0.0462 - val_mae: 0.1587
Epoch 3/200
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step - loss: 0.0344 - mae: 0.1433 - val_loss: 0.0447 - val_mae: 0.1640
Epoch 4/200
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step - loss: 0.0334 - mae: 0.1467 - val_loss: 0.0437 - val_mae: 0.1646
Epoch 5/200
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step - loss: 0.0327 - mae: 0.1448 - val_loss: 0.0436 - val_mae: 0.1589
Epoch 6/200
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step - loss: 0.0328 - mae: 0.1441 - val_loss: 0.0434 - val_mae: 0.1594
Epoch 7/200
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[

In [None]:
# Re-run the prediction for each game type using the models and input windows
# left in the notebook namespace by the training cell.
# original_features_count is passed here, but process_predictions does not
# read that argument.
predictions_A = model_A.predict(last_sequence_A)
top_10_predictions_A = process_predictions(predictions_A, scaler_A, original_features_count=10, top_n=10)

predictions_P = model_P.predict(last_sequence_P)
top_10_predictions_P = process_predictions(predictions_P, scaler_P, original_features_count=10, top_n=10)

# display_predictions prints nothing when it receives None or an empty list.
display_predictions(top_10_predictions_A, 'A')
display_predictions(top_10_predictions_P, 'P')


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step

Sorted pairs (number, confidence): [(19, 67.30063), (18, 63.95086), (17, 60.706852), (16, 57.375664), (15, 54.109886), (14, 50.715282), (13, 47.297832), (12, 43.827694), (11, 40.447495), (10, 36.954765), (9, 33.909683), (8, 30.642344), (7, 27.690271), (6, 24.018663), (5, 20.456509)]
Top 10 indices: [19, 18, 17, 16, 15, 14, 13, 12, 11, 10]
Final top 10 numbers: [37 40 44 47 51 54 57 61 64 67]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step

Sorted pairs (number, confidence): [(19, 66.860695), (18, 63.593044), (17, 60.47284), (16, 57.694374), (15, 54.209644), (14, 50.24315), (13, 47.719685), (12, 44.555893), (11, 40.414143), (10, 37.85281), (9, 34.899616), (8, 31.538483), (7, 27.630959), (6, 24.703793), (5, 20.661854)]
Top 10 indices: [19, 18, 17, 16, 15, 14, 13, 12, 11, 10]
Final top 10 numbers: [38 40 45 48 50 54 58 60 64 67]

Game Type: A
Rank: 1
Numbers: 37, 40, 44, 47, 51, 54, 57, 61, 64

In [None]:
import numpy as np
import pandas as pd
from tensorflow import keras
from tensorflow.keras import layers, Input
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Bidirectional, Conv1D, MaxPooling1D
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

def create_features(df):
    """Derive per-row summary features from a frame of drawn numbers.

    All features are computed with vectorized pandas operations, row by row
    across the columns of ``df``.

    Args:
        df: Numeric DataFrame with one draw per row. ``consecutive_count``
            compares each column with the column to its left, so it only
            counts consecutive numbers when the columns are in ascending
            order.

    Returns:
        DataFrame sharing ``df``'s index with the columns ``mean``, ``std``,
        ``median``, ``max_diff``, ``range_1_20``, ``range_21_40``,
        ``range_41_60``, ``range_61_plus``, ``consecutive_count``,
        ``even_count`` and ``odd_count``.
    """
    features = pd.DataFrame(index=df.index)

    # Basic statistics of previous numbers
    features['mean'] = df.mean(axis=1)
    features['std'] = df.std(axis=1)
    features['median'] = df.median(axis=1)

    # Number range features: count the values falling in each band
    features['max_diff'] = df.max(axis=1) - df.min(axis=1)
    features['range_1_20'] = (df <= 20).sum(axis=1)
    features['range_21_40'] = ((df > 20) & (df <= 40)).sum(axis=1)
    features['range_41_60'] = ((df > 40) & (df <= 60)).sum(axis=1)
    features['range_61_plus'] = (df > 60).sum(axis=1)

    # Consecutive numbers: adjacent columns that differ by exactly 1. The
    # first column has no left neighbour (NaN difference) and never counts.
    features['consecutive_count'] = (df.diff(axis=1) == 1).sum(axis=1)

    # Even/Odd counts
    remainder = df % 2
    features['even_count'] = (remainder == 0).sum(axis=1)
    features['odd_count'] = (remainder != 0).sum(axis=1)

    return features

################

################

def prepare_improved_data(csv_file, window_length=5):
    """Load draw history from a CSV and build sliding-window training arrays.

    This performs the same preprocessing as ``prepare_data``: rows are split
    by the ``AP`` column (``'A'`` / ``'P'``), the ``PlayDate`` and ``AP``
    columns are dropped, and the remaining columns are scaled to [0, 1] and
    cut into overlapping windows. ``create_features`` is not applied here.

    Args:
        csv_file: Path (or other object accepted by ``pd.read_csv``) of the data.
        window_length: Number of consecutive rows that form one input
            sequence. Defaults to 5, the value that was previously hard-coded.

    Returns:
        ``((X_A, y_A, scaler_A), (X_P, y_P, scaler_P))``. For each game type
        ``X`` has shape ``(rows - window_length, window_length, features)``,
        ``y`` has shape ``(rows - window_length, features)`` and holds the row
        that follows each window, and ``scaler`` is the fitted
        ``MinMaxScaler``.

    Raises:
        ValueError: If ``window_length`` is smaller than 1, or a game type
            does not have more than ``window_length`` rows.
    """
    if window_length < 1:
        raise ValueError(f"window_length must be at least 1, got {window_length}")

    df = pd.read_csv(csv_file)

    def process_game_data(game_df):
        values = game_df.astype(float).values
        number_of_rows = len(values)
        if number_of_rows <= window_length:
            # Fail early with a readable message instead of letting array
            # allocation or model training fail later on an empty data set.
            raise ValueError(
                f"Need more than {window_length} rows to build a sequence, "
                f"got {number_of_rows}"
            )

        # Normalize numbers to [0,1] range.
        # NOTE: the scaler is fitted on every row of this game type, so any
        # train/test split done by callers happens after scaling.
        scaler = MinMaxScaler(feature_range=(0, 1))
        scaled = scaler.fit_transform(values)

        # Window i covers rows i .. i+window_length-1; its target is the
        # row immediately after the window.
        number_of_samples = number_of_rows - window_length
        X = np.stack(
            [scaled[i:i + window_length] for i in range(number_of_samples)]
        )
        y = scaled[window_length:].copy()

        return X, y, scaler

    # Separate A and P games
    df_A = df[df['AP'] == 'A'].drop(['PlayDate', 'AP'], axis=1)
    df_P = df[df['AP'] == 'P'].drop(['PlayDate', 'AP'], axis=1)

    return process_game_data(df_A), process_game_data(df_P)

def create_improved_model(window_length, number_of_features, output_features):
    """Build and compile the Conv1D + stacked bidirectional-LSTM network.

    Args:
        window_length: Number of time steps in each input sequence.
        number_of_features: Number of values per time step.
        output_features: Size of the sigmoid output layer.

    Returns:
        A compiled ``Sequential`` model (Adam with learning rate 0.001, MSE
        loss, MAE metric).
    """
    dropout_rate = 0.4

    # Layers are instantiated in network order.
    stack = [
        Input(shape=(window_length, number_of_features)),
        # Convolutional feature extraction followed by pooling over time
        Conv1D(filters=64, kernel_size=3, padding='same', activation='relu'),
        MaxPooling1D(pool_size=2),
        # Three recurrent stages; only the last one collapses the sequence
        Bidirectional(LSTM(128, return_sequences=True)),
        Dropout(dropout_rate),
        Bidirectional(LSTM(64, return_sequences=True)),
        Dropout(dropout_rate),
        Bidirectional(LSTM(32)),
        Dropout(dropout_rate),
        # Dense head
        Dense(256, activation='relu'),
        Dropout(dropout_rate),
        Dense(128, activation='relu'),
        Dropout(dropout_rate),
        # Output layer
        Dense(output_features, activation='sigmoid'),
    ]

    network = Sequential(stack)
    optimizer = keras.optimizers.Adam(learning_rate=0.001)
    network.compile(optimizer=optimizer, loss='mse', metrics=['mae'])
    return network

def train_improved_model(X, y, game_type):
    """Fit a fresh model from ``create_improved_model`` on a split of ``X``/``y``.

    Args:
        X: Input sequences; ``X.shape[1]`` and ``X.shape[2]`` size the input.
        y: Targets aligned with ``X``; ``y.shape[1]`` sizes the output layer.
        game_type: Label used in the checkpoint file name.

    Returns:
        ``(model, history)`` where ``history`` is the object returned by
        ``model.fit``.
    """
    # 80/20 split; the held-out 20% is not used inside this function.
    X_train, _X_test, y_train, _y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    window_length, number_of_features = X.shape[1], X.shape[2]
    model = create_improved_model(window_length, number_of_features, y.shape[1])

    early_stop = EarlyStopping(
        monitor='val_loss', patience=30, restore_best_weights=True
    )
    checkpoint = ModelCheckpoint(
        f'improved_model_{game_type}.keras', save_best_only=True
    )
    # Halve the learning rate after 10 stagnant epochs, never below 1e-4.
    lr_schedule = ReduceLROnPlateau(
        monitor='val_loss', factor=0.5, patience=10, min_lr=0.0001
    )

    history = model.fit(
        X_train,
        y_train,
        epochs=300,
        batch_size=32,
        validation_split=0.2,
        callbacks=[early_stop, checkpoint, lr_schedule],
        verbose=1,
    )

    return model, history

# Main execution for improved model: rebuilds the data arrays and replaces
# model_A / model_P (and their histories) with the Conv1D + LSTM variant.
if __name__ == "__main__":
    # NOTE: prepare_improved_data does not call create_features, so the arrays
    # contain only the scaled columns read from the CSV.
    print("Processing data with improved features...")
    (X_A, y_A, scaler_A), (X_P, y_P, scaler_P) = prepare_improved_data("2024.csv")

    print("Training improved model A...")
    model_A, history_A = train_improved_model(X_A, y_A, 'A')

    print("Training improved model P...")
    model_P, history_P = train_improved_model(X_P, y_P, 'P')

    # Make predictions and analyze results
    # (the analysis calls below are currently disabled)
    #print("Analyzing results...")

    #analyze_predictions(model_A, X_A, scaler_A, 'A', y_A)
    #analyze_predictions(model_P, X_P, scaler_P, 'P', y_P)

Processing data with improved features...
Training improved model A...
Epoch 1/300
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 148ms/step - loss: 0.0654 - mae: 0.2151 - val_loss: 0.0737 - val_mae: 0.2342 - learning_rate: 0.0010
Epoch 2/300
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 38ms/step - loss: 0.0610 - mae: 0.2073 - val_loss: 0.0580 - val_mae: 0.2034 - learning_rate: 0.0010
Epoch 3/300
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step - loss: 0.0442 - mae: 0.1741 - val_loss: 0.0468 - val_mae: 0.1605 - learning_rate: 0.0010
Epoch 4/300
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step - loss: 0.0379 - mae: 0.1520 - val_loss: 0.0437 - val_mae: 0.1612 - learning_rate: 0.0010
Epoch 5/300
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step - loss: 0.0352 - mae: 0.1527 - val_loss: 0.0463 - val_mae: 0.1743 - learning_rate: 0.0010
Epoch 6/300
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m

In [None]:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from collections import Counter

# Make predictions and analyze results
print("Analyzing results...")

# Make predictions for next games.
# X[-1] is the window whose target is y[-1], the last recorded draw, so
# feeding X[-1:] would only re-predict a known result. The window for the next
# draw is X[-1] without its oldest row, followed by y[-1]; the leading axis
# added at the end is the batch dimension.
print("Making predictions...")
last_sequence_A = np.concatenate([X_A[-1, 1:], y_A[-1:]])[np.newaxis]
last_sequence_P = np.concatenate([X_P[-1, 1:], y_P[-1:]])[np.newaxis]

print("Predicting next 'A' game numbers...")
print("Predicting next 'P' game numbers...")

# original_features_count is passed here, but process_predictions does not
# read that argument.
predictions_A = model_A.predict(last_sequence_A)
top_10_predictions_A = process_predictions(predictions_A, scaler_A, original_features_count=10, top_n=10)

predictions_P = model_P.predict(last_sequence_P)
top_10_predictions_P = process_predictions(predictions_P, scaler_P, original_features_count=10, top_n=10)

# display_predictions prints nothing when it receives None or an empty list.
display_predictions(top_10_predictions_A, 'A')
display_predictions(top_10_predictions_P, 'P')


Analyzing results...
Making predictions...
Predicting next 'A' game numbers...
Predicting next 'P' game numbers...
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step

Sorted pairs (number, confidence): [(19, 67.30063), (18, 63.95086), (17, 60.706852), (16, 57.375664), (15, 54.109886), (14, 50.715282), (13, 47.297832), (12, 43.827694), (11, 40.447495), (10, 36.954765), (9, 33.909683), (8, 30.642344), (7, 27.690271), (6, 24.018663), (5, 20.456509)]
Top 10 indices: [19, 18, 17, 16, 15, 14, 13, 12, 11, 10]
Final top 10 numbers: [37 40 44 47 51 54 57 61 64 67]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 437ms/step

Sorted pairs (number, confidence): [(19, 66.860695), (18, 63.593044), (17, 60.47284), (16, 57.694374), (15, 54.209644), (14, 50.24315), (13, 47.719685), (12, 44.555893), (11, 40.414143), (10, 37.85281), (9, 34.899616), (8, 31.538483), (7, 27.630959), (6, 24.703793), (5, 20.661854)]
Top 10 indices: [19, 18, 17, 16, 15, 14, 13, 12, 11, 10]
Final