## Can we predict a Mega Millions winning ticket based only on the frequency of past winning numbers?

## Step 1: Load the needed libraries.

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import random
from collections import Counter

## Step 2: Load the data

In [2]:
dataset = pd.read_csv('MM_2002-3.csv')
balls = dataset.iloc[:, 1:6].values
megaball = dataset.iloc[:, -1].values

## Step 3: Process the data

In [3]:
# Pre-process data for algorithm
X_balls = np.zeros((balls.shape[0], int(np.nanmax(balls))))

# Adjust array size based on max value of Mega Ball
X_megaball = np.zeros((megaball.shape[0], int(np.nanmax(megaball))))

# Continue with the rest of the code
for i in range(megaball.shape[0]):
    if not np.isnan(megaball[i]):  # Check if the value is not NaN
        X_megaball[i, int(megaball[i]) - 1] = 1
    else:
        # Handle the NaN case (e.g., skip or assign a default value)
        print(f"NaN found at index {i}, skipping or assigning default value.")
        # You can skip or do something like:
        # X_megaball[i, default_value] = 1

random.seed(0)

## Step 4: Predict five winning numbers

In [4]:
# Predict 5 winning numbers
N = balls.shape[0]
d = 69
num_selected = []
numbers_of_rewards_1 = [0] * d
numbers_of_rewards_0 = [0] * d
for n in range(N):
    max_random = 0
    num = 0
    for i in range(d):
        random_beta = random.betavariate(numbers_of_rewards_1[i]+1,
                                         numbers_of_rewards_0[i]+1)
        if random_beta > max_random:
            max_random = random_beta
            num = i
            
    num_selected.append(num)
    if X_balls[n, num] == 1:
        numbers_of_rewards_1[num] += 1
    else:
        numbers_of_rewards_0[num] += 1
        
pick_five_balls = [i+1 for i, _ in Counter(num_selected).most_common(5)]

## Step 5: Predict the Mega Ball

In [5]:
# Predict winning Mega Ball number
N = megaball.shape[0]
d = 26
num_selected = []
numbers_of_rewards_1 = [0] * d
numbers_of_rewards_0 = [0] * d
for n in range(N):
    max_random = 0
    num = 0
    for i in range(d):
        random_beta = random.betavariate(numbers_of_rewards_1[i]+1,
                                         numbers_of_rewards_0[i]+1)
        if random_beta > max_random:
            max_random = random_beta
            num = i
            
    num_selected.append(num)
    # Assuming n is the index of rows
if num < X_megaball.shape[1]:  # Check if num is within bounds
    if X_megaball[n, num] == 1:
        numbers_of_rewards_1[num] += 1
    else:
        numbers_of_rewards_0[num] += 1
else:
    print(f"Index {num} is out of bounds for X_megaball with size {X_megaball.shape[1]}")
        
pick_megaball = [i+1 for i, _ in Counter(num_selected).most_common(1)]

Index 16 is out of bounds for X_megaball with size 5


## Step 6: Winning Mega Millions Numbers?

In [6]:
print('Mega Millions ticket to play: ', sorted(pick_five_balls), pick_megaball)

Mega Millions ticket to play:  [40, 41, 46, 50, 52] [23]
