In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler

In [2]:
from google.colab import files
uploaded = files.upload()

Saving megamillions.csv to megamillions (4).csv


In [3]:
import io
df = pd.read_csv(io.BytesIO(uploaded['megamillions.csv']))
# Dataset is now stored in a Pandas Dataframe

In [4]:
print(df.shape)
print(list(df.columns))

(1896, 7)
['Date', 'Num1', 'Num2', 'Num3', 'Num4', 'Num5', 'Mega Ball']


In [5]:
df.head()

Unnamed: 0,Date,Num1,Num2,Num3,Num4,Num5,Mega Ball
0,05/12/2003,12,44,15,18,1,42
1,09/12/2003,14,15,48,4,24,41
2,12/12/2003,16,32,46,9,45,26
3,16/12/2003,47,16,31,24,46,47
4,19/12/2003,5,10,39,17,35,38


In [6]:
# Amount of games we need to take into consideration for prediction
window_length = 7
window_length

7

In [7]:
df1 = df.copy()
df.drop(['Date'], axis=1, inplace=True)

number_of_features = df.shape[1] 

In [8]:
train = df.copy()
train.head((window_length+1))

Unnamed: 0,Num1,Num2,Num3,Num4,Num5,Mega Ball
0,12,44,15,18,1,42
1,14,15,48,4,24,41
2,16,32,46,9,45,26
3,47,16,31,24,46,47
4,5,10,39,17,35,38
5,29,20,10,17,1,36
6,46,21,12,32,18,49
7,8,7,36,48,44,9


In [9]:
train_rows = train.values.shape[0]
train_samples = np.empty([ train_rows - window_length, window_length, number_of_features], dtype=float)
train_labels = np.empty([ train_rows - window_length, number_of_features], dtype=float)
for i in range(0, train_rows-window_length):
    train_samples[i] = train.iloc[i : i+window_length, 0 : number_of_features]
    train_labels[i] = train.iloc[i+window_length : i+window_length+1, 0 : number_of_features]

In [10]:
train_samples[0]

array([[12., 44., 15., 18.,  1., 42.],
       [14., 15., 48.,  4., 24., 41.],
       [16., 32., 46.,  9., 45., 26.],
       [47., 16., 31., 24., 46., 47.],
       [ 5., 10., 39., 17., 35., 38.],
       [29., 20., 10., 17.,  1., 36.],
       [46., 21., 12., 32., 18., 49.]])

In [11]:
train_labels[0]

array([ 8.,  7., 36., 48., 44.,  9.])

In [12]:
scaler = StandardScaler()
transformed_dataset = scaler.fit_transform(train.values)
scaled_train_samples = pd.DataFrame(data=transformed_dataset, index=train.index)

In [13]:
scaled_train_samples.head(window_length+1)

Unnamed: 0,0,1,2,3,4,5
0,-1.049939,0.697848,-0.878971,-0.701529,-1.669639,1.888246
1,-0.940251,-0.887054,0.922918,-1.473816,-0.402084,1.808604
2,-0.830562,0.042027,0.813713,-1.197999,0.755248,0.613985
3,0.869613,-0.832402,-0.005328,-0.370549,0.810359,2.286452
4,-1.43385,-1.160313,0.431494,-0.756692,0.204138,1.569681
5,-0.117586,-0.613795,-1.151985,-0.756692,-1.669639,1.410398
6,0.814768,-0.559143,-1.042779,0.070758,-0.732751,2.445734
7,-1.269317,-1.324269,0.267686,0.953372,0.700137,-0.739916


In [14]:
x_train = np.empty([ train_rows - window_length, window_length, number_of_features], dtype=float)
y_train = np.empty([ train_rows - window_length, number_of_features], dtype=float)

for i in range(0, train_rows-window_length):
    x_train[i] = scaled_train_samples.iloc[i : i+window_length, 0 : number_of_features]
    y_train[i] = scaled_train_samples.iloc[i+window_length : i+window_length+1, 0 : number_of_features]

In [15]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Bidirectional, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import mse

In [16]:
physical_devices = tf.config.experimental.list_physical_devices('GPU')
print("Num GPUs Available: ", len(physical_devices))
tf.config.experimental.set_memory_growth(physical_devices[0], True)

Num GPUs Available:  1


In [17]:
# Initialising the RNN
model = Sequential()
# Adding the input layer and the LSTM layer
model.add(Bidirectional(LSTM(240,
                        input_shape = (window_length, number_of_features),
                        return_sequences = True)))
# Adding a first Dropout layer
model.add(Dropout(0.2))
# Adding a second LSTM layer
model.add(Bidirectional(LSTM(240,
                        input_shape = (window_length, number_of_features),
                        return_sequences = True)))
# Adding a second Dropout layer
model.add(Dropout(0.2))
# Adding a third LSTM layer
model.add(Bidirectional(LSTM(240,
                        input_shape = (window_length, number_of_features),
                        return_sequences = True)))
# Adding a fourth LSTM layer
model.add(Bidirectional(LSTM(240,
                        input_shape = (window_length, number_of_features),
                        return_sequences = False)))
# Adding a third Dropout layer
model.add(Dropout(0.2))
# Adding the first output layer
model.add(Dense(70))
# Adding the last output layer
model.add(Dense(number_of_features))

In [18]:
from tensorflow import keras
from tensorflow.keras.optimizers import Adam
model.compile(optimizer=Adam(learning_rate=0.0001), loss ='mse', metrics=['accuracy'])

In [19]:
model.fit(x=x_train, y=y_train, batch_size=100, epochs=2400, verbose=2)

Epoch 1/2400
19/19 - 18s - loss: 0.9749 - accuracy: 0.2726 - 18s/epoch - 945ms/step
Epoch 2/2400
19/19 - 0s - loss: 0.9475 - accuracy: 0.2922 - 267ms/epoch - 14ms/step
Epoch 3/2400
19/19 - 0s - loss: 0.9434 - accuracy: 0.3060 - 273ms/epoch - 14ms/step
Epoch 4/2400
19/19 - 0s - loss: 0.9424 - accuracy: 0.2954 - 274ms/epoch - 14ms/step
Epoch 5/2400
19/19 - 0s - loss: 0.9414 - accuracy: 0.3002 - 267ms/epoch - 14ms/step
Epoch 6/2400
19/19 - 0s - loss: 0.9400 - accuracy: 0.3097 - 270ms/epoch - 14ms/step
Epoch 7/2400
19/19 - 0s - loss: 0.9404 - accuracy: 0.3070 - 270ms/epoch - 14ms/step
Epoch 8/2400
19/19 - 0s - loss: 0.9384 - accuracy: 0.3102 - 262ms/epoch - 14ms/step
Epoch 9/2400
19/19 - 0s - loss: 0.9388 - accuracy: 0.2996 - 270ms/epoch - 14ms/step
Epoch 10/2400
19/19 - 0s - loss: 0.9376 - accuracy: 0.3033 - 271ms/epoch - 14ms/step
Epoch 11/2400
19/19 - 0s - loss: 0.9370 - accuracy: 0.2996 - 275ms/epoch - 14ms/step
Epoch 12/2400
19/19 - 0s - loss: 0.9363 - accuracy: 0.3017 - 260ms/epoch -

<keras.callbacks.History at 0x7fb0fab06f40>

In [20]:
val = df1.copy()
val = val.tail((window_length+3))
val = val.head((window_length+1))
val

Unnamed: 0,Date,Num1,Num2,Num3,Num4,Num5,Mega Ball
1886,07/03/2023,15,69,28,25,22,21
1887,10/03/2023,60,9,20,59,63,5
1888,14/03/2023,55,1,38,23,7,2
1889,17/03/2023,39,26,49,29,28,25
1890,21/03/2023,21,40,25,1,27,11
1891,24/03/2023,17,14,42,33,66,15
1892,28/03/2023,18,3,68,2,32,24
1893,31/03/2023,26,16,27,42,61,23


In [21]:
val_Date = df1.iloc[ (val.tail().index[-1]) ]['Date']
val1 = val.head((window_length))
val1

Unnamed: 0,Date,Num1,Num2,Num3,Num4,Num5,Mega Ball
1886,07/03/2023,15,69,28,25,22,21
1887,10/03/2023,60,9,20,59,63,5
1888,14/03/2023,55,1,38,23,7,2
1889,17/03/2023,39,26,49,29,28,25
1890,21/03/2023,21,40,25,1,27,11
1891,24/03/2023,17,14,42,33,66,15
1892,28/03/2023,18,3,68,2,32,24


In [22]:
val1.drop(['Date'], axis=1, inplace=True)
val1 = np.array(val1)
val1

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  val1.drop(['Date'], axis=1, inplace=True)


array([[15, 69, 28, 25, 22, 21],
       [60,  9, 20, 59, 63,  5],
       [55,  1, 38, 23,  7,  2],
       [39, 26, 49, 29, 28, 25],
       [21, 40, 25,  1, 27, 11],
       [17, 14, 42, 33, 66, 15],
       [18,  3, 68,  2, 32, 24]])

In [23]:
x_val = scaler.transform(val1)
x_val

array([[-0.88540638,  2.06414344, -0.16913592, -0.31538536, -0.51230623,
         0.2157791 ],
       [ 1.58258906, -1.21496503, -0.60595759,  1.56016832,  1.74724765,
        -1.05848111],
       [ 1.30836734, -1.6521795 ,  0.37689116, -0.42571205, -1.33897228,
        -1.2974049 ],
       [ 0.43085785, -0.2858843 ,  0.97752096, -0.09473199, -0.18163981,
         0.53434415],
       [-0.55634032,  0.47924101, -0.33294405, -1.6393056 , -0.23675088,
        -0.58063353],
       [-0.77571769, -0.94170599,  0.595302  ,  0.12592139,  1.91258086,
        -0.26206848],
       [-0.72087335, -1.54287588,  2.01497242, -1.58414226,  0.03880447,
         0.45470289]])

In [24]:
y_val_pred = model.predict(np.array([x_val]))
print("The predicted numbers for the lottery game which took place on",val_Date, "were (without rounding up):", scaler.inverse_transform(y_val_pred).astype(int)[0])

The predicted numbers for the lottery game which took place on 31/03/2023 were (without rounding up): [25 15 27 41 60 22]


In [25]:
y_val_pred = model.predict(np.array([x_val]))
print("The predicted numbers for the lottery game which took place on",val_Date, "were (with rounding up):", scaler.inverse_transform(y_val_pred).astype(int)[0]+1)

The predicted numbers for the lottery game which took place on 31/03/2023 were (with rounding up): [26 16 28 42 61 23]


In [26]:
print("The actual numbers for the lottery game which took place on",val_Date, "were:")
y_val_true = val.tail(1)
y_val_true

The actual numbers for the lottery game which took place on 31/03/2023 were:


Unnamed: 0,Date,Num1,Num2,Num3,Num4,Num5,Mega Ball
1893,31/03/2023,26,16,27,42,61,23


In [27]:
print('-' * 40)
print('Prediction vs. GoundTruth without rounding up or down')
for i in range(1,10):
  test = df1.copy()
  test = test.tail((window_length+10-i))
  test = test.head((window_length+1))
  test_Date = df1.iloc[ (test.tail().index[-1]) ]['Date']
  test1 = test.head((window_length))
  test1.drop(['Date'], axis=1, inplace=True)
  test1 = np.array(test1)
  x_test = scaler.transform(test1)
  y_test_pred = model.predict(np.array([x_test]))
  y_test_true = test.drop(['Date'], axis=1, inplace=True)
  y_test_true = test.tail(1)
  print('Drawing  Date', test_Date)
  print('Prediction:\t', scaler.inverse_transform(y_test_pred).astype(int)[0])
  print('GoundTruth:\t', np.array(y_test_true)[0])
  print('-' * 40)


----------------------------------------
Prediction vs. GoundTruth without rounding up or down
Drawing  Date 10/03/2023
Prediction:	 [61  9 20 58 62  5]
GoundTruth:	 [60  9 20 59 63  5]
----------------------------------------
Drawing  Date 14/03/2023
Prediction:	 [55  0 37 23  6  2]
GoundTruth:	 [55  1 38 23  7  2]
----------------------------------------
Drawing  Date 17/03/2023


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test1.drop(['Date'], axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test1.drop(['Date'], axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test1.drop(['Date'], axis=1, inplace=True)


Prediction:	 [38 26 48 29 27 25]
GoundTruth:	 [39 26 49 29 28 25]
----------------------------------------
Drawing  Date 21/03/2023
Prediction:	 [20 39 24  1 26 10]
GoundTruth:	 [21 40 25  1 27 11]
----------------------------------------
Drawing  Date 24/03/2023
Prediction:	 [17 13 41 32 65 14]
GoundTruth:	 [17 14 42 33 66 15]
----------------------------------------


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test1.drop(['Date'], axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test1.drop(['Date'], axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test1.drop(['Date'], axis=1, inplace=True)


Drawing  Date 28/03/2023
Prediction:	 [17  2 68  2 30 23]
GoundTruth:	 [18  3 68  2 32 24]
----------------------------------------
Drawing  Date 31/03/2023
Prediction:	 [25 15 27 41 60 22]
GoundTruth:	 [26 16 27 42 61 23]
----------------------------------------
Drawing  Date 04/04/2023
Prediction:	 [ 1 44 61 36 64  4]
GoundTruth:	 [ 1 45 62 37 64  4]
----------------------------------------


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test1.drop(['Date'], axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test1.drop(['Date'], axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test1.drop(['Date'], axis=1, inplace=True)


Drawing  Date 07/04/2023
Prediction:	 [51 66 49 11 31 21]
GoundTruth:	 [51 66 49 12 32 21]
----------------------------------------


In [28]:
print('-' * 40)
print('Prediction vs. GoundTruth with rounding up')
for i in range(1,10):
  test = df1.copy()
  test = test.tail((window_length+10-i))
  test = test.head((window_length+1))
  test_Date = df1.iloc[ (test.tail().index[-1]) ]['Date']
  test1 = test.head((window_length))
  test1.drop(['Date'], axis=1, inplace=True)
  test1 = np.array(test1)
  x_test = scaler.transform(test1)
  y_test_pred = model.predict(np.array([x_test]))
  y_test_true = test.drop(['Date'], axis=1, inplace=True)
  y_test_true = test.tail(1)
  print('Drawing  Date', test_Date)
  print('Prediction:\t', scaler.inverse_transform(y_test_pred).astype(int)[0]+1)
  print('GoundTruth:\t', np.array(y_test_true)[0])
  print('-' * 40)

----------------------------------------
Prediction vs. GoundTruth with rounding up
Drawing  Date 10/03/2023
Prediction:	 [62 10 21 59 63  6]
GoundTruth:	 [60  9 20 59 63  5]
----------------------------------------
Drawing  Date 14/03/2023
Prediction:	 [56  1 38 24  7  3]
GoundTruth:	 [55  1 38 23  7  2]
----------------------------------------


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test1.drop(['Date'], axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test1.drop(['Date'], axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test1.drop(['Date'], axis=1, inplace=True)


Drawing  Date 17/03/2023
Prediction:	 [39 27 49 30 28 26]
GoundTruth:	 [39 26 49 29 28 25]
----------------------------------------
Drawing  Date 21/03/2023
Prediction:	 [21 40 25  2 27 11]
GoundTruth:	 [21 40 25  1 27 11]
----------------------------------------


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test1.drop(['Date'], axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test1.drop(['Date'], axis=1, inplace=True)


Drawing  Date 24/03/2023
Prediction:	 [18 14 42 33 66 15]
GoundTruth:	 [17 14 42 33 66 15]
----------------------------------------
Drawing  Date 28/03/2023
Prediction:	 [18  3 69  3 31 24]
GoundTruth:	 [18  3 68  2 32 24]
----------------------------------------


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test1.drop(['Date'], axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test1.drop(['Date'], axis=1, inplace=True)


Drawing  Date 31/03/2023
Prediction:	 [26 16 28 42 61 23]
GoundTruth:	 [26 16 27 42 61 23]
----------------------------------------
Drawing  Date 04/04/2023
Prediction:	 [ 2 45 62 37 65  5]
GoundTruth:	 [ 1 45 62 37 64  4]
----------------------------------------
Drawing  Date 07/04/2023
Prediction:	 [52 67 50 12 32 22]
GoundTruth:	 [51 66 49 12 32 21]
----------------------------------------


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test1.drop(['Date'], axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test1.drop(['Date'], axis=1, inplace=True)


In [30]:
next_Date = '11/04/2023'

next = df.copy()
next = next.tail((window_length))
next

Unnamed: 0,Num1,Num2,Num3,Num4,Num5,Mega Ball
1889,39,26,49,29,28,25
1890,21,40,25,1,27,11
1891,17,14,42,33,66,15
1892,18,3,68,2,32,24
1893,26,16,27,42,61,23
1894,1,45,62,37,64,4
1895,51,66,49,12,32,21


In [None]:
next = np.array(next)
next

In [31]:
x_next = scaler.transform(next)
x_next



array([[ 0.43085785, -0.2858843 ,  0.97752096, -0.09473199, -0.18163981,
         0.53434415],
       [-0.55634032,  0.47924101, -0.33294405, -1.6393056 , -0.23675088,
        -0.58063353],
       [-0.77571769, -0.94170599,  0.595302  ,  0.12592139,  1.91258086,
        -0.26206848],
       [-0.72087335, -1.54287588,  2.01497242, -1.58414226,  0.03880447,
         0.45470289],
       [-0.28211861, -0.83240238, -0.22373863,  0.62239148,  1.63702551,
         0.37506162],
       [-1.65322718,  0.75250005,  1.68735617,  0.34657476,  1.80235872,
        -1.13812238],
       [ 1.08898997,  1.90018802,  0.97752096, -1.03250883,  0.03880447,
         0.2157791 ]])

In [32]:
y_next_pred = model.predict(np.array([x_next]))
print("The predicted numbers for the lottery game which will take place on",next_Date, "are (without rounding up):", scaler.inverse_transform(y_next_pred).astype(int)[0])

The predicted numbers for the lottery game which will take place on 11/04/2023 are (without rounding up): [30 40 17 46 32 12]


In [33]:
y_next_pred = model.predict(np.array([x_next]))
print("The predicted numbers for the lottery game which will take place on",next_Date, "are (with rounding up):", scaler.inverse_transform(y_next_pred).astype(int)[0]+1)

The predicted numbers for the lottery game which will take place on 11/04/2023 are (with rounding up): [31 41 18 47 33 13]


In [34]:
y_next_pred = model.predict(np.array([x_next]))
print("The predicted numbers for the lottery game which will take place on",next_Date, "are (with rounding down):", scaler.inverse_transform(y_next_pred).astype(int)[0]-1)

The predicted numbers for the lottery game which will take place on 11/04/2023 are (with rounding down): [29 39 16 45 31 11]
