In [1]:
#First, let’s import the relevant libraries and packages:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from keras.models import Sequential
from keras.layers import LSTM, Dense, Bidirectional, Dropout

In [2]:
df = pd.read_csv('drs.csv')

In [3]:
df.head(10)

Unnamed: 0,A,B,C,D,E
0,'80','44','72','23','80'
1,'06','46','12','23','06'
2,'31','84','46','68','31'
3,'74','05','33','52','74'
4,'74','43','54','90','74'
5,'88','08','73','53','88'
6,'71','10','72','73','71'
7,'56','02','18','41','56'
8,'58','60','63','12','58'
9,'70','68','34','30','70'


In [4]:
df.tail(10)

Unnamed: 0,A,B,C,D,E
50,'75','60','77','54','75'
51,'16','28','69','15','16'
52,'01','78','51','40','01'
53,'50','61','17','79','50'
54,'19','75','68','12','19'
55,'18','14','05','57','18'
56,'55','25','42','1','55'
57,'57','40','63','60','57'
58,'43','49','47','66','43'
59,'71','14','48','04','71'


In [5]:
print(df.shape)

(60, 5)


In [6]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 60 entries, 0 to 59
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   A       60 non-null     object
 1   B       60 non-null     object
 2   C       60 non-null     object
 3   D       60 non-null     object
 4   E       60 non-null     object
dtypes: object(5)
memory usage: 2.5+ KB


In [7]:
df.describe()

Unnamed: 0,A,B,C,D,E
count,60,60,60,60,60
unique,44,41,42,47,44
top,'74','08','51','22','74'
freq,3,3,3,3,3


In [8]:
df.isnull().sum()

A    0
B    0
C    0
D    0
E    0
dtype: int64

In [9]:
# Turn categorical variables into numbers and fill missing
for label, content in df.items():
    if not pd.api.types.is_numeric_dtype(content):
        # Add binary column to indicate whether sample had missing value
        df[label+"_is_missing"] = pd.isnull(content)
        # Turn categories into numbers and add +1
        df[label] = pd.Categorical(content).codes+1

In [10]:
df.T

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,50,51,52,53,54,55,56,57,58,59
A,39,3,15,35,35,42,33,23,25,31,...,36,5,1,21,7,6,22,24,19,32
B,21,22,37,4,20,6,7,2,26,30,...,26,14,35,27,33,10,13,18,23,9
C,39,6,26,18,31,40,39,10,34,19,...,41,37,30,9,36,2,24,34,27,28
D,14,14,37,28,47,29,40,23,10,17,...,30,11,22,43,10,32,7,33,36,3
E,39,3,15,35,35,42,33,23,25,31,...,36,5,1,21,7,6,22,24,19,32
A_is_missing,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
B_is_missing,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
C_is_missing,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
D_is_missing,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
E_is_missing,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False


In [11]:
#Deep learning algorithms expect all input features to vary in a similar way, and ideally to have a mean of 0, and a variance of 1. We must re-scale our data so that it fulfills these requirements.

scaler = StandardScaler().fit(df.values)
transformed_dataset = scaler.transform(df.values)
transformed_df = pd.DataFrame(data=transformed_dataset, index=df.index)

In [12]:
#Let’s check out our scaled data:
transformed_df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,1.377545,0.030214,1.332166,-0.694416,1.377545,0.0,0.0,0.0,0.0,0.0
1,-1.56267,0.116538,-1.340264,-0.694416,-1.56267,0.0,0.0,0.0,0.0,0.0
2,-0.582598,1.411403,0.27939,1.090117,-0.582598,0.0,0.0,0.0,0.0,0.0
3,1.050855,-1.4373,-0.368471,0.391821,1.050855,0.0,0.0,0.0,0.0,0.0
4,1.050855,-0.056111,0.684304,1.866,1.050855,0.0,0.0,0.0,0.0,0.0


In [13]:
# All our games
number_of_rows = df.values.shape[0]
number_of_rows

60

In [14]:
# Amount of games we need to take into consideration for prediction
window_length = 7
window_length

7

In [15]:
# Balls counts
number_of_features = df.values.shape[1]
number_of_features

10

In [16]:
#Next, let’s crate X and y for each row in our scaled data. It should have format for Keras LSTM model (rows, window size, balls)

X = np.empty([ number_of_rows - window_length, window_length, number_of_features], dtype=float)
y = np.empty([ number_of_rows - window_length, number_of_features], dtype=float)
for i in range(0, number_of_rows-window_length):
    X[i] = transformed_df.iloc[i : i+window_length, 0 : number_of_features]
    y[i] = transformed_df.iloc[i+window_length : i+window_length+1, 0 : number_of_features]

In [17]:
#Let’s check out X shape:
X.shape    

(53, 7, 10)

In [18]:
#Let’s check out y shape:
y.shape

(53, 10)

In [19]:
#Lets’ check out our first scaled sample (which made of 7 consecutive lottery games):

X[0]

array([[ 1.37754537,  0.03021351,  1.33216578, -0.69441597,  1.37754537,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ],
       [-1.56267005,  0.11653784, -1.34026405, -0.69441597, -1.56267005,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ],
       [-0.58259824,  1.41140268,  0.27939039,  1.09011669, -0.58259824,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ],
       [ 1.05085477, -1.43729998, -0.36847139,  0.3918213 ,  1.05085477,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ],
       [ 1.05085477, -0.05611081,  0.684304  ,  1.86600045,  1.05085477,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ],
       [ 1.62256333, -1.26465133,  1.4131485 ,  0.46940968,  1.62256333,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ],
       [ 0.88750947, -1.17832701,  1.33216578,  1.32288182,  0.88750947,
         0.        ,  0.        ,  0.        

In [20]:
#Lets’ check out our first scaled label:

y[0]

array([ 0.07078296, -1.60994862, -1.01633316,  0.00387942,  0.07078296,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ])

In [21]:
#Lets’ check out the second scaled sample (which made of 7 consecutive lottery games):

X[1]

array([[-1.56267005,  0.11653784, -1.34026405, -0.69441597, -1.56267005,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ],
       [-0.58259824,  1.41140268,  0.27939039,  1.09011669, -0.58259824,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ],
       [ 1.05085477, -1.43729998, -0.36847139,  0.3918213 ,  1.05085477,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ],
       [ 1.05085477, -0.05611081,  0.684304  ,  1.86600045,  1.05085477,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ],
       [ 1.62256333, -1.26465133,  1.4131485 ,  0.46940968,  1.62256333,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ],
       [ 0.88750947, -1.17832701,  1.33216578,  1.32288182,  0.88750947,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ],
       [ 0.07078296, -1.60994862, -1.01633316,  0.00387942,  0.07078296,
         0.        ,  0.        ,  0.        

In [22]:
#Lets’ check out our second scaled label:

y[1]

array([ 0.23412827,  0.46183513,  0.92725217, -1.00476947,  0.23412827,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ])

# Modeling

In [23]:
#First, let’s initialise the RNN

model = Sequential()

In [24]:
#Let’s add the input layer and the LSTM layer

model.add(Bidirectional(LSTM(240, input_shape = (window_length, number_of_features), return_sequences = True)))

In [25]:
#Let’s add a first Dropout layer in order to reduce overfitting
model.add(Dropout(0.2))

In [26]:
#Let’s add a second LSTM layer

model.add(Bidirectional(LSTM(240, input_shape = (window_length, 
                                                 number_of_features), return_sequences = True)))

In [27]:
#Let’s add a second Dropout layer

model.add(Dropout(0.2))

In [28]:
#Then, let’s add a third LSTM layer

model.add(Bidirectional(LSTM(240, input_shape = (window_length, number_of_features), 
                             return_sequences = True)))

In [29]:
#Now, let’s add a fourth LSTM layer

model.add(Bidirectional(LSTM(240, input_shape = (window_length, number_of_features), 
                             return_sequences = False)))

In [30]:
#Next, let’s add a dense layer

model.add(Dense(59))

In [31]:
#Finally, let’s add the last output layer

model.add(Dense(number_of_features))

# Now, let's compile the RNN

In [32]:
from tensorflow import keras
from tensorflow.keras.optimizers import Adam
model.compile(optimizer=Adam(learning_rate=0.0001), loss ='mse', metrics=['accuracy'])

In [33]:
#Next, let’s train our LSTM model

model.fit(x=X, y=y, batch_size=100, epochs=300, verbose=2)

Epoch 1/300
1/1 - 22s - loss: 0.4836 - accuracy: 0.0755 - 22s/epoch - 22s/step
Epoch 2/300
1/1 - 0s - loss: 0.4824 - accuracy: 0.0755 - 433ms/epoch - 433ms/step
Epoch 3/300
1/1 - 0s - loss: 0.4808 - accuracy: 0.1509 - 380ms/epoch - 380ms/step
Epoch 4/300
1/1 - 0s - loss: 0.4798 - accuracy: 0.2264 - 372ms/epoch - 372ms/step
Epoch 5/300
1/1 - 0s - loss: 0.4784 - accuracy: 0.2453 - 387ms/epoch - 387ms/step
Epoch 6/300
1/1 - 0s - loss: 0.4771 - accuracy: 0.2264 - 379ms/epoch - 379ms/step
Epoch 7/300
1/1 - 0s - loss: 0.4759 - accuracy: 0.3208 - 376ms/epoch - 376ms/step
Epoch 8/300
1/1 - 0s - loss: 0.4745 - accuracy: 0.2642 - 377ms/epoch - 377ms/step
Epoch 9/300
1/1 - 0s - loss: 0.4731 - accuracy: 0.2642 - 374ms/epoch - 374ms/step
Epoch 10/300
1/1 - 0s - loss: 0.4716 - accuracy: 0.2830 - 374ms/epoch - 374ms/step
Epoch 11/300
1/1 - 0s - loss: 0.4703 - accuracy: 0.2453 - 372ms/epoch - 372ms/step
Epoch 12/300
1/1 - 0s - loss: 0.4692 - accuracy: 0.2830 - 372ms/epoch - 372ms/step
Epoch 13/300
1/1

Epoch 100/300
1/1 - 0s - loss: 0.3090 - accuracy: 0.3962 - 356ms/epoch - 356ms/step
Epoch 101/300
1/1 - 0s - loss: 0.3066 - accuracy: 0.3962 - 365ms/epoch - 365ms/step
Epoch 102/300
1/1 - 0s - loss: 0.3116 - accuracy: 0.3396 - 359ms/epoch - 359ms/step
Epoch 103/300
1/1 - 0s - loss: 0.2945 - accuracy: 0.3774 - 390ms/epoch - 390ms/step
Epoch 104/300
1/1 - 0s - loss: 0.2874 - accuracy: 0.3585 - 378ms/epoch - 378ms/step
Epoch 105/300
1/1 - 0s - loss: 0.2877 - accuracy: 0.3962 - 367ms/epoch - 367ms/step
Epoch 106/300
1/1 - 0s - loss: 0.2854 - accuracy: 0.3396 - 368ms/epoch - 368ms/step
Epoch 107/300
1/1 - 0s - loss: 0.2810 - accuracy: 0.3962 - 372ms/epoch - 372ms/step
Epoch 108/300
1/1 - 0s - loss: 0.2821 - accuracy: 0.3774 - 364ms/epoch - 364ms/step
Epoch 109/300
1/1 - 0s - loss: 0.2838 - accuracy: 0.3396 - 359ms/epoch - 359ms/step
Epoch 110/300
1/1 - 0s - loss: 0.2873 - accuracy: 0.4151 - 366ms/epoch - 366ms/step
Epoch 111/300
1/1 - 0s - loss: 0.2694 - accuracy: 0.3774 - 366ms/epoch - 366

Epoch 198/300
1/1 - 0s - loss: 0.1182 - accuracy: 0.4528 - 379ms/epoch - 379ms/step
Epoch 199/300
1/1 - 0s - loss: 0.1180 - accuracy: 0.4906 - 379ms/epoch - 379ms/step
Epoch 200/300
1/1 - 0s - loss: 0.1153 - accuracy: 0.4717 - 385ms/epoch - 385ms/step
Epoch 201/300
1/1 - 0s - loss: 0.1211 - accuracy: 0.4151 - 385ms/epoch - 385ms/step
Epoch 202/300
1/1 - 0s - loss: 0.1134 - accuracy: 0.4340 - 385ms/epoch - 385ms/step
Epoch 203/300
1/1 - 0s - loss: 0.1096 - accuracy: 0.4717 - 390ms/epoch - 390ms/step
Epoch 204/300
1/1 - 0s - loss: 0.1098 - accuracy: 0.4528 - 375ms/epoch - 375ms/step
Epoch 205/300
1/1 - 0s - loss: 0.1107 - accuracy: 0.4906 - 399ms/epoch - 399ms/step
Epoch 206/300
1/1 - 0s - loss: 0.1091 - accuracy: 0.5283 - 393ms/epoch - 393ms/step
Epoch 207/300
1/1 - 0s - loss: 0.1052 - accuracy: 0.5094 - 374ms/epoch - 374ms/step
Epoch 208/300
1/1 - 0s - loss: 0.1024 - accuracy: 0.5472 - 376ms/epoch - 376ms/step
Epoch 209/300
1/1 - 0s - loss: 0.1094 - accuracy: 0.5094 - 387ms/epoch - 387

Epoch 296/300
1/1 - 0s - loss: 0.0430 - accuracy: 0.6226 - 402ms/epoch - 402ms/step
Epoch 297/300
1/1 - 0s - loss: 0.0434 - accuracy: 0.6981 - 399ms/epoch - 399ms/step
Epoch 298/300
1/1 - 0s - loss: 0.0497 - accuracy: 0.6226 - 407ms/epoch - 407ms/step
Epoch 299/300
1/1 - 0s - loss: 0.0410 - accuracy: 0.6415 - 420ms/epoch - 420ms/step
Epoch 300/300
1/1 - 0s - loss: 0.0486 - accuracy: 0.5660 - 484ms/epoch - 484ms/step


<keras.src.callbacks.History at 0x23796f1dfd0>

# Evaluation

In [34]:
#Let’s take the results of the last Diamond lottery games:

to_predict = df.tail(8)
to_predict

Unnamed: 0,A,B,C,D,E,A_is_missing,B_is_missing,C_is_missing,D_is_missing,E_is_missing
52,1,35,30,22,1,False,False,False,False,False
53,21,27,9,43,21,False,False,False,False,False
54,7,33,36,10,7,False,False,False,False,False
55,6,10,2,32,6,False,False,False,False,False
56,22,13,24,7,22,False,False,False,False,False
57,24,18,34,33,24,False,False,False,False,False
58,19,23,27,36,19,False,False,False,False,False
59,32,9,28,3,32,False,False,False,False,False


In [35]:
#Let's remove the last raw from the 8 last games

to_predict.drop([to_predict.index[-1]],axis=0, inplace=True)
to_predict

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


Unnamed: 0,A,B,C,D,E,A_is_missing,B_is_missing,C_is_missing,D_is_missing,E_is_missing
52,1,35,30,22,1,False,False,False,False,False
53,21,27,9,43,21,False,False,False,False,False
54,7,33,36,10,7,False,False,False,False,False
55,6,10,2,32,6,False,False,False,False,False
56,22,13,24,7,22,False,False,False,False,False
57,24,18,34,33,24,False,False,False,False,False
58,19,23,27,36,19,False,False,False,False,False


In [36]:
prediction = df.tail(1)
prediction

Unnamed: 0,A,B,C,D,E,A_is_missing,B_is_missing,C_is_missing,D_is_missing,E_is_missing
59,32,9,28,3,32,False,False,False,False,False


In [37]:
#Next, we have to change the format of our last 7 games from dataframe to np.array in order to insert them into our model

to_predict = np.array(to_predict)
to_predict

array([[1, 35, 30, 22, 1, False, False, False, False, False],
       [21, 27, 9, 43, 21, False, False, False, False, False],
       [7, 33, 36, 10, 7, False, False, False, False, False],
       [6, 10, 2, 32, 6, False, False, False, False, False],
       [22, 13, 24, 7, 22, False, False, False, False, False],
       [24, 18, 34, 33, 24, False, False, False, False, False],
       [19, 23, 27, 36, 19, False, False, False, False, False]],
      dtype=object)

In [38]:
#Then, we have to re-scale those 7 games

scaled_to_predict = scaler.transform(to_predict)
scaled_to_predict

array([[-1.72601535,  1.23875404,  0.60332128, -0.07370896, -1.72601535,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ],
       [-0.09256234,  0.54815945, -1.09731589,  1.55564695, -0.09256234,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ],
       [-1.23597945,  1.06610539,  1.08921761, -1.00476947, -1.23597945,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ],
       [-1.3176521 , -0.91935404, -1.66419494,  0.70217481, -1.3176521 ,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ],
       [-0.01088969, -0.66038107,  0.11742495, -1.2375346 , -0.01088969,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ],
       [ 0.15245561, -0.22875946,  0.92725217,  0.77976318,  0.15245561,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ],
       [-0.25590764,  0.20286216,  0.36037311,  1.01252831, -0.25590764,
         0.        ,  0.        ,  0.        

In [39]:

#Now, let’s predict the results (i.e., the 6 numbers) of the lottety game based on those 7 games:

y_pred = model.predict(np.array([scaled_to_predict]))
print('The predicted numbers in the last lottery game are:', 
      scaler.inverse_transform(y_pred).astype(int)[0])

The predicted numbers in the last lottery game are: [32  9 29  3 32  0  0  0  0  0]
