### PRELUDE ###
In this notebook, I run a Long Short-Term Memory (LSTM) model on truly random data. The data is Powerball game data downloaded online. I use the LSTM model to try to predict the next Powerball number.
<p> The architecture is pretty simple: convert the Powerball numbers into one-hot encodings over all possible values, then pass the encoded data into a 2-layer LSTM connected to a fully connected layer that predicts the Powerball number. </p> <p> To split the task, I'll make the prediction in two ways: </p>
    <ul>
        <li>Predict whether the next Powerball number will be odd or even</li>
        <li>Predict the exact value of the next Powerball number</li>
    </ul>

In [20]:
# Import the data
import pandas as pd
import torch
from torch.utils.data import TensorDataset, DataLoader
import numpy as np
from torch import nn
import torch.nn.functional as F

In [2]:
data_csv = pd.read_csv('data/powerball_dataT.csv', encoding = 'ISO-8859-1')

In [3]:
# Keep only the parity label and the drawn number, and drop incomplete rows.
# Chaining (instead of an in-place dropna on a column selection) avoids
# pandas' chained-assignment warnings when columns are added later, and
# resetting the index keeps label-based lookups such as `[0]` aligned with
# row positions even when rows have been dropped.
data_csv = (
    data_csv[['Pevodd', 'PowerBall']]
    .dropna(axis=0)
    .reset_index(drop=True)
)

In [4]:
data_csv['Pevodd'].unique()

array(['even', 'odd'], dtype=object)

In [17]:
data_csv['PowerBall'].unique()

array([8, 9, 3, 1, 2, 6, 7, 4, 5, 0], dtype=int64)

In [6]:
# Encode the parity column as an integer label: 0 for 'even', 1 otherwise.
data_csv['z'] = data_csv['Pevodd'].apply(lambda x: 0 if x == 'even' else 1)
data_csv = data_csv.astype({'PowerBall': 'int32', 'z': 'int32'})

# Inputs are the draws in file order.
x_data = list(data_csv['PowerBall'])
_parity = list(data_csv['z'])

# Targets are the inputs shifted left by one position, so entry i is paired
# with the value at row i + 1. The shift is done positionally on plain lists
# (not with a label lookup like `series[0]`, which raises KeyError when row 0
# was removed by dropna). The final target wraps around to the first row only
# to keep all three lists the same length; that single pair is not a genuine
# "next draw".
y_data = x_data[1:] + x_data[:1]
z_data = _parity[1:] + _parity[:1]


In [25]:
tuple(set(x_data))

(0, 1, 2, 3, 4, 5, 6, 7, 8, 9)

In [7]:
print(len(x_data), len(y_data), len(z_data))

745450 745450 745450


In [8]:
# Positional train / validation / test boundaries, applied identically to the
# inputs (x), exact-number targets (y) and parity targets (z) so the three
# splits stay aligned row for row.
_cuts = (slice(None, 600000), slice(600000, 672725), slice(672725, None))

x_train, x_valid, x_test = (np.array(x_data[cut]) for cut in _cuts)
y_train, y_valid, y_test = (np.array(y_data[cut]) for cut in _cuts)
z_train, z_valid, z_test = (np.array(z_data[cut]) for cut in _cuts)

In [9]:
# Wrap each numpy split as a TensorDataset of (input, target) pairs.
# The same input tensors back both the exact-number datasets and the parity
# (z_*) datasets; only the target tensor differs.
_x_train_t, _x_valid_t, _x_test_t = (
    torch.from_numpy(split) for split in (x_train, x_valid, x_test)
)

# Targets: the exact next number.
train_data = TensorDataset(_x_train_t, torch.from_numpy(y_train))
valid_data = TensorDataset(_x_valid_t, torch.from_numpy(y_valid))
test_data = TensorDataset(_x_test_t, torch.from_numpy(y_test))

# Targets: the parity label.
z_train_data = TensorDataset(_x_train_t, torch.from_numpy(z_train))
z_valid_data = TensorDataset(_x_valid_t, torch.from_numpy(z_valid))
z_test_data = TensorDataset(_x_test_t, torch.from_numpy(z_test))

In [11]:
# dataloaders
# Each loader batch holds batch_size * seq_length individual draws, which
# train() later reshapes into batch_size rows of seq_length draws each.
batch_size= 120
seq_length= 100
batch= batch_size * seq_length

# Do NOT shuffle here: every loader batch is reshaped into rows of consecutive
# draws, so the original order must be preserved for the rows to be real
# sequences. drop_last discards the final short batch, which could not be
# reshaped to (batch_size, seq_length).
train_loader = DataLoader(train_data, shuffle=False, batch_size=batch, drop_last = True)
valid_loader = DataLoader(valid_data, shuffle=False, batch_size=batch, drop_last= True)


In [12]:
def oneHotEncode(arr, cols_num):
    """Return a float32 one-hot encoding of integer labels as a torch tensor.

    Each label ``v`` becomes a length-``cols_num`` row that is all zeros
    except for a 1 at position ``v``. The result keeps the shape of ``arr``
    and adds one trailing axis of size ``cols_num``.

    Example: ``oneHotEncode([3, 2, 1], 8)`` has shape ``(3, 8)`` and equals::

        [[0 0 0 1 0 0 0 0]
         [0 0 1 0 0 0 0 0]
         [0 1 0 0 0 0 0 0]]

    Args:
        arr: Array-like of integer labels (list, numpy array or CPU tensor),
            of any shape.
        cols_num: Number of classes, i.e. the width of each one-hot row.

    Returns:
        A ``torch.float32`` tensor of shape ``(*arr.shape, cols_num)``.

    Raises:
        IndexError: If any label is negative or ``>= cols_num``, or if the
            labels are not of an integer type.
    """
    labels = np.asarray(arr)

    # An empty input has no rows to fill; an empty list also arrives as a
    # float array, which numpy would refuse to use as an index.
    if labels.size == 0:
        return torch.zeros((*labels.shape, cols_num), dtype=torch.float32)

    flat = labels.reshape(-1)

    # Numpy would silently wrap negative labels to columns counted from the
    # end, producing a wrong encoding, so reject them explicitly.
    if flat.min() < 0 or flat.max() >= cols_num:
        raise IndexError(
            "labels must lie in [0, {}); got range [{}, {}]".format(
                cols_num, flat.min(), flat.max()
            )
        )

    # One row per label, with a single 1 placed at the label's column.
    one_hot = np.zeros((flat.size, cols_num), dtype=np.float32)
    one_hot[np.arange(flat.size), flat] = 1.0

    # Restore the input's shape, keeping the class axis last.
    return torch.from_numpy(one_hot.reshape(*labels.shape, cols_num))

In [15]:
# Pull one batch from each loader as a sanity check. The built-in next() is
# used because recent PyTorch DataLoader iterators no longer expose .next().
dataiter = iter(train_loader)
sample_x, sample_y = next(dataiter)

# The validation sample must come from the validation iterator; drawing from
# `dataiter` again would just return the second training batch.
validiter = iter(valid_loader)
valid_x, valid_y = next(validiter)

print(oneHotEncode(sample_x, 10))

tensor([[0., 0., 0.,  ..., 0., 1., 0.],
        [0., 0., 0.,  ..., 0., 0., 1.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 1.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 1.,  ..., 0., 0., 0.]])


In [18]:
# Module-level flag consulted by the model and training code to decide
# whether tensors and the network are moved to CUDA.
train_on_gpu = torch.cuda.is_available()
print(
    'Yaay, CUDA is available, now you can train'
    if train_on_gpu
    else 'Dude, dont try it, your pc is gonna crash'
)

Dude, dont try it, your pc is gonna crash


In [26]:
class yorLSTM(nn.Module):
    """Stacked LSTM followed by a linear layer that scores every token.

    Args:
        tokens: Iterable of token values. Its distinct values form the
            vocabulary stored in ``self.chars``; the vocabulary size is both
            the LSTM input width and the number of output scores.
        n_hidden: Size of the LSTM hidden state.
        n_layers: Number of stacked LSTM layers.
        drop_prob: Dropout probability, applied between LSTM layers and to
            the LSTM output before the linear layer.
        lr: Learning rate. Only stored on the instance; this class does not
            use it itself.
    """

    def __init__(self, tokens, n_hidden, n_layers=2, drop_prob=0.5, lr=0.001):
        super().__init__()
        self.drop_prob = drop_prob
        self.n_layers = n_layers
        self.n_hidden = n_hidden
        self.lr = lr

        # Vocabulary: the distinct token values. Only its length is used to
        # size the layers, so the (arbitrary) set ordering does not matter.
        self.chars = tuple(set(tokens))
        n_tokens = len(self.chars)

        # nn.LSTM applies dropout only between layers; passing a non-zero
        # value with a single layer has no effect and makes PyTorch warn.
        lstm_dropout = drop_prob if n_layers > 1 else 0.0
        self.lstm = nn.LSTM(n_tokens, n_hidden, n_layers,
                            dropout=lstm_dropout, batch_first=True)

        # Dropout on the LSTM output, ahead of the linear layer.
        self.dropout = nn.Dropout(drop_prob)

        # Maps each hidden vector to one score per token.
        self.fc = nn.Linear(n_hidden, n_tokens)

    def forward(self, x, hidden):
        """Run one forward pass.

        Args:
            x: One-hot input of shape ``(batch, seq, len(self.chars))``
                (the LSTM is built with ``batch_first=True``).
            hidden: ``(h, c)`` tuple as returned by ``init_hidden`` or by a
                previous call.

        Returns:
            ``(out, hidden)`` where ``out`` has shape
            ``(batch * seq, len(self.chars))`` (unnormalised scores, one row
            per time step) and ``hidden`` is the updated ``(h, c)`` tuple.
        """
        out, hidden = self.lstm(x, hidden)

        # Dropout to reduce overfitting.
        out = self.dropout(out)

        # Flatten batch and time so the linear layer scores every step.
        out = out.reshape(-1, self.n_hidden)
        out = self.fc(out)

        return out, hidden

    def init_hidden(self, batch_size):
        """Return a zeroed ``(h, c)`` state for ``batch_size`` sequences.

        Both tensors have shape ``(n_layers, batch_size, n_hidden)`` and are
        created with the dtype and device of the model's own parameters, so
        the state always matches wherever the model currently lives.
        """
        reference = next(self.parameters())
        shape = (self.n_layers, batch_size, self.n_hidden)
        return (reference.new_zeros(shape), reference.new_zeros(shape))
            

In [27]:
def train(network, training_data, validation_data, epochs, batch_size, lr,
          seq_length, clip=5, vis=10):
    """Train ``network`` with Adam and cross-entropy, reporting validation loss.

    Args:
        network: Model exposing ``chars``, ``init_hidden(batch_size)`` and a
            ``forward(x, hidden)`` that returns ``(scores, hidden)``.
        training_data: Iterable of ``(x, y)`` batches, each holding
            ``batch_size * seq_length`` integer tokens / targets.
        validation_data: Iterable of ``(x, y)`` batches in the same layout.
        epochs: Number of passes over ``training_data``.
        batch_size: Number of sequences each batch is reshaped into.
        lr: Learning rate for the Adam optimiser.
        seq_length: Number of tokens per sequence.
        clip: Maximum gradient norm used for gradient clipping.
        vis: Report validation loss every ``vis`` training steps
            (a falsy value disables reporting).

    Returns:
        None. Progress is printed.
    """
    network.train()

    optimiser = torch.optim.Adam(network.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()

    # Run on CUDA if available.
    if train_on_gpu:
        network.cuda()

    counter = 0
    n_chars = len(network.chars)
    n_targets = batch_size * seq_length

    for epoch in range(epochs):
        # Fresh hidden state at the start of every epoch.
        h = network.init_hidden(batch_size)

        for x, y in training_data:
            counter += 1

            # One-hot encode the batch as (batch_size, seq_length, n_chars).
            x = oneHotEncode(x.reshape(batch_size, seq_length), n_chars)
            if train_on_gpu:
                x, y = x.cuda(), y.cuda()

            # Detach the carried-over state, otherwise backprop would run
            # through the entire training history.
            h = tuple(each.detach() for each in h)

            network.zero_grad()
            output, h = network(x, h)

            loss = criterion(output, y.view(n_targets).long())
            loss.backward()
            # Gradient-norm clipping guards against exploding gradients in
            # RNNs / LSTMs.
            nn.utils.clip_grad_norm_(network.parameters(), clip)
            optimiser.step()

            if vis and counter % vis == 0:
                # Evaluate on the whole validation set in eval mode (dropout
                # off) and without building autograd graphs.
                network.eval()
                val_h = network.init_hidden(batch_size)
                validation_losses = []

                with torch.no_grad():
                    for val_x, val_y in validation_data:
                        val_x = oneHotEncode(
                            val_x.reshape(batch_size, seq_length), n_chars)
                        if train_on_gpu:
                            val_x, val_y = val_x.cuda(), val_y.cuda()

                        val_output, val_h = network(val_x, val_h)
                        # Separate name so the training loss reported below
                        # is not overwritten.
                        val_loss = criterion(
                            val_output, val_y.view(n_targets).long())
                        validation_losses.append(val_loss.item())

                # Back to training mode only after ALL validation batches.
                network.train()

                mean_val_loss = (np.mean(validation_losses)
                                 if validation_losses else float('nan'))
                print("Epoch: {}/{}...".format(epoch + 1, epochs),
                      "Step: {}...".format(counter),
                      "Loss: {:.4f}...".format(loss.item()),
                      "Val Loss: {:.4f}".format(mean_val_loss))

In [28]:
# Instantiate the LSTM Model
# n_hidden is the size of the LSTM hidden state.
n_hidden = 500

# x_data is passed as the token source: its distinct values become the
# model's vocabulary, which sets the input and output width.
network = yorLSTM(x_data,n_hidden)
print(network)

yorLSTM(
  (lstm): LSTM(10, 500, num_layers=2, batch_first=True, dropout=0.5)
  (dropout): Dropout(p=0.5, inplace=False)
  (fc): Linear(in_features=500, out_features=10, bias=True)
)


In [29]:
# Initiate the training
# epochs: number of full passes over train_loader.
epochs = 40
# lr: learning rate handed to the Adam optimiser inside train().
lr = 0.001
# batch_size and seq_length must be the same values used to size the loader
# batches, because train() reshapes each batch to (batch_size, seq_length).
train(network,train_loader,valid_loader,epochs,batch_size,lr,seq_length)

(0, 1, 2, 3, 4, 5, 6, 7, 8, 9)

In [None]:
def predict(network, char, h=None, top_k=None):
    """Sample the next token given the current token and hidden state.

    Args:
        network: Trained model exposing ``chars``, ``init_hidden`` and a
            ``forward(x, hidden)`` that returns ``(scores, hidden)``.
        char: Current token, as the integer class index that
            ``oneHotEncode`` expects.
        h: Hidden state ``(h, c)`` from a previous call, or ``None`` to
            start from a zero state for a single sequence.
        top_k: If given, sample only among the ``top_k`` most probable
            tokens; otherwise sample over all tokens.

    Returns:
        ``(next_char, h)``: the sampled class index as a Python ``int`` and
        the updated hidden state.
    """
    n_chars = len(network.chars)

    # A single token as a (batch=1, seq=1, n_chars) one-hot tensor.
    inputs = oneHotEncode(np.array([[int(char)]]), n_chars)
    if train_on_gpu:
        inputs = inputs.cuda()

    if h is None:
        h = network.init_hidden(1)
    # Detach the hidden state from its history.
    h = tuple(each.detach() for each in h)

    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        out, h = network(inputs, h)
        # Token probabilities, moved to the CPU for numpy sampling.
        p = F.softmax(out, dim=1).cpu()

    if top_k is None:
        top_ch = np.arange(n_chars)
    else:
        p, top_ch = p.topk(top_k)
        # reshape(-1), not squeeze(): with top_k == 1 squeeze() yields a 0-d
        # array, which np.random.choice would treat as "arange(value)".
        top_ch = top_ch.numpy().reshape(-1)

    # Renormalise in float64 so the probabilities sum to 1 within numpy's
    # tolerance, then sample with some randomness.
    p = p.numpy().reshape(-1).astype(np.float64)
    next_char = np.random.choice(top_ch, p=p / p.sum())

    return int(next_char), h

In [30]:
np.arange(10)

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [None]:
def sample(network, size, prime, top_k=None):
    """Generate tokens from ``network``, seeded with the ``prime`` sequence.

    The prime tokens are fed through the network one by one to warm up the
    hidden state; the prediction made after the last prime token is appended,
    followed by ``size`` further predictions, each conditioned on the
    previously generated token.

    Args:
        network: Trained model accepted by ``predict``.
        size: Number of additional tokens to generate after the first
            prediction that follows the prime.
        prime: Non-empty sequence of seed tokens. It is not modified.
        top_k: Forwarded to ``predict`` to restrict sampling to the
            ``top_k`` most probable tokens.

    Returns:
        A new list holding the prime tokens followed by ``size + 1``
        generated tokens.

    Raises:
        ValueError: If ``prime`` is empty.
    """
    # Work on a copy so the caller's sequence is left untouched.
    chars = list(prime)
    if not chars:
        raise ValueError("prime must contain at least one token")

    if train_on_gpu:
        network.cuda()
    else:
        network.cpu()

    network.eval()  # eval mode

    # First off, run through the prime tokens to build up the hidden state.
    h = network.init_hidden(1)
    for ch in list(chars):
        char, h = predict(network, ch, h, top_k=top_k)

    # The prediction after the last prime token is the first generated token.
    chars.append(char)

    # Now pass in the previous token and get a new one.
    for _ in range(size):
        char, h = predict(network, chars[-1], h, top_k=top_k)
        chars.append(char)

    return chars