### Deep learning model time!!!

<img src = pics/OIP.jpg width = 400>

In [80]:
import cupy
import numpy as np
import math
import time
import torch
cupy.cuda.set_allocator(None)       # no clue
from torch.utils.dlpack import from_dlpack

import numba
from numba import cuda

### Creating dataset
Each monte carlo simulation run is equivalent to one data point being made. In order to make a big dataset, we need to a way to run many monte carlo simulations at a single time.
we will do this by creating a class that can iterate over mc simulations thereby running them all. 
Monte carlo simulations will be run in batches, the class will iterate over the batches. 

here,the mc model from mc_snow, cuda version was imported and cleaned up a bit.
note that due to the existence of batches, some of the varaibles now need a bit of extra finagling to access properly. (s_0, Ki, Ko, mu, sigma, pot,r, d_normals, snowball_path_holder). Overall design is very close to original, though.

In [81]:
@cuda.jit               # defualt GPU
def monte_carlo_andtheholygrail_gpu(d_s, s_0, Ki, Ko, mu, sigma, pot,r,
                                    d_normals, snowball_path_holder, MONTHS,
                                    N_STEPS, N_PATHS, N_BATCH):
    

    # for shared memory (non)optimization
    # shared = cuda.shared.array(shape=0, dtype=numba.float32)
    # # load to shared memory
    # path_offset = cuda.blockIdx.x * cuda.blockDim.x

    # ii - overall thread index
    ii = cuda.threadIdx.x + cuda.blockIdx.x * cuda.blockDim.x
    stride = cuda.gridDim.x * cuda.blockDim.x

    for n in range(ii, N_PATHS * N_BATCH, stride):
        # newly added vars for N_BATCH calculations
        batch_id = n // N_PATHS
        path_id = n % N_PATHS       # equivalent to n in old code 

        snowball_path_holder[n][0] = s_0[batch_id]
        earlyexit = False
        ki = False
        mald = False
        for t in range(N_STEPS):
            # pre shared memory b_motion    
            #                                                   
            b_motion = d_normals[path_id + batch_id * N_PATHS +  t * N_PATHS * N_BATCH]

            # post shared memory b_motion
            # shared[cuda.threadIdx.x] = d_normals[path_offset + cuda.threadIdx.x + t * N_PATHS]

            dt = 1/N_STEPS
            # pre shared memory b_motion
            ds = snowball_path_holder[n][t] * mu[batch_id] * dt + snowball_path_holder[n][t] \
                                                * sigma[batch_id] * b_motion * math.sqrt(dt) 
            # post shared memory b_motion
            # ds = snowball_path_holder[n][t] * mu[batch_id] * dt + snowball_path_holder[n][t] * sigma[batch_id] * shared[cuda.threadIdx.x] * math.sqrt(dt) 
                    # no adjusting list sizes in cuda :(
            # snowball_path.append(snowball_path[t]+ds)
            snowball_path_holder[n][t+1] = snowball_path_holder[n][t] + ds
            

            # ki = snowball_path[t] + ds
            if snowball_path_holder[n][t+1] <= Ki[batch_id]:
                ki = True

            if not mald:
                for month in (0,1,2,3,4,5,6,7,8,9,10,11):                # need to do this instead because contains (in) and range are disabled
                    if t+1 == MONTHS[month]:     #startday no longer used to fake a start date in code
                        # price = t+1+startday
                        if snowball_path_holder[n][t+1] >= Ko[batch_id]:
                            price =  pot[batch_id] * t/365     # should turn t into int
                            # return snowball_path, price
                            d_s[n] =  price * math.exp(-r[batch_id] * t/N_STEPS)   # accounting for r
                            snowball_path_holder[n][-1] = d_s[n]            
                            earlyexit = True
                            mald = True
                            # print("blo got fucked\n")
                            break

        
        if not earlyexit:       # to prevent early exit getting out of bdds error
            # did not get knocked up or down
            price = pot[batch_id]
            # t  =T 
                        # CAN'T USE T CUZ CUDA IS FUCKING SHIT so use -1 instead
                        # or not ig T works now :sob:
            if ki and snowball_path_holder[n][N_STEPS] <= s_0[batch_id]:          # blo got knocked down and never recovered
                price = snowball_path_holder[n][N_STEPS] - s_0[batch_id]
            elif ki and snowball_path_holder[n][N_STEPS] <= Ko[batch_id]:          # blo got knocked down for a bit but finished above Ki
                price =0
            d_s[n] = price * math.exp(-r[batch_id])
            snowball_path_holder[n][-1] = d_s[n]    




And below is the class itself. Instead of having variables just lying around, they can now be inputted into the class itself, making things much more streamlined. Of especial note is the next method, as it takes the place of the cell used to set up and run the mc model in mc_snow.

In [82]:

class SnowballDataSet(object):

    def __init__(self, max_len = 10, number_path = 1000, batch = 2, threads = 512, seed  =1999 ):
        self.num = 0
        self.max_length = max_len
        self.N_PATHS = number_path
        self.N_STEPS = 365
        self.N_BATCH  =batch
        # we will not be calculating a starting date since the difference is negligible and I aint rigging up
        # a system to check if a certain day is a weekend or not
        self.MONTHS = cupy.asnumpy([0, 31,59,90,120,151,181,212,243, 273,304,334])
                # SHOULD THIS BE NP ARRAY INSTEAD????
        self.snowball_path_holder =  np.zeros(self.N_BATCH*self.N_PATHS, dtype=(np.float32,self.N_STEPS+1))# extra 1 is no longer for storing payoff
        # self.snowball_path_holder = cupy.array(self.snowball_path_holder)
        # self.T  = np.float(365.0)         # nah id lose. 
        self.output = cupy.zeros(self.N_BATCH*self.N_PATHS, dtype = cupy.float32)
        self.num_blocks  =(self.N_PATHS * self.N_BATCH -1) // threads +1
        self.num_threads = threads

        #  temp_months, snowball_path_holder both added now
        cupy.random.seed(seed)

    def __len__(self):
        return self.max_length

    def __iter__(self):
        self.num = 0
        return self

    #   next basically takes the place of the cell running the mc. As such need to generate
     # (d_s, s_0, Ki, Ko, mu, sigma,  pot,r, d_normals, snowball_path_holder, MONTHS, N_STEPS, N_PATHS, N_BATCH
     # note that all but s_0, Ki, Ko, mu, sigma,  pot,r, d_normals have been generated in init due to their nonrandom nature
    def __next__(self):
        if self.num > self.max_length: 
            raise StopIteration      # nvidia notebook uses raise StopIteration here but p sure its deprecated???
                                      # is used because return returns an extra None
        # generating the variables
        # d_normals
        randoms = cupy.random.normal(0,1, self.N_BATCH * self.N_PATHS * self.N_STEPS, dtype= cupy.float32)

        Xpre = cupy.random.rand(self.N_BATCH, 7, dtype = cupy.float32)
        #                        s_0,  Ki, Ko,  mu, sigma, pot, r
        Xpre = Xpre * cupy.array([4,  -2,  1,  .01,  .15,  10, .01], dtype=cupy.float32)
        X = Xpre +    cupy.array([8,   0,  0,  .02, .275,  15, .02], dtype=cupy.float32)
        
        # Ki and Ko will be set down here instead of the previous line to make them relative to s_0.
        X[:, 1] = X[:,0] -1         # overriding Ki and Ko 
        X[:, 2] = X[:,0] -.2        
        # print(X)
        X[:, 1] += Xpre[:,1]        # adding back the offset in Xpre after it gets overrided
        X[:, 2] += Xpre[:,2] 

        # making sure self.snowball_path_holder is zeroed to avoid bug
        self.snowball_path_holder.fill(0)

                                        # d_s, s_0, Ki, Ko, mu, sigma, pot,r,
                                        # d_normals, snowball_path_holder, MONTHS,
                                        # N_STEPS, N_PATHS, N_BATCH):
        monte_carlo_andtheholygrail_gpu[(self.num_blocks,), (self.num_threads,)](
                                        self.output, X[:, 0], X[:, 1], X[:, 2], X[:, 3], 
                                        X[:, 4], X[:, 5], X[:, 6],
                                        randoms, self.snowball_path_holder, self.MONTHS,
                                        self.N_STEPS, self.N_PATHS, self.N_BATCH)
        
        o = self.output.reshape(self.N_BATCH, self.N_PATHS)
        Y  =o.mean(axis =1)         # getting the average of each batch
        self.num+=1
        return (from_dlpack(X.toDlpack()), from_dlpack(Y.toDlpack()))



And now a small test run.

In [83]:
ds = SnowballDataSet(10, number_path=10000, batch=16, seed=15)
for i in ds:
    # print(i, "\n")
    print(i[1])     # printing the Ys



tensor([4.1612, 4.1876, 3.7439, 2.3786, 3.1238, 4.2057, 2.9624, 2.9381, 3.1704,
        1.9510, 2.0888, 3.3419, 2.1617, 2.9747, 3.9707, 3.2155],
       device='cuda:0')
tensor([2.8766, 3.2134, 3.8943, 4.5994, 3.4451, 5.9988, 2.4716, 2.8492, 3.4437,
        3.0921, 3.6431, 3.7365, 2.0492, 2.8616, 1.7161, 1.9710],
       device='cuda:0')
tensor([3.2472, 1.8451, 3.4295, 2.7194, 4.0218, 1.8255, 2.0745, 2.8479, 3.3335,
        4.1964, 3.0267, 3.8777, 3.1592, 3.1716, 3.1976, 8.0444],
       device='cuda:0')
tensor([3.5953, 4.1705, 2.6966, 6.9514, 3.4343, 2.3110, 3.2148, 3.2804, 3.6683,
        4.0522, 2.4406, 4.5176, 4.2990, 4.3525, 2.6904, 2.1808],
       device='cuda:0')
tensor([2.2041, 2.8960, 2.0879, 2.1883, 2.2827, 1.9341, 2.7678, 2.1171, 1.8091,
        3.4225, 2.9143, 2.8206, 2.2795, 3.1062, 2.0904, 2.8461],
       device='cuda:0')
tensor([3.1355, 2.5234, 3.1775, 1.9048, 1.6399, 3.7960, 3.1595, 3.3570, 3.2171,
        4.0832, 3.4028, 2.4540, 2.6305, 2.9390, 2.7934, 2.7206],
       dev

### Creating the model

FILL IN LATER YAP YAPP, ADD CODE

In [84]:
%%writefile snow_model.py
import torch.nn as nn
import torch.nn.functional as F
import torch


class Net(nn.Module):

    def __init__(self, hidden=1024):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(7, hidden)
        self.fc2 = nn.Linear(hidden, hidden)
        self.fc3 = nn.Linear(hidden, hidden)
        self.fc4 = nn.Linear(hidden, hidden)
        self.fc5 = nn.Linear(hidden, hidden)
        self.fc6 = nn.Linear(hidden, hidden)
        self.fc7 = nn.Linear(hidden, 1)
        self.register_buffer('norm',
                             torch.tensor([10.0,
                                           8.5,
                                           10.4,
                                           0.025,
                                           0.35,
                                           0.20,
                                           0.025]))

    def forward(self, x):
        # normalize the parameter to range [0-1] 
        x = x / self.norm
        x = F.elu(self.fc1(x))
        x = F.elu(self.fc2(x))
        x = F.elu(self.fc3(x))
        x = F.elu(self.fc4(x))
        x = F.elu(self.fc5(x))
        x = F.elu(self.fc6(x))
        return self.fc7(x)

Overwriting snow_model.py


### Running the model
YAP CENTRAL

In [100]:
from ignite.engine import Engine, Events
from ignite.handlers import Timer
from torch.nn import MSELoss
from torch.optim import Adam
from ignite.contrib.handlers.param_scheduler import CosineAnnealingScheduler
from ignite.handlers import ModelCheckpoint
from snow_model import Net
# from cupy_dataset import OptionDataSet
timer = Timer(average=True)
model = Net().cuda()
loss_fn = MSELoss()
optimizer = Adam(model.parameters(), lr=1e-3)
# dataset = OptionDataSet(max_len=10000, number_path = 1024, batch=4800)
dataset = SnowballDataSet(max_len = 50000, number_path = 500000, batch = 4, threads = 512, seed  =1999 )

def train_update(engine, batch):
    model.train()
    optimizer.zero_grad()
    x = batch[0]
    y = batch[1]
    y_pred = model(x)
    loss = loss_fn(y_pred[:,0], y)
    loss.backward()
    optimizer.step()
    return loss.item()

trainer = Engine(train_update)
log_interval = 100

scheduler = CosineAnnealingScheduler(optimizer, 'lr', 1e-4, 1e-6, len(dataset))
trainer.add_event_handler(Events.ITERATION_STARTED, scheduler)
timer.attach(trainer,
             start=Events.EPOCH_STARTED,
             resume=Events.ITERATION_STARTED,
             pause=Events.ITERATION_COMPLETED,
             step=Events.ITERATION_COMPLETED)    
@trainer.on(Events.ITERATION_COMPLETED)
def log_training_loss(engine):
    iter = (engine.state.iteration - 1) % len(dataset) + 1
    if iter % log_interval == 0:
        print('loss', engine.state.output, 'average time', timer.value())

# @trainer.on(Events.GET_BATCH_STARTED)
# def log_training_loss(engine):
#     print("EPOCH!!!!!!!!!!!!\n")
        
# trainer.run(dataset, max_epochs=100)
trainer.run(dataset, max_epochs=100)

In [85]:
from ignite.engine import Engine, Events
from ignite.handlers import Timer
from torch.nn import MSELoss
from torch.optim import Adam
from ignite.contrib.handlers.param_scheduler import CosineAnnealingScheduler
from ignite.handlers import ModelCheckpoint
from snow_model import Net
# from cupy_dataset import SnowballDataSet

In [89]:
timer = Timer(average=True) # getting the avg of each run
model = Net.cuda()
loss_fn = MSELoss()
optimizer = Adam(model.parameters(), lr = 1e-3)
dataset = SnowballDataSet(max_len = 5000, number_path = 500000, batch = 4800, threads = 512, seed  =1999 )

def train_update(engine, batch):
    model.train()
    optimizer.zero_grad()
    x= batch[0]
    y = batch[1]
    y_pred = model(x)
    loss = loss_fn(y_pred[:,0],y)
    loss.backward()
    optimizer.step()
    return loss.item()

trainer = Engine(train_update)
log_interval = 100

scheduler = CosineAnnealingScheduler(optimizer, 'lr', 1e-4, 1e-6, len(dataset))
trainer.add_event_handler(Events.ITERATION_STARTED, scheduler)
timer.attach(trainer,
             start=Events.EPOCH_STARTED,
             resume=Events.ITERATION_STARTED,
             pause=Events.ITERATION_COMPLETED,
             step=Events.ITERATION_COMPLETED)    

@trainer.on(Events.ITERATION_COMPLETED)
def log_training_loss(engine):
    iter = (engine.state.iteration -1) % len(dataset) + 1
    if iter % log_interval ==0:
        print('loss', engine.state.output, 'average time', timer.value())

trainer.run(dataset, max_epochs = 1)

TypeError: Module.cuda() missing 1 required positional argument: 'self'