In [None]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn

In [None]:
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
# Bare expression so the notebook displays the selected device.
device

In [None]:
import torch
import torch.nn as nn

class MultiLinearRegression(nn.Module):
    """Regression head built from products of linear maps.

    Each of the ``n_branches`` branches multiplies together the outputs of
    ``rank`` independent ``nn.Linear(in_features, 1)`` layers.  The branch
    outputs are concatenated along dim 1 and combined into a single value per
    sample by a final ``nn.Linear(n_branches, 1)``.

    All layers are created once in the constructor and held in
    ``nn.ModuleList`` containers, so they are registered as parameters: they
    are returned by ``parameters()`` (and therefore seen by the optimizer),
    moved by ``.to(device)``, and stored in the state dict.

    Args:
        in_features: number of input features per sample (default 300).
        n_branches: number of product branches fed to the final layer
            (default 5).
        rank: number of linear factors multiplied inside each branch
            (default 3).

    Raises:
        ValueError: if ``n_branches`` or ``rank`` is smaller than 1.
    """

    def __init__(self, in_features=300, n_branches=5, rank=3):
        super(MultiLinearRegression, self).__init__()
        if n_branches < 1 or rank < 1:
            raise ValueError('n_branches and rank must both be >= 1')

        self.in_features = in_features
        self.n_branches = n_branches
        self.rank = rank

        # factors[b][r] is the r-th linear factor of branch b.  Building them
        # here (instead of inside the forward pass) keeps the weights fixed
        # between calls and makes them trainable.
        self.factors = nn.ModuleList([
            nn.ModuleList([nn.Linear(in_features, 1) for _ in range(rank)])
            for _ in range(n_branches)
        ])

        # Final linear combination of the branch products.
        self.lin = nn.Linear(n_branches, 1)

    def subLayer(self, x, branch=0):
        """Return the element-wise product of one branch's linear factors.

        Args:
            x: input tensor whose last dimension is ``in_features``.
            branch: index of the branch to evaluate (default 0).

        Returns:
            Tensor with the same leading dimensions as ``x`` and a last
            dimension of size 1.
        """
        product = None
        for factor in self.factors[branch]:
            term = factor(x)
            product = term if product is None else product * term
        return product

    def forward(self, x):
        """Evaluate every branch, concatenate along dim 1, apply ``self.lin``."""
        branch_outputs = [self.subLayer(x, branch=b) for b in range(self.n_branches)]
        x = torch.cat(branch_outputs, 1)
        return self.lin(x)

# Instantiate the regression network and place it on the selected device.
# nn.Module.to() returns the module itself, so rebinding keeps the same object.
net = MultiLinearRegression()
net = net.to(device)
# Bare expression so the notebook displays the module summary.
net

In [None]:
def test(model, dataloader):
    test_corrects = 0
    total = 0
    with torch.no_grad():
        for x, y in dataloader:
            y = torch.reshape(y, (-1,1)).float()
            x = x.to(device)
            y = y.to(device)
            y_hat = model(x.float())
            test_loss = torch.sqrt(nn.MSELoss()(y, y_hat))
    return test_loss

In [None]:
from tqdm import tqdm
from statistics import mean
import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim


def train(net, optimizer, loader, epochs=10, val_loader=None,
          save_path='drive/MyDrive/Network/torch_model.pt'):
    """Train ``net`` with MSE loss and checkpoint it whenever validation improves.

    For every epoch the function iterates once over ``loader``, performs one
    optimizer step per batch, then evaluates the model with ``test`` and saves
    the whole model object to ``save_path`` if the validation loss is the
    lowest seen so far in this call.

    Args:
        net: the model to optimise; batches are moved to the notebook-level
            ``device`` before the forward pass.
        optimizer: optimizer already bound to ``net``'s parameters.
        loader: iterable of ``(x, y)`` training batches.  Targets are reshaped
            to ``(-1, 1)``, so any batch size (including a smaller final
            batch) is accepted.
        epochs: number of passes over ``loader``.
        val_loader: iterable of ``(x, y)`` batches used for validation.
            Defaults to ``loader``, which reproduces the previous behaviour of
            validating on the training data.
        save_path: destination passed to ``torch.save`` for the best model.

    Returns:
        None.  Progress is reported through the tqdm bar and one ``print``
        per epoch.
    """
    if val_loader is None:
        val_loader = loader

    # Start from +inf so the first epoch always produces a checkpoint.
    best_val_loss = float('inf')
    criterion = nn.MSELoss()

    for epoch in range(epochs):
        # Make sure training mode is active even if evaluation changed it.
        net.train()
        running_loss = []
        t = tqdm(loader)
        for x, y in t:
            # -1 lets the final (possibly smaller) batch through.
            y = torch.reshape(y, (-1, 1))
            x, y = x.to(device), y.to(device)
            outputs = net(x.float())
            loss = criterion(outputs, y.float())
            running_loss.append(loss.item())
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            t.set_description(f'training loss: {np.sqrt(mean(running_loss))}')

        validation_loss = test(net, val_loader)

        # Compare as a plain float so the device of the returned value is irrelevant.
        current_val_loss = float(validation_loss)
        if current_val_loss < best_val_loss:
            best_val_loss = current_val_loss
            torch.save(net, save_path)

        print(f'Epoch {epoch + 1}/{epochs} : loss {np.sqrt(mean(running_loss))} : val_loss {validation_loss}')

In [None]:
class ScatteringDataset():
    """Map-style dataset pairing scattering-coefficient rows with energies.

    Rows ``start:end`` are taken from ``scaled_scattering_coef.csv`` (read
    without a header) and from the ``energy`` column of ``train.csv``, both
    located under ``self.path``.

    NOTE(review): with the default ``end=-1`` the slice excludes the last row
    of both files; pass an explicit ``end`` to control the range.
    """

    def __init__(self, start=0, end=-1):
        self.path = 'drive/MyDrive/Data/'
        row_window = slice(start, end)

        coefficients = pd.read_csv(self.path + 'scaled_scattering_coef.csv', sep=',', header=None)
        self.input = coefficients[row_window]

        labels = pd.read_csv(self.path + 'train.csv')
        self.output = labels[row_window]['energy']

    def __len__(self):
        """Number of rows kept from the coefficient file."""
        return len(self.input)

    def __getitem__(self, idx):
        """Return ``(features, target)`` for positional index ``idx``, both cast to float."""
        features = self.input.iloc[idx].values
        target = self.output.iloc[idx]
        return (features.astype(float), target.astype(float))
        

In [None]:
# Rows 0..6399 are used for fitting, rows 6400..6769 are held out.
trainset = ScatteringDataset(start=0, end=6400)
testset = ScatteringDataset(start=6400, end=6770)

# Batched, shuffled loaders over both splits (two worker processes each).
trainloader = torch.utils.data.DataLoader(
    trainset,
    batch_size=8,
    shuffle=True,
    num_workers=2,
)
testloader = torch.utils.data.DataLoader(
    testset,
    batch_size=8,
    shuffle=True,
    num_workers=2,
)

In [None]:
# Adam over the parameters exposed by the network, learning rate 0.01.
optimizer = optim.Adam(params=net.parameters(), lr=0.01)

# 100 passes over the training loader.
train(net, optimizer, trainloader, epochs=100)

In [None]:
class TestScatteringDataset():
    """Map-style dataset over the unlabeled scattering coefficients.

    Rows ``start:end`` are taken from ``test_scaled_scattering_coef.csv``
    (read without a header) under ``self.path``.

    NOTE(review): with the default ``end=-1`` the slice excludes the last row
    of the file.
    """

    def __init__(self, start=0, end=-1):
        self.path = 'drive/MyDrive/Data/'
        row_window = slice(start, end)
        coefficients = pd.read_csv(self.path + 'test_scaled_scattering_coef.csv', sep=',', header=None)
        self.input = coefficients[row_window]

    def __len__(self):
        """Number of rows kept from the coefficient file."""
        return len(self.input)

    def __getitem__(self, idx):
        """Return the row at positional index ``idx`` as a float array."""
        row = self.input.iloc[idx].values
        return row.astype(float)

In [None]:
# Unlabeled evaluation rows, served one at a time and in file order.
evalset = TestScatteringDataset()
evalloader = torch.utils.data.DataLoader(
    evalset,
    batch_size=1,
    shuffle=False,
    num_workers=2,
)

In [None]:
def evaluate(model, dataloader, index_range = None):
    """Run ``model`` over unlabeled batches and tabulate one prediction per sample.

    Every batch has its first column dropped before being fed to the model
    (presumably an id/index column -- TODO confirm against the CSV layout).
    Predictions are flattened sample by sample, so any batch size and a
    smaller final batch are supported.  The model is switched to eval mode for
    the duration of the call and its previous training flag is restored
    afterwards.  Inputs are moved to the device of the model's first parameter
    (CPU if the model has none).

    Args:
        model: an ``nn.Module`` producing one value per input row.
        dataloader: iterable yielding 2-D input batches.
        index_range: optional ``(first_id, last_id)`` pair, inclusive on both
            ends, used for the ``id`` column.  When ``None`` the ids are
            ``0 .. n_predictions - 1``.

    Returns:
        ``pandas.DataFrame`` with columns ``id`` and ``predicted``.

    Raises:
        ValueError: if ``index_range`` does not span exactly as many ids as
            there are predictions.
    """
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device('cpu')

    y_hat = []
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for x_pos in dataloader:
                x_pos = x_pos[:, 1:].to(target_device)
                # Flatten per batch so y_hat stays a flat list of floats.
                y_hat.extend(model(x_pos.float()).reshape(-1).tolist())
    finally:
        model.train(was_training)

    if index_range is None:
        ids = range(len(y_hat))
    else:
        ids = range(index_range[0],index_range[1]+1)

    print(len(y_hat))
    print(len(ids))
    if len(ids) != len(y_hat):
        raise ValueError(
            f'index_range spans {len(ids)} ids but {len(y_hat)} predictions were produced'
        )
    results = pd.DataFrame({"id": list(ids), "predicted": y_hat})

    return results

In [None]:
# Predict on the evaluation loader.
results = evaluate(net, evalloader)

# Discard the first four and the last three predictions, renumber from zero.
to_sub = results.iloc[4:-3].reset_index(drop=True)

# Overwrite the ids with the consecutive range 6774..8462.
to_sub_index = np.arange(start=6774, stop=8463)
to_sub['id'] = to_sub_index

# Bare expression so the notebook displays the submission frame.
to_sub

In [None]:
# Write the submission table as comma-separated text without the row index.
to_sub.to_csv(
    '/content/drive/MyDrive/Data/to_sub.csv',
    index=False,
    sep=',',
)