In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn

In [2]:
# Use the GPU when PyTorch reports one as available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
device

device(type='cpu')

In [4]:
import torch
import torch.nn as nn


class SimpleFCmodel(nn.Module):
    """Fully connected regressor with layer widths 300 -> 128 -> 64 -> 32 -> 8 -> 1.

    A ReLU follows every linear layer except the last one, so the network
    emits one unconstrained value per input row.
    """

    def __init__(self):
        super().__init__()
        # Linear layers, registered under the names fc1..fc5 in this order.
        self.fc1 = nn.Linear(in_features=300, out_features=128)
        self.fc2 = nn.Linear(in_features=128, out_features=64)
        self.fc3 = nn.Linear(in_features=64, out_features=32)
        self.fc4 = nn.Linear(in_features=32, out_features=8)
        self.fc5 = nn.Linear(in_features=8, out_features=1)

        # A single ReLU module shared by all hidden layers.
        self.act = nn.ReLU()

    def forward(self, x):
        """Apply fc1..fc4, each followed by ReLU, then fc5 without activation."""
        for hidden in (self.fc1, self.fc2, self.fc3, self.fc4):
            x = self.act(hidden(x))
        return self.fc5(x)

# Build the network and move its parameters to the selected device.
# nn.Module.to returns the module itself, so `net` is the same object.
net = SimpleFCmodel().to(device)
net

SimpleFCmodel(
  (fc1): Linear(in_features=300, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=64, bias=True)
  (fc3): Linear(in_features=64, out_features=32, bias=True)
  (fc4): Linear(in_features=32, out_features=8, bias=True)
  (fc5): Linear(in_features=8, out_features=1, bias=True)
  (act): ReLU()
)

In [5]:
def test(model, dataloader):
    test_corrects = 0
    total = 0
    with torch.no_grad():
        for x, y in dataloader:
            y = torch.reshape(y, (-1,1)).float()
            x = x.to(device)
            y = y.to(device)
            y_hat = model(x.float())
            test_loss = torch.sqrt(nn.MSELoss()(y, y_hat))
    return test_loss

In [6]:
from tqdm import tqdm
from statistics import mean
import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim


def train(net, optimizer, loader, epochs=10, val_loader=None,
          save_path='drive/MyDrive/Network/torch_model.pt'):
    """Train ``net`` with an MSE loss and checkpoint the best validation epoch.

    Args:
        net: model to optimise; called as ``net(x.float())``.
        optimizer: optimiser already bound to ``net``'s parameters.
        loader: iterable of ``(x, y)`` training batches. ``y`` is reshaped to
            ``(-1, 1)``, so any batch size (including a smaller last batch)
            is accepted.
        epochs: number of passes over ``loader``.
        val_loader: iterable of ``(x, y)`` batches passed to ``test`` after
            each epoch. Defaults to ``loader``, which keeps the original
            behaviour of validating on the training batches; pass a held-out
            loader to get a real validation score.
        save_path: file the whole model is saved to (``torch.save(net, ...)``)
            whenever the validation loss improves.

    Raises:
        ValueError: if ``loader`` yields no batches.
    """
    import os  # local import: only needed to create the checkpoint directory

    if val_loader is None:
        val_loader = loader

    # Start from +inf so the first epoch always produces a checkpoint. The
    # previous initial value, torch.zeros(1) * 10000, is 0 and was never beaten.
    best_val_loss = float('inf')
    criterion = nn.MSELoss()
    for epoch in range(epochs):
        # Running sum / count instead of re-averaging a growing list each step.
        loss_sum = 0.0
        n_batches = 0
        t = tqdm(loader)
        for x, y in t:
            y = torch.reshape(y, (-1, 1))
            x, y = x.to(device), y.to(device)
            outputs = net(x.float())
            loss = criterion(outputs, y.float())
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            loss_sum += loss.item()
            n_batches += 1
            t.set_description(f'training loss: {np.sqrt(loss_sum / n_batches)}')

        if n_batches == 0:
            raise ValueError('loader yielded no batches')

        # Convert to a Python float so the comparison never mixes devices.
        validation_loss = float(test(net, val_loader))

        if validation_loss < best_val_loss:
            best_val_loss = validation_loss
            save_dir = os.path.dirname(save_path)
            if save_dir:
                os.makedirs(save_dir, exist_ok=True)
            torch.save(net, save_path)

        print(f'Epoch {epoch + 1}/{epochs} : loss {np.sqrt(loss_sum / n_batches)} : val_loss {validation_loss}')

In [7]:
class ScatteringDataset():
    """Indexable dataset pairing scattering-coefficient rows with energy targets.

    Features are read from ``scaled_scattering_coef.csv`` (no header row) and
    targets from the ``energy`` column of ``train.csv``, both located under
    ``self.path``. Each table is row-sliced with ``[start:end]``.
    """

    def __init__(self, start=0, end=-1):
        # NOTE: with the default end=-1 the slice [start:-1] leaves out the last row.
        self.path = 'drive/MyDrive/Data/'
        features = pd.read_csv(self.path + 'scaled_scattering_coef.csv', sep=',', header=None)
        targets = pd.read_csv(self.path + 'train.csv')
        self.input = features[start:end]
        self.output = targets[start:end]['energy']

    def __len__(self):
        """Number of feature rows kept after slicing."""
        return len(self.input)

    def __getitem__(self, idx):
        """Return ``(features, target)`` for positional row ``idx``, both cast to float."""
        features = self.input.iloc[idx].values
        target = self.output.iloc[idx]
        return (features.astype(float), target.astype(float))
        

In [8]:
# Rows [0, 6400) form the training split, rows [6400, 6770) the test split.
trainset = ScatteringDataset(start=0, end=6400)
testset = ScatteringDataset(start=6400, end=6770)

# Both loaders use batches of 8, shuffle on every pass and run 2 workers.
trainloader = torch.utils.data.DataLoader(
    trainset,
    batch_size=8,
    shuffle=True,
    num_workers=2,
)
testloader = torch.utils.data.DataLoader(
    testset,
    batch_size=8,
    shuffle=True,
    num_workers=2,
)

In [22]:
# Adam over every parameter of `net`, with a learning rate of 1e-6.
optimizer = optim.Adam(params=net.parameters(), lr=1e-6)

# Run a single epoch over the training loader.
train(net=net, optimizer=optimizer, loader=trainloader, epochs=1)

training loss: 0.9043204654994331: 100%|██████████| 800/800 [00:07<00:00, 102.89it/s]


Epoch 1/1 : loss 0.9043204654994331 : val_loss 0.17528420686721802


In [23]:
class TestScatteringDataset():
    """Indexable dataset of unlabeled scattering-coefficient rows.

    Rows are read from ``test_scaled_scattering_coef.csv`` (no header row)
    under ``self.path`` and row-sliced with ``[start:end]``.
    """

    def __init__(self, start=0, end=-1):
        # NOTE: with the default end=-1 the slice [start:-1] leaves out the last row.
        self.path = 'drive/MyDrive/Data/'
        table = pd.read_csv(self.path + 'test_scaled_scattering_coef.csv', sep=',', header=None)
        self.input = table[start:end]

    def __len__(self):
        """Number of rows kept after slicing."""
        return len(self.input)

    def __getitem__(self, idx):
        """Return the values of positional row ``idx`` as a float array."""
        row = self.input.iloc[idx]
        return row.values.astype(float)

In [24]:
# Unlabeled evaluation rows, served one at a time in file order.
evalset = TestScatteringDataset()
evalloader = torch.utils.data.DataLoader(
    evalset,
    batch_size=1,
    shuffle=False,
    num_workers=2,
)

In [25]:
def evaluate(model, dataloader, index_range = None):
    """Predict every row served by ``dataloader`` and return an id/prediction table.

    Predictions are collected per sample, so the function works for any batch
    size (previously one nested list was stored per batch, which was only
    correct for ``batch_size=1``).

    Args:
        model: ``nn.Module`` called as ``model(x.float())`` on each batch.
        dataloader: iterable yielding 2-D feature batches. The first column
            of every batch is dropped before the model is called
            (presumably an index column in the CSV - TODO confirm).
        index_range: optional ``(first, last)`` pair; ids then run from
            ``first`` to ``last`` inclusive. When ``None`` ids are
            ``0 .. n_predictions - 1``.

    Returns:
        ``pd.DataFrame`` with columns ``"id"`` and ``"predicted"``.

    Raises:
        ValueError: raised by pandas when ``index_range`` does not span
            exactly as many ids as there are predictions.
    """
    y_hat = []

    # Predict in eval mode, then restore whatever mode the caller had set.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for x_pos in dataloader:
                x_pos = x_pos[:, 1:].to(device)
                # Flatten the batch output so y_hat holds one float per sample.
                y_hat.extend(model(x_pos.float()).reshape(-1).tolist())
    finally:
        model.train(was_training)

    if index_range is None:
        ids = range(len(y_hat))
    else:
        ids = range(index_range[0], index_range[1] + 1)

    print(len(y_hat))
    print(len(ids))
    # Explicit float dtype keeps the column numeric even when there are no rows.
    results = pd.DataFrame({"id": list(ids), "predicted": np.asarray(y_hat, dtype=float)})

    return results

In [26]:
# Predict the evaluation rows, drop the first 4 and last 3 predictions,
# and relabel the remaining rows with ids 6774..8462.
results = evaluate(net, evalloader)
to_sub_index = np.arange(6774, 8463)
to_sub = results.iloc[4:-3].reset_index(drop=True).assign(id=to_sub_index)
to_sub

1696
1696
(1696, 2)


Unnamed: 0,id,predicted
0,0,-63.204533
1,1,-71.744011
2,2,-84.432899
3,3,-82.251953
4,4,-84.828003
...,...,...
1691,1691,-84.410736
1692,1692,-76.723839
1693,1693,-83.754623
1694,1694,-72.579544


In [31]:
# Write the submission table as comma-separated text without the index column.
to_sub.to_csv(
    '/content/drive/MyDrive/Data/to_sub.csv',
    sep=',',
    index=False,
)