In [1]:
import numpy as np
import torch
from torch import nn
from torch.autograd import Variable
from torchvision import transforms
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import pandas as pd
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
import copy

from torch.utils.tensorboard import SummaryWriter

In [2]:
# read in data
x_pretrain = pd.read_csv('pretrain_features.csv').drop(columns=['Id', 'smiles']).values
y_pretrain = pd.read_csv('pretrain_labels.csv').drop(columns='Id').values

#x_test = pd.read_csv('train_features.csv').drop(columns=['Id', 'smiles']).values
#y_test = pd.read_csv('train_labels.csv').drop(columns='Id').values


X_pre, X_prevalid, y_pre, y_prevalid = train_test_split(x_pretrain, y_pretrain, test_size= 0.2)

In [3]:
# Define Dataset
class MyDataset(Dataset):
    def __init__(self, x, y):
        self.x = torch.from_numpy(x).float()
        self.y = torch.from_numpy(y).float()
        self.len = x.shape[0]

    def __getitem__(self, key):
        return (self.x[key],self.y[key])

    def __len__(self):
        return self.len

In [4]:
#data_transform=transforms.Compose([transforms.ToTensor(), transforms.Normalize(mean=[0.5],std=[0.5])])


myDataset_pretrain = MyDataset(X_pre, y_pre)
dataLoader_pretrain = DataLoader(dataset=myDataset_pretrain, batch_size=128, shuffle=True)

myDataset_test = MyDataset(X_prevalid, y_prevalid)
dataLoader_test = DataLoader(dataset=myDataset_test, batch_size=128, shuffle=True)

In [5]:
class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()

        # create Sequential objects
        self.layer1 = nn.Sequential(nn.Linear(1000, 64), nn.ReLU(), nn.Dropout(0.25))
        #self.layer2 = nn.Sequential(nn.Linear(128, 32), nn.ReLU(), nn.Dropout(0.2))
        self.layer3 = nn.Sequential(nn.Linear(64, 1))
        self.layers = [attr for attr in dir(self) if 'layer' in attr]
        self.layers.sort()

    def forward(self, x):
        layers = [getattr(self, layer) for layer in self.layers]
        for layer in layers:
            x = layer(x)
        return x

    # def get_layer_2(self, x):
    #     x = self.layer1(x)
    #     x = self.layer2(x)
    #     # x = self.layer3(x)
    #     return x


net = Net()

print(net)

Net(
  (layer1): Sequential(
    (0): Linear(in_features=1000, out_features=64, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.25, inplace=False)
  )
  (layer3): Sequential(
    (0): Linear(in_features=64, out_features=1, bias=True)
  )
)


In [9]:
# define learning rate
lr = 3e-5
criterion = nn.MSELoss()
#criterion = nn.CrossEntropyLoss()
# adam梯 
optimizer = torch.optim.Adam(net.parameters(), lr)


In [10]:


train_losses = []
train_acces = []

eval_losses = []
eval_acces = []

min_eval_loss = 1e5

# train the network
for e in range(2000):

    train_loss = 0
    train_acc = 0
    net.train()   #

    for features, labels in dataLoader_pretrain:

        # forward, get loss 
        outputs = net(features)
        loss = criterion(outputs, labels)

        # backpropagation，set the previous gradient to zero，use step function to update
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # record the loss
        train_loss += loss.item()

    train_losses.append(train_loss / myDataset_pretrain.len)

    
    # test on validation set
    eval_loss = 0
    eval_acc = 0
    net.eval()  

    # updated each time by a small group of data 
    for features, labels in dataLoader_test:
        features = Variable(features)  
        labels = Variable(labels)  
    

        outputs = net(features)  
        # label = label.unsqueeze(1)
        loss = criterion(outputs, labels)  

        #record the loss
        eval_loss += loss.item()
        
#         pred = outputs.max(1)[1]
#         num_correct = (pred==labels).sum().item()
#         acc = num_correct / x.shape[0]
#         eval_acc += acc
    if eval_loss < min_eval_loss:
        print(f"Saving best checkpoint from epoch {e}.")
        torch.save(net.state_dict(), "task_4_best_pretrained_model.pth")
        min_eval_loss = eval_loss

    eval_losses.append(eval_loss / myDataset_test.len)
    # eval_acces.append(eval_acc / myDataset_test.len)
    print('epoch: {}, Train Loss: {:.6f},Eval Loss: {:.6f}'
           .format(e, train_loss / myDataset_pretrain.len, eval_loss / myDataset_test.len))
    #log_writer.add_scalar("loss/train", float(train_loss), e)





Saving best checkpoint from epoch 0.
epoch: 0, Train Loss: 0.000048,Eval Loss: 0.000036
Saving best checkpoint from epoch 1.
epoch: 1, Train Loss: 0.000048,Eval Loss: 0.000036
epoch: 2, Train Loss: 0.000049,Eval Loss: 0.000036
Saving best checkpoint from epoch 3.
epoch: 3, Train Loss: 0.000048,Eval Loss: 0.000035
epoch: 4, Train Loss: 0.000048,Eval Loss: 0.000035
epoch: 5, Train Loss: 0.000048,Eval Loss: 0.000036
epoch: 6, Train Loss: 0.000048,Eval Loss: 0.000036
Saving best checkpoint from epoch 7.
epoch: 7, Train Loss: 0.000048,Eval Loss: 0.000035
epoch: 8, Train Loss: 0.000048,Eval Loss: 0.000036
epoch: 9, Train Loss: 0.000048,Eval Loss: 0.000036
epoch: 10, Train Loss: 0.000048,Eval Loss: 0.000036
epoch: 11, Train Loss: 0.000048,Eval Loss: 0.000036
epoch: 12, Train Loss: 0.000048,Eval Loss: 0.000036
Saving best checkpoint from epoch 13.
epoch: 13, Train Loss: 0.000049,Eval Loss: 0.000035
epoch: 14, Train Loss: 0.000048,Eval Loss: 0.000035
epoch: 15, Train Loss: 0.000048,Eval Loss: 0

In [11]:
x_train = pd.read_csv('train_features.csv').drop(columns=['Id', 'smiles']).values
y_train = pd.read_csv('train_labels.csv').drop(columns='Id').values
X_train, X_valid, y_train, y_valid = train_test_split(x_train, y_train, test_size= 0.1)


In [12]:
myDataset_train = MyDataset(X_train, y_train)
dataLoader_train = DataLoader(dataset=myDataset_train, batch_size=4, shuffle=True)

myDataset_valid = MyDataset(X_valid, y_valid)
dataLoader_valid = DataLoader(dataset=myDataset_valid, batch_size=4, shuffle=False)

In [13]:
try:
    net_copy = copy.deepcopy(net_copy)
except:
    net_copy = copy.deepcopy(net)

In [14]:
finetune_net = Net()
finetune_net.load_state_dict(torch.load("task_4_best_pretrained_model.pth"))
for param in finetune_net.parameters():
    if param.shape[0]!=1:
        param.requires_grad = False

In [15]:
for param in finetune_net.parameters():
    print(param.shape)

torch.Size([64, 1000])
torch.Size([64])
torch.Size([1, 64])
torch.Size([1])


In [30]:
#freeze，lr = 5e-4，2000 epoch
#freeze，lr = 5e-5，2000 epoch 
#unfreeze，lr = 1e-6 or smaller，2000 epoch

#unfreeze all layers
for param in finetune_net.parameters():
	param.requires_grad = True


#lr = 5e-5
lr = 1e-6
criterion = nn.MSELoss()
#criterion = nn.CrossEntropyLoss()


optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, finetune_net.parameters()), lr)
#optimizer = torch.optim.Adam(finetune_net.parameters(), lr)

#scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=500, gamma=0.1)

train_losses = []
train_acces = []

eval_losses = []
eval_acces = []


for e in range(500):

    train_loss = 0
    train_acc = 0
    finetune_net.train()   #

    for features, labels in dataLoader_train:

        # forward, get loss
        outputs = finetune_net(features)
        loss = criterion(outputs, labels)

        # backpropagate
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        #scheduler.step()

        
        train_loss += loss.item()

    train_losses.append(train_loss / myDataset_train.len)

    #test on validation set
    eval_loss = 0
    eval_acc = 0
    finetune_net.eval() 


    for features, labels in dataLoader_valid:
        features = Variable(features)  
        labels = Variable(labels)  

        outputs = finetune_net(features)  
        # label = label.unsqueeze(1)
        loss = criterion(outputs, labels) 

        # record loss
        eval_loss += loss.item()
        
#         pred = outputs.max(1)[1]
#         num_correct = (pred==labels).sum().item()
#         acc = num_correct / x.shape[0]
#         eval_acc += acc

    eval_losses.append(eval_loss / myDataset_valid.len)
    # eval_acces.append(eval_acc / myDataset_test.len)
    print('epoch: {}, Train Loss: {:.6f},Eval Loss: {:.6f}'
          .format(e, train_loss / myDataset_train.len, eval_loss / myDataset_valid.len))
    #log_writer.add_scalar("loss/train", float(train_loss), e)


epoch: 0, Train Loss: 0.004638,Eval Loss: 0.073662
epoch: 1, Train Loss: 0.004172,Eval Loss: 0.073637
epoch: 2, Train Loss: 0.004245,Eval Loss: 0.073656
epoch: 3, Train Loss: 0.004286,Eval Loss: 0.073643
epoch: 4, Train Loss: 0.005515,Eval Loss: 0.073621
epoch: 5, Train Loss: 0.005675,Eval Loss: 0.073613
epoch: 6, Train Loss: 0.003697,Eval Loss: 0.073615
epoch: 7, Train Loss: 0.003401,Eval Loss: 0.073620
epoch: 8, Train Loss: 0.005081,Eval Loss: 0.073623
epoch: 9, Train Loss: 0.004678,Eval Loss: 0.073587
epoch: 10, Train Loss: 0.005962,Eval Loss: 0.073600
epoch: 11, Train Loss: 0.004030,Eval Loss: 0.073600
epoch: 12, Train Loss: 0.006070,Eval Loss: 0.073605
epoch: 13, Train Loss: 0.003070,Eval Loss: 0.073575
epoch: 14, Train Loss: 0.005284,Eval Loss: 0.073588
epoch: 15, Train Loss: 0.003989,Eval Loss: 0.073583
epoch: 16, Train Loss: 0.003438,Eval Loss: 0.073571
epoch: 17, Train Loss: 0.004354,Eval Loss: 0.073605
epoch: 18, Train Loss: 0.004589,Eval Loss: 0.073638
epoch: 19, Train Loss:

In [31]:
[para.shape for para in finetune_net.parameters()]

[torch.Size([64, 1000]),
 torch.Size([64]),
 torch.Size([1, 64]),
 torch.Size([1])]

In [32]:
type(finetune_net)

__main__.Net

In [33]:
test_X = pd.read_csv('test_features.csv')
test_id = test_X.Id.values
test_X = test_X.drop(columns=['Id', 'smiles']).values
test_X

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [34]:
finetune_net.eval()
result = finetune_net(torch.Tensor(test_X)).detach().numpy()
result = pd.DataFrame(result, index=test_id).reset_index(drop=False).rename({'index': 'Id', 0:'y'}, axis=1).to_csv('finetunewed.txt', header=True, index=False)