In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import argparse
from torch.utils.data import Dataset, DataLoader
import pickle
from tqdm import tqdm
from models import *


In [2]:
# Define the dataset class
class PDECO_Dataset(Dataset):
    '''
    Dataset = (X,y)
    X = (sensors_1,sensors_2,xt)
    y = solution
    '''
    # Initialize the dataset
    def __init__(self, data):
        self.x = data[0]
        self.y = data[1]
    # Define the getitem function
    def __getitem__(self, index):
        return (self.x[0][index],self.x[1][index],self.x[2]),self.y[index]
    # Define the len function
    def __len__(self):
        return len(self.x[0])

In [14]:
# Define the training loop

def train(model,dataloader,criterion,optimizer):
    model.train()
    running_loss = 0.0

    for inputs,targets in dataloader:
        
        # Remember to zero grad the optimizer
        optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs)
        
        loss = criterion(outputs,targets)
        
        loss.backward()
        
        optimizer.step()
        

        running_loss += loss.item() * inputs[0].size(0)

    epoch_loss = running_loss / len(dataloader.dataset)

    return epoch_loss


In [4]:
def network(problem):

    if problem == 'Heat1D':
        branch = [100]
        trunk = [100]

    return branch,trunk


In [5]:
problem  = 'Heat1D'
len_control = 101
len_uncertainty = 101
lr = 10**(-3)
epochs = 1000
arch_trunk = [64]
arch_branch = [64]
dot_layer = 200
eval_point_dim = 2
data_points = 5000
testing_points =  750
activation_fn = 'nn.Tanh()'
training_data_path = f'/work2/Sebas/OUU_MIONET/PDECO/attemp1/data/heat_train_{10000}.pkl'
testing_data_path = f'/work2/Sebas/OUU_MIONET/PDECO/attemp1/data/DR_test_{2000}.pkl'

eval_point_imag = len_control*len_uncertainty
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [6]:
model = MIONET(n_branches=2,input_sizes=[len_control,len_uncertainty,eval_point_dim],architectures=[arch_branch,arch_branch,arch_trunk],output_size=dot_layer,eval_point_imag= eval_point_imag,activation_fn=eval(activation_fn),device=device,model='FeedForwardNN').to(device)
# Loss funciton
criterion = nn.MSELoss()
# Optimizer, modify to ADAM
optimizer = optim.Adam(model.parameters(),lr=lr)


In [7]:
# Load the data
training_data = torch.load(training_data_path,pickle_module=pickle)


In [36]:
data_loader_2 = DataLoader(PDECO_Dataset(training_data), batch_size=500, shuffle=True, num_workers=4)

In [8]:
testing_data = torch.load(testing_data_path,pickle_module=pickle)

In [9]:
xt = torch.tensor([(x, y) for x in torch.linspace(0, 1, len_control) for y in torch.linspace(0, 1, len_uncertainty)]).to(device)

In [10]:
sensors_1 = torch.zeros((data_points,len_control)).to(device)
sensors_2 = torch.zeros((data_points,len_uncertainty)).to(device)
y_train = torch.zeros((data_points,len(xt))).to(device)



In [11]:
# This part is done poorly, how to optimize it?
for i in tqdm(range(data_points)):
    # print(training_data[i]['solution'])
    sensors_1[i,:] = torch.from_numpy(training_data[i][0])
    sensors_2[i,:] = torch.from_numpy(training_data[i][1])
    y_train[i,:] = torch.from_numpy(training_data[i][2])

X_train = (sensors_1,sensors_2,xt)



100%|██████████| 5000/5000 [00:34<00:00, 146.73it/s]


In [40]:
data_train = X_train,y_train
data_loader = DataLoader(PDECO_Dataset(data_train),batch_size=150,shuffle=True)

In [25]:
for epoch in tqdm(range(epochs)):
    epoch_loss = train(model,data_loader,criterion,optimizer)
    if epoch % 100 == 0:
        print('Epoch: {} Loss: {}'.format(epoch,epoch_loss))

  0%|          | 1/1000 [00:01<31:11,  1.87s/it]

Epoch: 0 Loss: 0.007103652055375278


 10%|█         | 101/1000 [03:09<28:04,  1.87s/it]

Epoch: 100 Loss: 0.005191097271163017


 20%|██        | 201/1000 [06:16<24:57,  1.87s/it]

Epoch: 200 Loss: 0.0058359472081065174


 30%|███       | 301/1000 [09:24<21:49,  1.87s/it]

Epoch: 300 Loss: 0.003909536488354206


 40%|████      | 401/1000 [12:31<18:42,  1.87s/it]

Epoch: 400 Loss: 0.003129308302886784


 50%|█████     | 501/1000 [15:38<15:35,  1.87s/it]

Epoch: 500 Loss: 0.004762738598510623


 60%|██████    | 601/1000 [18:46<12:27,  1.87s/it]

Epoch: 600 Loss: 0.0033887353679165244


 70%|███████   | 701/1000 [21:53<09:20,  1.87s/it]

Epoch: 700 Loss: 0.0034533711825497446


 80%|████████  | 801/1000 [25:01<06:12,  1.87s/it]

Epoch: 800 Loss: 0.00282672127825208


 90%|█████████ | 901/1000 [28:08<03:05,  1.87s/it]

Epoch: 900 Loss: 0.002524211793206632


100%|██████████| 1000/1000 [31:14<00:00,  1.87s/it]


In [26]:
torch.save(model.state_dict(),f'/work2/Sebas/OUU_MIONET/PDECO/attemp1/models/heat_mionet_training_{data_points}.pt')

In [74]:
sensors_1 = torch.zeros((testing_points,len_control)).to(device)
sensors_2 = torch.zeros((testing_points,len_uncertainty)).to(device)
y_test = torch.zeros((testing_points,len(xt))).to(device)

for i in range(testing_points):
    sensors_1[i,:] = torch.from_numpy(training_data[i][0])
    sensors_2[i,:] = torch.from_numpy(training_data[i][1])
    y_train[i,:] = torch.from_numpy(training_data[i][2])

X_test = (sensors_1,sensors_2,xt)
data_test = X_test,y_test
data_loader_test = DataLoader(PDECO_Dataset(data_test),batch_size=550,shuffle=True)


In [63]:
model.eval()

MIONET(
  (branch_nets): ModuleList(
    (0-1): 2 x FeedForwardNN(
      (layers): ModuleList(
        (0): Linear(in_features=101, out_features=64, bias=True)
        (1): Tanh()
        (2): Linear(in_features=64, out_features=200, bias=False)
      )
    )
  )
  (trunk_net): FeedForwardNN(
    (layers): ModuleList(
      (0): Linear(in_features=2, out_features=64, bias=True)
      (1): Tanh()
      (2): Linear(in_features=64, out_features=200, bias=False)
    )
  )
)

In [64]:
with torch.no_grad():
    av_loss = 0
    for inputs,targets in data_loader_test:
        outputs = model(inputs)
        loss = criterion(outputs,targets)
        # print(loss)
        av_loss += loss
    print(av_loss/len(data_loader_test)) 

tensor(0.1032, device='cuda:0')


In [65]:
model_no_opt = MIONET(n_branches=2,input_sizes=[len_control,len_uncertainty,eval_point_dim],architectures=[arch_branch,arch_branch,arch_trunk],output_size=dot_layer,eval_point_imag= eval_point_imag,activation_fn=eval('nn.ReLU()'),device=device,model='FeedForwardNN').to(device)

In [66]:
model_no_opt.eval()
with torch.no_grad():
    av_loss = 0
    for inputs,targets in data_loader_test:
        outputs = model_no_opt(inputs)
        loss = criterion(outputs,targets)
        # print(loss)
        av_loss += loss
    print(av_loss/len(data_loader_test)) 

tensor(0.2422, device='cuda:0')


In [75]:


X_t = X_test[0]
y_t = y_test[0]

In [76]:
sol = model(X_t)

RuntimeError: mat1 and mat2 shapes cannot be multiplied (1x101 and 2x64)