In [40]:
import os
import random
import math
import numpy as np
from scipy.io import loadmat
import types

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

seed = 999
np.random.seed(seed)

class PressToAmpV2(Dataset):
    '''
    prepare the data in the way more suitable for lstm.
    output of the data should be of the shape
    (seq_len, batch, input_size)
    '''
    
    def __init__(self, data_dir, file_names, purpose='train'):
        self.data = []
        if not isinstance(file_names, list):
            raise TypeError("file_name needs to be a list")

        for file_name in file_names:
            data_path = os.path.join(data_dir, file_name)
            data = loadmat(data_path)
            data = data["data"]
            self.data.append(data)
        
        # concatenate the data
        self.data = np.concatenate(self.data, 0)
        self.data = self.data.astype('float32')

        # shuffle the data
        #np.random.shuffle(self.data)

        # normalize the rpm
        #rpm = self.data[:, 0] / 10000.0
        #rpm = rpm.reshape(-1, 1)
        
        pressure = self.data[:, 35:]
        
        mmt = self.data[:, 34].astype('int32')
        # define features according to mmt
        self.features = []
        for i in range(len(mmt)):
            obs = pressure[i]
            m = mmt[i].item()
            tm = m*6
            obs = obs[:tm]
            obs = obs.reshape(6, m)
            obs = np.transpose(obs)
            # If mmt is not 1750, pad zeros
            # make obs of shape (1750,6)
            p = np.zeros([1750-m, 6]).astype('float32')
            obs = np.concatenate((obs, p), axis=0)
            self.features.append(obs)
        
        # targets is the average pressure on 33 blades
        self.targets = self.data[:,1:34] 
        self.targets = np.sum(self.targets, axis=1, keepdims=True)/33.0
        
        # keep 10% for validation and 10% for test
        self.purpose = purpose
        l = self.__len__()
        tr= math.floor(l*0.8)
        v= math.floor(l*0.1)
        te = math.floor(l*0.1)

        if self.purpose == "train":
            self.features = self.features[:tr]
            self.targets = self.targets[:tr]
        elif self.purpose == "validate":
            self.features = self.features[tr:tr+v]
            self.targets = self.targets[tr:tr+v]
        elif self.purpose == "test":
            self.features = self.features[tr+v:tr+v+te]
            self.targets = self.targets[tr+v:tr+v+te]
        else:
            raise ValueError("purpose must be train, validate, or test")

    def __len__(self):
        return len(self.features)

    def __getitem__(self, idx):
        return self.features[idx], self.targets[idx]


class ModelV2(nn.Module):
    '''An lstm model'''
    def __init__(self, input_size=6, hidden_size=100, 
                 fc_units=[10]):
        super(ModelV2, self).__init__()
        self.hidden_size = hidden_size
        
        self.lstm = nn.LSTM(input_size=input_size, 
                            hidden_size=hidden_size,
                           batch_first=True)
        
        self.fc_layers = []
        
        self.fc_layers.append(nn.Sequential(
            nn.Linear(100, fc_units[0]),
            nn.BatchNorm1d(fc_units[0]),
            nn.Sigmoid()
        ))
        
        for i in range(1, len(fc_units)-1): 
            f = nn.Sequential(
                nn.Linear(fc_units[i], fc_units[i+1]),
                nn.BatchNorm1d(fc_units[i+1]),
                nn.Sigmoid())
            self.fc_layers.append(f)
            
        self.fc_layers.append(nn.Linear(fc_units[-1], 1))
        
        self.fc_layers = nn.Sequential(*self.fc_layers)
        
    def forward(self, x, h_0, c_0):
        output, (h_n, c_n) = self.lstm(x, (h_0, c_0))
        # feed the h_n to fc layers
        x = h_n.view(-1, self.hidden_size)
        x = self.fc_layers(x)
        return x

    

class Trainer(object):
    def __init__(self, dataset_train, dataset_val, 
                 model, batch_size, epochs, resume_from):
        
        # set random seet
        torch.manual_seed(99)

        print("Size of training set is:{}".format(
            dataset_train.__len__()))

        print("Size of validation set is:{}".format(
            dataset_val.__len__()))
        
        self.train_loader = DataLoader(dataset_train,
            batch_size=batch_size, shuffle=True, num_workers=2)

        self.val_loader = DataLoader(dataset_val,
            batch_size=batch_size*10, shuffle=False, num_workers=2)


        # load model
        self.model = model
        
        '''
            self.model_name = ""
            for h in self.model.hidden_size:
            self.model_name+=str(h)+"_"
        '''
        
        if resume_from:
            self.model.load_state_dict(torch.load(resume_from+".pth"))
            self.starting_epoch = resume_from.split("_")[-1]
            self.starting_epoch = int(self.starting_epoch)+1
        else:
            self.starting_epoch = 1
            
        self.smallest_validation_error = avg_error=0.67
       
        self.batch_size = batch_size
        self.epochs = epochs
        
        if torch.cuda.is_available():
            self.device = torch.device("cuda:0")
            print("cuda device is available")
        else:
            self.device = torch.device("cpu")
            print("cuda device is NOT available")

        self.model = self.model.to(self.device)

        self.optimizer = optim.Adam(self.model.parameters())
        self.loss = nn.MSELoss()
        self.step_loss = []
        self.validation_error = {}
        
    def train(self):
        for epoch in range(self.starting_epoch, 
                           self.starting_epoch+self.epochs):
            self.train_one_epoch(epoch)
            self.validate(epoch)
            
    
    def train_one_epoch(self, epoch):
        self.model.train()
        print("Start epoch :{}".format(epoch))
        
        for step, (features, target) in enumerate(self.train_loader):
            features = features.to(self.device)
            target = target.to(self.device)
            
            h_0 = torch.zeros(1, features.shape[0], 
                              self.model.hidden_size).to(self.device)
            c_0 = torch.zeros(1, features.shape[0], 
                              self.model.hidden_size).to(self.device)
            
            output = self.model(features, h_0, c_0)
            
            loss = self.loss(output, target)
            
            self.step_loss.append(loss.detach().cpu().item())
  
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()

            if step % 10 == 0:
                # compute average error (relative to target)
                loss = loss.detach().cpu().item()
                error = torch.abs(output - target) / target
                error = torch.sum(error) / error.shape[0]
                error = error.cpu().item()
                
                message = "Epoch: {}, Step: {}, Loss: {:0.3f}, Error: {:0.2f}%".format(
                    epoch, step, loss, error*100)
                print(message)

    def validate(self, epoch):
        self.model.eval()
        cul_loss = []
        errors = []
        for features, target in self.val_loader:
            features = features.to(self.device)
            target = target.to(self.device)
                        
            h_0 = torch.zeros(1, features.shape[0], 
                              self.model.hidden_size).to(self.device)
            c_0 = torch.zeros(1, features.shape[0], 
                              self.model.hidden_size).to(self.device)
            
            output = self.model(features, h_0, c_0)
            
            cul_loss.append(
                self.loss(output, target).detach().cpu().item())
            
            error = torch.abs(output - target) / target
            error = torch.sum(error) / error.shape[0]
            errors.append(error.cpu().item())

        avg_loss = sum(cul_loss)/len(cul_loss)
        avg_error = sum(errors) /len(errors)
        
        message ="Epoch: {}, Validation Loss: {:0.2f}, Validation Error: {:0.2f}%".format(
            epoch, avg_loss, avg_error*100)
        print(message)
        
        self.validation_error["epoch_"+str(epoch)] = avg_error
    
 
        
        if avg_error < self.smallest_validation_error:
            print("Saving currently the best model")      
            self.save_model(epoch)
        
        return
    
    
    def save_model(self, epoch):
        # delete the previously saved model
        os.remove("lstm_model_*.pth")
        path = "lstm_model_epoch_"+str(epoch)+".pth"
        torch.save(self.model.state_dict(), path)
        
        return 



In [41]:
data_dir = "/scr1/li108/data/press_to_amp/"
file_names = ["HL_CL1_withair_dec1_data.mat"]

dtr = PressToAmpV2(data_dir, file_names)
dva = PressToAmpV2(data_dir,file_names, "validate")

model = ModelV2()
        
trainer = Trainer(dataset_train=dtr, dataset_val=dva, model=model, 
                 batch_size=32, epochs=10, 
                  resume_from="lstm_model_epoch_1")
trainer.train()

Size of training set is:3646
Size of validation set is:455
cuda device is NOT available
Start epoch :2
Epoch: 2, Step: 0, Loss: 5.725, Error: 52.47%
Epoch: 2, Step: 10, Loss: 9.448, Error: 59.40%
Epoch: 2, Step: 20, Loss: 4.644, Error: 57.18%
Epoch: 2, Step: 30, Loss: 7.467, Error: 46.30%
Epoch: 2, Step: 40, Loss: 6.974, Error: 72.28%
Epoch: 2, Step: 50, Loss: 7.867, Error: 55.95%
Epoch: 2, Step: 60, Loss: 6.014, Error: 73.75%
Epoch: 2, Step: 70, Loss: 2.856, Error: 55.55%
Epoch: 2, Step: 80, Loss: 4.799, Error: 61.77%
Epoch: 2, Step: 90, Loss: 2.959, Error: 73.81%
Epoch: 2, Step: 100, Loss: 3.408, Error: 69.80%
Epoch: 2, Step: 110, Loss: 1.493, Error: 57.56%
Epoch: 2, Validation Loss: 1.15, Validation Error: 240.56%
Start epoch :3
Epoch: 3, Step: 0, Loss: 5.025, Error: 84.94%
Epoch: 3, Step: 10, Loss: 3.774, Error: 73.81%
Epoch: 3, Step: 20, Loss: 3.251, Error: 73.47%
Epoch: 3, Step: 30, Loss: 1.305, Error: 108.18%
Epoch: 3, Step: 40, Loss: 6.172, Error: 87.27%
Epoch: 3, Step: 50, Los

In [29]:
L = DataLoader(dataset=dtr, batch_size=32, num_workers=2)
f, t = next(iter(L))

h_0 = torch.zeros(1, f.shape[0], model.hidden_size)

c_0 = torch.zeros(1, f.shape[0], model.hidden_size)
z = model(f, h_0, c_0)

optimizer = optim.Adam(model.parameters())
loss = nn.MSELoss()
sl = loss(t, z)
sl.backward()
optimizer.step()




tensor(0.7770, grad_fn=<MeanBackward1>)


In [36]:
torch.save(trainer.model.state_dict(), "lstm_model_epoch_1.pth")

In [1]:
from load_data import PressToAmpFFT

data_dir = "/scr1/li108/data/fft/"
file_names = ["HL_CL1_noair_dec1_datafft.mat"]

d = PressToAmpFFT(data_dir, file_names)

In [4]:
a, b, c = d[0]
c

array([ 5.0863773e-01,  0.0000000e+00, -4.5496278e-04, ...,
       -5.2113006e-05,  3.0260696e-04, -2.0377501e-04], dtype=float32)

In [6]:
d.data[0][35:]

array([ 5.08637704e-01+0.00000000e+00j, -4.54962783e-04-2.33944057e-03j,
       -2.66825852e-04+2.30783589e-03j, ...,
        1.65028067e-04-6.87596364e-05j, -7.96787022e-05-5.21130059e-05j,
        3.02606969e-04-2.03775011e-04j])