In [None]:
## ライブラリの読み込みなど

# for train test
import torch
import random
import numpy as np
import pandas as pd
import torch_scatter
import torch.nn as nn
from torch.nn import Linear, Sequential, LayerNorm, ReLU
from torch_geometric.nn.conv import MessagePassing
from torch_geometric.data import DataLoader, Data
import time
import torch.optim as optim
from tqdm import trange
import copy
import matplotlib.pyplot as plt
import os
import torch
import h5py
import tensorflow.compat.v1 as tf
import functools
import json
import enum

# for make animation to mp4
from matplotlib import tri as mtri
from matplotlib import animation
import matplotlib.pyplot as plt
import numpy as np
import os 
from mpl_toolkits.axes_grid1 import make_axes_locatable

In [None]:
#Utility functions, provided in the release of the code from the original MeshGraphNets study:
#https://github.com/deepmind/deepmind-research/tree/master/meshgraphnets

def triangles_to_edges(faces):
  """Computes mesh edges from triangles.

  Note that this triangles_to_edges method was provided as part of the
  code release for the MeshGraphNets paper by DeepMind, available here:
  https://github.com/deepmind/deepmind-research/tree/master/meshgraphnets

  Args:
    faces: TensorFlow tensor indexed as faces[:, 0..2], i.e. one row of
      three vertex ids per triangle. The ids are bitcast pairwise into
      tf.int64 below, which presumably requires them to be int32
      -- TODO confirm against the dataset.

  Returns:
    A tuple of two 1-D tensors. Every unique undirected edge appears twice
    across the pair, once in each direction.
  """
  # collect edges from triangles
  # (the three sides of each triangle: 0-1, 1-2 and 2-0)
  edges = tf.concat([faces[:, 0:2],
                     faces[:, 1:3],
                     tf.stack([faces[:, 2], faces[:, 0]], axis=1)], axis=0)
  # those edges are sometimes duplicated (within the mesh) and sometimes
  # single (at the mesh boundary).
  # sort & pack edges as single tf.int64
  # (min/max ordering makes (a, b) and (b, a) pack to the same value)
  receivers = tf.reduce_min(edges, axis=1)
  senders = tf.reduce_max(edges, axis=1)
  packed_edges = tf.bitcast(tf.stack([senders, receivers], axis=1), tf.int64)
  # remove duplicates and unpack
  unique_edges = tf.bitcast(tf.unique(packed_edges)[0], tf.int32)
  senders, receivers = tf.unstack(unique_edges, axis=1)
  # create two-way connectivity
  return (tf.concat([senders, receivers], axis=0),
          tf.concat([receivers, senders], axis=0))

class NodeType(enum.IntEnum):
    """
    Define the code for the one-hot vector representing the node types.
    Note that this is consistent with the codes provided in the original
    MeshGraphNets study: 
    https://github.com/deepmind/deepmind-research/tree/master/meshgraphnets
    """
    # Integer code of each node category; the code is the position of the 1
    # in the one-hot node-type vector.
    NORMAL = 0
    OBSTACLE = 1
    AIRFOIL = 2
    HANDLE = 3
    INFLOW = 4
    OUTFLOW = 5
    WALL_BOUNDARY = 6
    # Not a node category: this is the depth passed to tf.one_hot in
    # make_datasets, i.e. the width of the one-hot vector. Codes 7 and 8 are
    # not given names here.
    SIZE = 9

class objectview:
    """Attribute-style view over a dict (``view.key`` instead of ``d['key']``).

    The instance adopts ``d`` itself as its ``__dict__``; no copy is made, so
    attribute writes on the view are visible in ``d`` and vice versa.
    """

    def __init__(self, d):
        # Rebind the instance namespace to the caller's dict.
        self.__dict__ = d

In [None]:
## Global variables shared by the cells below

# flag
use_processed_dataset = True # the data has already been processed and saved; True loads that cached copy

# plain scalar values
dt=0.01   #A constant: do not change! (time step used to turn velocity differences into the training target)
best_model_rollout_index = -1

# empty containers
best_model_rollout_data = [] # per-step data of the best model's rollout; meant to be read when visualizing

# path settings, part 1: directories
root_dir = '/root'
dataset_dir = os.path.join(root_dir, 'datasets')
checkpoint_dir = os.path.join(root_dir, 'bachlor3-meshgraphnets/best_models')
postprocess_dir = os.path.join(root_dir, 'bachlor3-meshgraphnets/animations')

# path settings, part 2: raw HDF5 files of each split
train_datafile_path = os.path.join(dataset_dir, 'train.h5')
test_datafile_path = os.path.join(dataset_dir, 'test.h5')
valid_datafile_path = os.path.join(dataset_dir, 'valid.h5')

print("datafile directory: " + dataset_dir)

データセットの読み込み
- train_datasets: トレーニング用のデータセットの集合
- test_datasets: テスト用のデータセットの集合
- valid_datasets: validation用のデータセットの集合

In [None]:
def make_datasets(data_file_path, tmax_raj=np.inf):
    """Read trajectories from an HDF5 file and turn every time step into a graph.

    For each trajectory (top-level key of the file) and each time step ``ts``
    except the last one, a ``torch_geometric.data.Data`` object is built with:

    * ``x``: velocity at ``ts`` concatenated with the one-hot node type
    * ``edge_index``: two-way mesh edges derived from ``cells`` at ``ts``
    * ``edge_attr``: relative mesh position of the edge endpoints and its L2 norm
    * ``y``: ``(velocity[ts+1] - velocity[ts]) / dt`` (training target)
    * ``p``: pressure at ``ts``
    * ``cells``, ``mesh_pos``: raw mesh data, kept for visualization

    Args:
        data_file_path: path of the HDF5 file to read.
        tmax_raj: stop after this many trajectories (default: read all).

    Returns:
        A list with one entry per trajectory; each entry is the list of
        ``Data`` objects of that trajectory in time order.
    """
    datasets = []
    print('start reading ['+data_file_path+']')

    with h5py.File(data_file_path, 'r') as data:
        for i, trajectory in enumerate(data.keys()):
            print("Trajectory: ", i)
            traj = data[trajectory]
            dataset = []

            # The node type is always taken from the first frame, so the
            # one-hot encoding is loop-invariant: build it once per trajectory
            # instead of running the TensorFlow op at every time step.
            node_type = torch.tensor(np.array(tf.one_hot(
                tf.convert_to_tensor(traj['node_type'][0]), NodeType.SIZE))).squeeze(1)

            for ts in range(len(traj['velocity'])-1):
                # Note that it's faster to convert to numpy then to torch than to
                # import to torch from h5 format directly. Each HDF5 row is read
                # exactly once per step and reused below.
                v_t = torch.tensor(np.array(traj['velocity'][ts]))
                v_tp1 = torch.tensor(np.array(traj['velocity'][ts+1]))
                cells_np = np.array(traj['cells'][ts])
                mesh_pos = torch.tensor(np.array(traj['mesh_pos'][ts]))

                # Node features: velocity followed by the one-hot node type
                x = torch.cat((v_t, node_type), dim=-1).type(torch.float)

                # Get edge indices in COO format, shape [2, num_edges]
                edges = triangles_to_edges(tf.convert_to_tensor(cells_np))
                edge_index = torch.stack((torch.tensor(edges[0].numpy()),
                                          torch.tensor(edges[1].numpy())), dim=0).type(torch.long)

                # Get edge features: relative position of the endpoints and its length
                u_ij = mesh_pos[edge_index[0]] - mesh_pos[edge_index[1]]
                u_ij_norm = torch.norm(u_ij, p=2, dim=1, keepdim=True)
                edge_attr = torch.cat((u_ij, u_ij_norm), dim=-1).type(torch.float)

                # Node outputs, for training (finite-difference acceleration)
                y = ((v_tp1 - v_t)/dt).type(torch.float)

                # Node outputs, for testing integrator (pressure)
                p = torch.tensor(np.array(traj['pressure'][ts]))

                # Data needed for visualization code
                cells = torch.tensor(cells_np)

                dataset.append(Data(x=x, edge_index=edge_index, edge_attr=edge_attr, y=y, p=p,
                                    cells=cells, mesh_pos=mesh_pos))

            datasets.append(dataset)
            if(i+1 == tmax_raj):
                break
    print('finish reading ['+data_file_path+']')
    return datasets

In [None]:
def make_loader(datasets, batch_size, shuffle, use_dataset_size=None):
    """Flatten trajectories into one list of graphs and wrap it in a DataLoader.

    Args:
        datasets: list of trajectories, each a list of graph objects.
        batch_size: number of graphs per mini-batch.
        shuffle: forwarded to ``DataLoader``.
        use_dataset_size: keep only the first this-many graphs, counted in
            trajectory order. ``None`` keeps everything. Non-positive values
            and values beyond the amount of data also keep everything (the
            previous implementation behaved the same way for those inputs).

    Returns:
        A ``DataLoader`` over the selected graphs.
    """
    # Flattening first removes the need to assume that every trajectory has
    # the length of the first one, and an empty `datasets` no longer raises
    # IndexError.
    data_list = [data for dataset in datasets for data in dataset]

    if use_dataset_size is not None and 0 < use_dataset_size < len(data_list):
        data_list = data_list[:int(use_dataset_size)]

    return DataLoader(data_list, batch_size=batch_size, shuffle=shuffle)

In [None]:
# Prepare the datasets: either load the cached, already processed files or
# rebuild them from the raw HDF5 files and cache the result.
print('use_processed_dataset: {}'.format(use_processed_dataset))

# os.path.join receives the directory and the file name as separate
# components instead of one manually concatenated string.
train_processed_path = os.path.join(dataset_dir, 'train_processed_data_set.pt')
test_processed_path = os.path.join(dataset_dir, 'test_processed_data_set.pt')
valid_processed_path = os.path.join(dataset_dir, 'valid_processed_data_set.pt')

if use_processed_dataset:
    # Loading takes about 5 minutes.
    # NOTE(review): these files hold pickled Python objects, so torch.load
    # executes pickle code -- only load files you created yourself. Recent
    # PyTorch releases default to weights_only=True, which may reject these
    # files; confirm against the installed version.
    print('start loading [train_datasets]')
    train_datasets = torch.load(train_processed_path)
    print('start loading [test_datasets]')
    test_datasets = torch.load(test_processed_path)
    print('start loading [valid_datasets]')
    valid_datasets = torch.load(valid_processed_path)
    print('finish loading')

else:
    # Save each split as soon as it is built so that a failure in a later
    # split does not throw away the work already done.
    train_datasets = make_datasets(train_datafile_path)
    torch.save(train_datasets, train_processed_path)
    test_datasets = make_datasets(test_datafile_path)
    torch.save(test_datasets, test_processed_path)
    valid_datasets = make_datasets(valid_datafile_path)
    torch.save(valid_datasets, valid_processed_path)

In [None]:
# Quick tour of the data layout: datasets -> trajectory -> time step (one graph).
print(f'- 訓練用データのトラジェクトリ数: {len(train_datasets)}')
print(f'- 1トラジェクトリのタイムステップ数: {len(train_datasets[0])}')

# Summary of a single time step, then its node feature matrix.
print('- 一ステップのデータのサイズ')
print(train_datasets[0][0])
print('- 一ステップのデータのx(全ノードのノード特徴量)の一例')
print(train_datasets[0][0].x)

# Feature vector of one node: its length first, then its values.
print('- 一ステップのデータのxのあるノードの次元(x_axis_vero, y_axis_vero, (one_hot_vec size = 9))')
print(len(train_datasets[0][0].x[0]))
print('- 一ステップのデータのxのあるノードの一例')
print(train_datasets[0][0].x[0])

In [None]:
def normalize(to_normalize,mean_vec,std_vec):
    """Standardize values: subtract ``mean_vec``, then divide by ``std_vec``."""
    centered = to_normalize - mean_vec
    return centered / std_vec

def unnormalize(to_unnormalize,mean_vec,std_vec):
    """Undo ``normalize``: multiply by ``std_vec``, then add ``mean_vec``."""
    rescaled = to_unnormalize * std_vec
    return rescaled + mean_vec

def _mean_std_from_sums(value_sum, square_sum, count, eps):
    """Turn an accumulated sum and sum of squares into (mean, std), with std floored at eps."""
    mean = value_sum / count
    # E[x^2] - E[x]^2 can come out slightly negative through floating point
    # rounding (typically for constant columns). sqrt would then give NaN,
    # and torch.maximum propagates NaN, so clamp the variance at zero first.
    variance = torch.clamp(square_sum / count - mean ** 2, min=0.0)
    std = torch.maximum(torch.sqrt(variance), eps)
    return mean, std


def get_stats(data_list):
    '''
    Compute per-feature mean and standard deviation of a list of graphs.

    Statistics are computed column-wise for the node features (``x``), the
    edge features (``edge_attr``) and the node outputs (``y``). Nothing is
    normalized here; the returned vectors are meant to be passed to
    ``normalize`` / ``unnormalize``.

    Only a prefix of ``data_list`` is used: accumulation stops after the
    first graph that pushes any of the three row counters above 10**6.

    Args:
        data_list: non-empty list of objects with 2-D tensor attributes
            ``x``, ``edge_attr`` and ``y``.

    Returns:
        ``[mean_vec_x, std_vec_x, mean_vec_edge, std_vec_edge, mean_vec_y,
        std_vec_y]``. Every standard deviation is at least 1e-8 so it can be
        used as a divisor.
    '''

    #Running sum and sum of squares, one pair per tensor kind
    sum_x=torch.zeros(data_list[0].x.shape[1:])
    sq_sum_x=torch.zeros(data_list[0].x.shape[1:])

    sum_edge=torch.zeros(data_list[0].edge_attr.shape[1:])
    sq_sum_edge=torch.zeros(data_list[0].edge_attr.shape[1:])

    sum_y=torch.zeros(data_list[0].y.shape[1:])
    sq_sum_y=torch.zeros(data_list[0].y.shape[1:])

    #Define the maximum number of accumulations to perform such that we do
    #not encounter memory issues
    max_accumulations = 10**6

    #Lower bound for the standard deviations (avoids division by zero)
    eps=torch.tensor(1e-8)

    #Number of rows accumulated so far for each tensor kind
    num_accs_x = 0
    num_accs_edge=0
    num_accs_y=0

    #Iterate through the data in the list to accumulate statistics
    for dp in data_list:

        sum_x+=torch.sum(dp.x,dim=0)
        sq_sum_x+=torch.sum(dp.x**2,dim=0)
        num_accs_x+=dp.x.shape[0]

        sum_edge+=torch.sum(dp.edge_attr,dim=0)
        sq_sum_edge+=torch.sum(dp.edge_attr**2,dim=0)
        num_accs_edge+=dp.edge_attr.shape[0]

        sum_y+=torch.sum(dp.y,dim=0)
        sq_sum_y+=torch.sum(dp.y**2,dim=0)
        num_accs_y+=dp.y.shape[0]

        if(num_accs_x>max_accumulations or num_accs_edge>max_accumulations or num_accs_y>max_accumulations):
            break

    mean_vec_x, std_vec_x = _mean_std_from_sums(sum_x, sq_sum_x, num_accs_x, eps)
    mean_vec_edge, std_vec_edge = _mean_std_from_sums(sum_edge, sq_sum_edge, num_accs_edge, eps)
    mean_vec_y, std_vec_y = _mean_std_from_sums(sum_y, sq_sum_y, num_accs_y, eps)

    return [mean_vec_x,std_vec_x,mean_vec_edge,std_vec_edge,mean_vec_y,std_vec_y]

def get_stats_from_datasets(datasets):
    """Flatten a list of trajectories into one list of graphs and return ``get_stats`` of it."""
    every_step = [step for trajectory in datasets for step in trajectory]
    return get_stats(every_step)

In [None]:
class ProcessorLayer(MessagePassing):
    """Single message-passing block of the MeshGraphNet processor.

    Edge embeddings are updated from the embeddings of the two incident nodes
    and the edge itself; the updated edge embeddings are summed per node; node
    embeddings are then updated from their own embedding and that sum. Both
    updates use a residual connection.

    Args:
        in_channels: width of the incoming node and edge embeddings.
        out_channels: width produced by both MLPs. The residual additions and
            the ``2 * in_channels`` input of the node MLP only line up when
            this equals ``in_channels``.
        **kwargs: forwarded unchanged to ``MessagePassing.__init__``.
    """

    def __init__(self, in_channels, out_channels,  **kwargs):
        super(ProcessorLayer, self).__init__(  **kwargs )

        # Note that the node and edge encoders both have the same hidden dimension
        # size. This means that the input of the edge processor will always be
        # three times the specified hidden dimension
        # (input: adjacent node embeddings and self embeddings)
        self.edge_mlp = Sequential(Linear( 3* in_channels , out_channels),
                                   ReLU(),
                                   Linear( out_channels, out_channels),
                                   LayerNorm(out_channels))

        # Input of the node processor: the node's own embedding concatenated
        # with the summed edge messages.
        self.node_mlp = Sequential(Linear( 2* in_channels , out_channels),
                                   ReLU(),
                                   Linear( out_channels, out_channels),
                                   LayerNorm(out_channels))


        self.reset_parameters()

    def reset_parameters(self):
        """
        Reset the learnable parameters of both MLPs (Linear and LayerNorm layers).
        """
        # Indices 0 and 2 are the Linear layers, index 3 is the LayerNorm
        # (index 1 is the parameter-free ReLU).
        for mlp in (self.edge_mlp, self.node_mlp):
            mlp[0].reset_parameters()
            mlp[2].reset_parameters()
            mlp[3].reset_parameters()

    def forward(self, x, edge_index, edge_attr, size = None):
        """
        Run one round of message passing.

        x: node embeddings, shape [node_num, in_channels]
        edge_index: shape [2, edge_num]
        edge_attr: edge embeddings, shape [edge_num, in_channels]
        size: forwarded to ``propagate``

        Returns (updated_nodes, updated_edges) with shapes
        [node_num, out_channels] and [edge_num, out_channels].
        """

        # `aggregate` below returns a tuple, which propagate hands back as is:
        # out is the per-node sum of messages, shape [node_num, out_channels].
        out, updated_edges = self.propagate(edge_index, x = x, edge_attr = edge_attr, size = size)

        updated_nodes = torch.cat([x,out],dim=1)        # own embedding + aggregated messages

        updated_nodes = x + self.node_mlp(updated_nodes) # residual connection

        return updated_nodes, updated_edges

    def message(self, x_i, x_j, edge_attr):
        """
        Update the edge embeddings.

        x_i, x_j: embeddings of the two endpoints of every edge, each of shape
            [edge_num, in_channels]. In PyTorch Geometric's naming, ``_i`` is
            the node a message is delivered to and ``_j`` the node it comes from.
        edge_attr: current edge embeddings, shape [edge_num, in_channels]

        Returns the updated edge embeddings, shape [edge_num, out_channels].
        """

        updated_edges=torch.cat([x_i, x_j, edge_attr], dim = 1) # shape [edge_num, 3 * in_channels]
        updated_edges=self.edge_mlp(updated_edges)+edge_attr    # residual connection

        return updated_edges

    def aggregate(self, updated_edges, edge_index, dim_size = None):
        """
        Sum the updated edge embeddings per node.

        Returns (out, updated_edges): the per-node sums together with the
        unchanged edge embeddings, so that ``forward`` receives both.
        """

        # The axis along which to index number of nodes.
        node_dim = 0

        # dim_size pins the number of output rows to the number of nodes.
        # Without it the output stops at the largest index present in
        # edge_index[0], which breaks the concatenation with x in `forward`
        # when the highest-numbered nodes have no edge. When dim_size is None
        # the size is inferred from the index, as before.
        # NOTE(review): messages are summed onto edge_index[0]; the edge list
        # built by triangles_to_edges holds both directions of every edge, so
        # each node still collects one message per incident edge -- confirm
        # this is intended for graphs that are not symmetric.
        out = torch_scatter.scatter(updated_edges, edge_index[0, :], dim=node_dim,
                                    dim_size=dim_size, reduce = 'sum')

        return out, updated_edges

In [None]:
class MeshGraphNet(torch.nn.Module):
    """
    MeshGraphNet model. This model is built upon Deepmind's 2021 paper.
    It consists of three parts:

    (1) Encoder: one MLP for node features and one for edge features.
    (2) Processor: ``args.num_layers`` stacked ProcessorLayer blocks, each of
        which updates the edge embeddings first and the node embeddings second.
    (3) Decoder: an MLP applied to the node embeddings only.

    Args:
        input_dim_node: width of the raw node features.
        input_dim_edge: width of the raw edge features.
        hidden_dim: width of all latent embeddings (128 in Deepmind's paper).
        output_dim: width of the per-node prediction.
        args: object providing ``num_layers`` (must be >= 1).
        emb: accepted but not used.
    """

    def __init__(self, input_dim_node, input_dim_edge, hidden_dim, output_dim, args, emb=False):
        super(MeshGraphNet, self).__init__()

        self.num_layers = args.num_layers

        # encoder convert raw inputs into latent embeddings
        self.node_encoder = Sequential(Linear(input_dim_node , hidden_dim),
                              ReLU(),
                              Linear( hidden_dim, hidden_dim),
                              LayerNorm(hidden_dim))

        self.edge_encoder = Sequential(Linear( input_dim_edge , hidden_dim),
                              ReLU(),
                              Linear( hidden_dim, hidden_dim),
                              LayerNorm(hidden_dim)
                              )


        self.processor = nn.ModuleList()
        assert (self.num_layers >= 1), 'Number of message passing layers is not >=1'

        processor_layer=self.build_processor_model()
        for _ in range(self.num_layers):
            self.processor.append(processor_layer(hidden_dim,hidden_dim))


        # decoder: only for node embeddings (no LayerNorm on the output)
        self.decoder = Sequential(Linear( hidden_dim , hidden_dim),
                              ReLU(),
                              Linear( hidden_dim, output_dim)
                              )


    def build_processor_model(self):
        """Return the class used for every processor block."""
        return ProcessorLayer


    def forward(self,data,mean_vec_x,std_vec_x,mean_vec_edge,std_vec_edge):
        """
        Encode the graph, run the processor blocks and decode the node embeddings.

        data: graph object providing ``x``, ``edge_index`` and ``edge_attr``
        mean_vec_x, std_vec_x: statistics used to normalize ``data.x``
        mean_vec_edge, std_vec_edge: statistics used to normalize ``data.edge_attr``

        Returns the decoder output for every node. ``loss`` compares it with
        the normalized targets, so it lives in normalized target space.
        """
        # Only these three attributes are needed; the pressure stored on the
        # graph is not an input of the model and is no longer read here.
        x, edge_index, edge_attr = data.x, data.edge_index, data.edge_attr

        x = normalize(x,mean_vec_x,std_vec_x)
        edge_attr=normalize(edge_attr,mean_vec_edge,std_vec_edge)

        # Step 1: encode node/edge features into latent node/edge embeddings
        x = self.node_encoder(x) # output shape is the specified hidden dimension

        edge_attr = self.edge_encoder(edge_attr) # output shape is the specified hidden dimension

        # step 2: perform message passing with latent node/edge embeddings
        for layer in self.processor:
            x,edge_attr = layer(x,edge_index,edge_attr)

        # step 3: decode latent node embeddings into physical quantities of interest

        return self.decoder(x)

    def loss(self, pred, inputs,mean_vec_y,std_vec_y):
        """
        Root of the mean squared error between ``pred`` and the normalized
        targets ``inputs.y``, taken over NORMAL and OUTFLOW nodes only.

        The node type is recovered as the argmax of ``inputs.x[:, 2:]``, i.e.
        the columns after the first two are treated as the one-hot node type.
        """
        #Node type code of every node, computed once
        node_types = torch.argmax(inputs.x[:,2:],dim=1)

        #Get the loss mask for the nodes of the types we calculate loss for
        loss_mask = torch.logical_or(node_types == int(NodeType.NORMAL),
                                     node_types == int(NodeType.OUTFLOW))

        #Normalize labels with dataset statistics
        labels = normalize(inputs.y,mean_vec_y,std_vec_y)

        #Find sum of square errors per node
        error=torch.sum((labels-pred)**2,dim=1)

        #Root and mean the errors for the nodes we calculate loss for
        loss=torch.sqrt(torch.mean(error[loss_mask]))

        return loss

In [None]:
def build_optimizer(args, params):
    """Create the optimizer and, optionally, a learning-rate scheduler.

    Args:
        args: object providing ``opt`` ('adam', 'sgd', 'rmsprop' or
            'adagrad'), ``lr``, ``weight_decay`` and ``opt_scheduler``
            ('none', 'step' or 'cos'). 'step' additionally reads
            ``opt_decay_step`` and ``opt_decay_rate``; 'cos' reads
            ``opt_restart``.
        params: iterable of parameters; those with ``requires_grad`` False
            are left out of the optimizer.

    Returns:
        ``(scheduler, optimizer)``; ``scheduler`` is None for 'none'.

    Raises:
        ValueError: for an unknown ``opt`` or ``opt_scheduler`` (previously
            this surfaced as an UnboundLocalError further down).
    """
    weight_decay = args.weight_decay
    trainable = [p for p in params if p.requires_grad]

    if args.opt == 'adam':
        optimizer = optim.Adam(trainable, lr=args.lr, weight_decay=weight_decay)
    elif args.opt == 'sgd':
        optimizer = optim.SGD(trainable, lr=args.lr, momentum=0.95, weight_decay=weight_decay)
    elif args.opt == 'rmsprop':
        optimizer = optim.RMSprop(trainable, lr=args.lr, weight_decay=weight_decay)
    elif args.opt == 'adagrad':
        optimizer = optim.Adagrad(trainable, lr=args.lr, weight_decay=weight_decay)
    else:
        raise ValueError("unknown optimizer '{}'".format(args.opt))

    if args.opt_scheduler == 'none':
        return None, optimizer
    elif args.opt_scheduler == 'step':
        scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=args.opt_decay_step, gamma=args.opt_decay_rate)
    elif args.opt_scheduler == 'cos':
        scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=args.opt_restart)
    else:
        raise ValueError("unknown scheduler '{}'".format(args.opt_scheduler))
    return scheduler, optimizer

In [None]:
# Pull one mini-batch of two graphs and print the widths of its tensors.
train_loader = make_loader(train_datasets, 2, shuffle=False)
sample_data = next(iter(train_loader))
print(sample_data.x.size(1))          # node features
print(sample_data.edge_attr.size(1))  # edge features
print(sample_data.y.size(1))          # targets
# the dynamic variables have the shape of 2 (velocity)

In [None]:
def train(device, train_stats_list, valid_stats_list, args):
    '''
    Train a MeshGraphNet and track its loss on the validation data.

    Reads the notebook-level ``train_datasets`` and ``valid_datasets``.

    Args:
        device: device the model, the data and the statistics are moved to.
        train_stats_list: ``[mean_x, std_x, mean_edge, std_edge, mean_y,
            std_y]`` computed on the training data. Used for training AND for
            validation: the normalization is part of what the model learned,
            so evaluating with other statistics would feed it differently
            scaled inputs.
        valid_stats_list: kept for backward compatibility; not used.
        args: run options. Reads num_layers, batch_size, hidden_dim, epochs,
            weight_decay, lr, shuffle, train_size, valid_size, save_velo_val,
            save_best_model and checkpoint_dir, plus whatever
            ``build_optimizer`` reads.

    Returns:
        ``(test_losses, losses, velo_val_losses, best_model, best_test_loss,
        valid_loader)``. ``test_losses`` (and ``velo_val_losses`` when
        ``args.save_velo_val`` is set) hold one value per epoch; between
        validation epochs the last measured value is repeated so the curves
        can be plotted against ``losses``.

    Side effects:
        Writes ``<model_name>.csv`` (loss history) and, when
        ``args.save_best_model`` is set, ``<model_name>.pt`` (state dict of
        the best model) into ``args.checkpoint_dir``.
    '''

    #Define the model name for saving ('_te' carries the validation size; the
    #suffix is kept because existing checkpoints are found by this name)
    model_name = ('model_nl' + str(args.num_layers) +
                  '_bs' + str(args.batch_size) +
                  '_hd' + str(args.hidden_dim) +
                  '_ep' + str(args.epochs) +
                  '_wd' + str(args.weight_decay) +
                  '_lr' + str(args.lr) +
                  '_shuff_' + str(args.shuffle) +
                  '_tr' + str(args.train_size) +
                  '_te' + str(args.valid_size))

    #makedirs also creates missing parent directories and does not fail when
    #the directory already exists
    os.makedirs(args.checkpoint_dir, exist_ok=True)
    csv_path = os.path.join(args.checkpoint_dir, model_name + '.csv')
    model_path = os.path.join(args.checkpoint_dir, model_name + '.pt')

    #torch_geometric DataLoaders are used for handling the data of lists of graphs.
    #The training loader honours args.shuffle, which is also encoded in the model name.
    train_loader = make_loader(train_datasets, args.batch_size, shuffle=args.shuffle, use_dataset_size=args.train_size)
    valid_loader = make_loader(valid_datasets, args.batch_size, shuffle=False, use_dataset_size=args.valid_size)

    #The statistics of the data are decomposed
    (mean_vec_x, std_vec_x,
     mean_vec_edge, std_vec_edge,
     mean_vec_y, std_vec_y) = [stat.to(device) for stat in train_stats_list]

    # build model; the layer widths are read off one sample batch
    sample_data = next(iter(train_loader))
    num_node_features = sample_data.x.shape[1]
    num_edge_features = sample_data.edge_attr.shape[1]
    num_classes = sample_data.y.shape[1]

    model = MeshGraphNet(input_dim_node=num_node_features,
                         input_dim_edge=num_edge_features,
                         hidden_dim=args.hidden_dim,
                         output_dim=num_classes,
                         args=args).to(device)
    scheduler, opt = build_optimizer(args=args, params=model.parameters())

    # train
    losses = []
    test_losses = []
    velo_val_losses = []
    history = []            # one dict per epoch; turned into the CSV
    best_test_loss = np.inf
    best_model = None

    def write_history():
        # Built from the list of records in one go instead of growing a
        # DataFrame row by row; a missing 'velo_val_loss' becomes an empty column.
        pd.DataFrame(history, columns=['epoch', 'train_loss', 'test_loss', 'velo_val_loss']
                     ).to_csv(csv_path, index=False)

    for epoch in trange(args.epochs, desc="Training", unit="Epochs"):
        total_loss = 0
        model.train()
        num_loops=0
        for batch in train_loader:
            #Note that normalization must be done before it's called. The unnormalized
            #data needs to be preserved in order to correctly calculate the loss
            batch=batch.to(device)
            opt.zero_grad()         #zero gradients each time
            pred = model(batch,mean_vec_x,std_vec_x,mean_vec_edge,std_vec_edge)
            loss = model.loss(pred,batch,mean_vec_y,std_vec_y)
            loss.backward()         #backpropagate loss
            opt.step()
            total_loss += loss.item()
            num_loops+=1
        total_loss /= num_loops
        losses.append(total_loss)

        #Advance the learning-rate schedule once per epoch; without this call
        #a configured scheduler would have no effect
        if scheduler is not None:
            scheduler.step()

        is_validation_epoch = (epoch % 10 == 0)
        if is_validation_epoch:
            #Every tenth epoch, calculate the validation loss and, if requested,
            #the velocity validation error
            test_loss, velo_val_rmse = test(valid_loader,
                                            device,
                                            model,
                                            train_stats_list,
                                            args.save_velo_val)
            test_losses.append(test_loss.item())
            if (args.save_velo_val):
                velo_val_losses.append(velo_val_rmse.item())

            #keep a copy of the model if the current one is better than the previous best
            if test_loss < best_test_loss:
                best_test_loss = test_loss
                best_model = copy.deepcopy(model)
        else:
            #If not the tenth epoch, repeat the previously calculated loss so the
            #curve has one value per epoch, like the training losses. This is
            #done whether or not the velocity error is tracked.
            test_losses.append(test_losses[-1])
            if (args.save_velo_val):
                velo_val_losses.append(velo_val_losses[-1])

        record = {'epoch': epoch, 'train_loss': losses[-1], 'test_loss': test_losses[-1]}
        if (args.save_velo_val):
            record['velo_val_loss'] = velo_val_losses[-1]
        history.append(record)

        #Write the history after this epoch's row has been added, so the file
        #includes the validation result that was just computed
        if is_validation_epoch:
            write_history()

        # Every 100 epochs: report progress and save the best model so far
        if(epoch%100==0):
            if (args.save_velo_val):
                print("train loss", str(round(total_loss, 2)),
                      "test loss", str(round(test_loss.item(), 2)),
                      "velo loss", str(round(velo_val_rmse.item(), 5)))
            else:
                print("train loss", str(round(total_loss,2)), "test loss", str(round(test_loss.item(),2)))

            if(args.save_best_model):
                torch.save(best_model.state_dict(), model_path)

    #Final write-out: without it the file on disk would miss every epoch after
    #the last periodic save, including a better model found in between
    if history:
        write_history()
    if args.save_best_model and best_model is not None:
        torch.save(best_model.state_dict(), model_path)

    return test_losses, losses, velo_val_losses, best_model, best_test_loss, valid_loader

In [None]:
def test(loader,device,test_model,
         stats_list, is_validation,
          delta_t=0.01, save_model_preds=False, model_type=None):
  
    '''
    Calculates test set losses and validation set errors.
    '''

    loss=0
    velo_rmse = 0
    num_loops=0
    
    [mean_vec_x, std_vec_x,
     mean_vec_edge, std_vec_edge,
     mean_vec_y, std_vec_y] = stats_list
    (mean_vec_x, std_vec_x,
     mean_vec_edge, std_vec_edge,
     mean_vec_y, std_vec_y) = (mean_vec_x.to(device), std_vec_x.to(device),
                               mean_vec_edge.to(device), std_vec_edge.to(device),
                               mean_vec_y.to(device), std_vec_y.to(device))

    for data in loader:
        data=data.to(device)
        with torch.no_grad():

            #calculate the loss for the model given the test set
            pred = test_model(data,mean_vec_x,std_vec_x,mean_vec_edge,std_vec_edge)
            loss += test_model.loss(pred, data,mean_vec_y,std_vec_y)

            #calculate validation error if asked to
            if (is_validation):

                #Like for the MeshGraphNets model, calculate the mask over which we calculate
                #flow loss and add this calculated RMSE value to our val error
                normal=torch.tensor(NodeType.NORMAL)
                outflow=torch.tensor(NodeType.OUTFLOW)
                loss_mask = torch.logical_or((torch.argmax(data.x[:, 2:], dim=1) == normal),
                                             (torch.argmax(data.x[:, 2:], dim=1) == outflow))

                eval_velo = data.x[:, 0:2] + unnormalize( pred[:], mean_vec_y, std_vec_y ) * delta_t
                gs_velo = data.x[:, 0:2] + data.y[:] * delta_t
                
                error = torch.sum((eval_velo - gs_velo) ** 2, axis=1)
                velo_rmse += torch.sqrt(torch.mean(error[loss_mask]))

        num_loops+=1
        # if velocity is evaluated, return velo_rmse as 0
    return loss/num_loops, velo_rmse/num_loops

ここまでで学習の準備 終了

ここからモデルをトレーニングして，**最良のモデル**を探す

In [None]:
# Hyper-parameters and run options of this experiment, exposed as attributes.
args = objectview({
    'model_type': 'meshgraphnet',
    'num_layers': 10,
    'batch_size': 2,        # originally 16
    'hidden_dim': 10,
    'epochs': 500,          # alternative used before: 5000
    'opt': 'adam',
    'opt_scheduler': 'none',
    'opt_restart': 0,
    'weight_decay': 5e-4,
    'lr': 0.001,
    'train_size': 45,       # once everything runs: len(train_datasets)*len(train_datasets[0])
    'valid_size': 10,       # once everything runs: len(valid_datasets)*len(valid_datasets[0])
    'device':'cuda',
    'shuffle': True,
    'save_velo_val': True,
    'save_best_model': True,
    'checkpoint_dir': './best_models/',   # would something like models/best_models be clearer?
    'postprocess_dir': './2d_loss_plots/',
})

# Use the GPU when there is one; this replaces the 'device' entry above.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
args.device = device
print(device)

# Fix the seeds of all three random number generators.
torch.manual_seed(5)  #Torch
random.seed(5)        #Python
np.random.seed(5)     #NumPy

In [None]:
# Per-feature mean/std of node features, edge features and targets, computed
# separately for the training and the validation trajectories.
train_stats_list = get_stats_from_datasets(train_datasets)
valid_stats_list = get_stats_from_datasets(valid_datasets)

In [None]:
# Run the training loop; the loader returned as the last element is not needed here.
test_losses, losses, velo_val_losses, best_model, best_test_loss, _ = train(
    device, train_stats_list, valid_stats_list, args)

# Best value reached by each tracked curve.
print(f"Min test set loss: {min(test_losses)}")
print(f"Minimum loss: {min(losses)}")
if args.save_velo_val:
    print(f"Minimum velocity validation loss: {min(velo_val_losses)}")

In [None]:
def save_plots(args, losses, test_losses, velo_val_losses):
    """Plot the training and validation loss curves, save them as PDF and show them.

    Args:
        args: run options; the file name is derived from num_layers,
            batch_size, hidden_dim, epochs, weight_decay, lr, shuffle,
            train_size and valid_size, the directory is ``postprocess_dir``
            and the legend uses ``model_type``.
        losses: training loss per epoch.
        test_losses: validation loss per epoch.
        velo_val_losses: accepted for interface compatibility; not plotted.
    """
    # Same naming scheme as the checkpoint files ('_te' carries the validation size).
    model_name='model_nl'+str(args.num_layers)+'_bs'+str(args.batch_size) + \
               '_hd'+str(args.hidden_dim)+'_ep'+str(args.epochs)+'_wd'+str(args.weight_decay) + \
               '_lr'+str(args.lr)+'_shuff_'+str(args.shuffle)+'_tr'+str(args.train_size)+'_te'+str(args.valid_size)

    # makedirs also creates missing parents and tolerates an existing directory.
    os.makedirs(args.postprocess_dir, exist_ok=True)

    PATH = os.path.join(args.postprocess_dir, model_name + '.pdf')

    fig, ax = plt.subplots()
    ax.set_title('Losses Plot')
    ax.plot(losses, label="training loss" + " - " + args.model_type)
    ax.plot(test_losses, label="valid loss" + " - " + args.model_type)
    ax.set_xlabel('Epoch')
    ax.set_ylabel('Loss')
    ax.legend()

    # Save before showing: depending on the backend, the figure may already be
    # torn down when plt.show() returns.
    fig.savefig(PATH, bbox_inches='tight')
    plt.show()
    # Release the figure so repeated calls do not pile up open figures.
    plt.close(fig)

In [None]:
# Plot the loss curves of the run above and store them under args.postprocess_dir.
save_plots(args, losses, test_losses, velo_val_losses)

ここからテスト

In [None]:
def rollout_test(args, test_stats_list, use_traj_size=None, delta_t=0.01, save_model_preds=False, model_type=None):
    """Roll the saved best model out over test trajectories on the CPU.

    Starting from the true first step of a trajectory, the model's own
    prediction is fed back as the input velocity of the next step. Reads the
    notebook-level ``test_datasets`` and leaves it unmodified.

    Args:
        args: run options; used to rebuild the model, to derive the checkpoint
            file name and (``checkpoint_dir``, default './best_models/') to
            locate it.
        test_stats_list: ``[mean_x, std_x, mean_edge, std_edge, mean_y, std_y]``
            used to normalize the inputs and un-normalize the predictions.
        use_traj_size: number of trajectories to roll out (default: all).
        delta_t: time step of the integration.
        save_model_preds, model_type: accepted but not used.

    Returns:
        ``(rollout_traj_list, rollout_loss_list)``. ``rollout_traj_list[i][ts]``
        is the predicted node feature matrix following step ``ts`` of
        trajectory ``i``; ``rollout_loss_list[i]`` is the loss averaged over
        the steps of trajectory ``i``.

    Raises:
        ValueError: if ``use_traj_size`` exceeds the number of test
            trajectories (previously a message was printed and None returned,
            which callers unpacking two values could not handle).
    """
    if use_traj_size is None:
        use_traj_size = len(test_datasets)
    if use_traj_size > len(test_datasets):
        raise ValueError('use_traj_size is too big: {} > {}'.format(use_traj_size, len(test_datasets)))
    use_test_datasets = test_datasets[:use_traj_size]

    device = 'cpu'
    print('rollout device is {}'.format(device))

    #The statistics of the data are decomposed
    (mean_vec_x, std_vec_x,
     mean_vec_edge, std_vec_edge,
     mean_vec_y, std_vec_y) = [stat.to(device) for stat in test_stats_list]

    # Layer widths are read off the first test graph
    sample_data = test_datasets[0][0]
    num_node_features = sample_data.x.shape[1]
    num_edge_features = sample_data.edge_attr.shape[1]
    num_classes = sample_data.y.shape[1]

    best_model = MeshGraphNet(num_node_features,
                              num_edge_features,
                              args.hidden_dim,
                              num_classes,
                              args).to(device)

    # Same naming scheme as in train() ('_te' carries the validation size)
    model_name = ('model_nl' + str(args.num_layers) +
                  '_bs' + str(args.batch_size) +
                  '_hd' + str(args.hidden_dim) +
                  '_ep' + str(args.epochs) +
                  '_wd' + str(args.weight_decay) +
                  '_lr' + str(args.lr) +
                  '_shuff_' + str(args.shuffle) +
                  '_tr' + str(args.train_size) +
                  '_te' + str(args.valid_size))

    # Look in the directory the training run saved to; fall back to the
    # previously hard-coded location when args does not carry one
    checkpoint_dir = getattr(args, 'checkpoint_dir', './best_models/')
    PATH = os.path.join(checkpoint_dir, model_name + '.pt')
    # Map the weights to the device the rollout actually runs on
    best_model.load_state_dict(torch.load(PATH, map_location=device))
    best_model.eval()

    rollout_traj_list = []  # one list of predicted feature matrices per trajectory
    rollout_loss_list = []  # one step-averaged loss per trajectory

    # No gradients are needed. Without no_grad every predicted state would keep
    # the autograd graph of all previous steps alive.
    with torch.no_grad():
        for i, one_trajectory in enumerate(use_test_datasets):
            one_trajectory_loss = 0
            rollout_traj_x = []
            pred_x = None   # no prediction yet: the first step uses the true state

            print('~~ start rollout No.{} ~~'.format(i))
            for ts, one_step in enumerate(one_trajectory):
                # Work on a copy: assigning the prediction to .x of the
                # original object would overwrite the test data itself
                current_step = one_step.clone().to(device)
                if pred_x is not None:
                    current_step.x = pred_x

                pred_y = best_model(current_step,mean_vec_x,std_vec_x,mean_vec_edge,std_vec_edge)
                one_trajectory_loss += best_model.loss(pred_y, current_step, mean_vec_y, std_vec_y)

                # The model output lives in normalized target space (its loss
                # compares it with normalize(y)), so bring it back to physical
                # units before integrating, as test() does
                accel = unnormalize(pred_y, mean_vec_y, std_vec_y)

                # Only the first two columns (velocity) advance; the node-type
                # columns are carried over unchanged.
                # NOTE(review): every node is advanced with the prediction,
                # including node types that are excluded from the training
                # loss -- confirm whether those should be reset to their known
                # values instead.
                pred_x = current_step.x.clone()
                pred_x[:, :2] += accel * delta_t

                rollout_traj_x.append(pred_x)

            avg_one_trajectory_loss = one_trajectory_loss/len(one_trajectory)
            rollout_traj_list.append(rollout_traj_x)
            rollout_loss_list.append(avg_one_trajectory_loss)
            print('   finish. loss avg={}'.format(avg_one_trajectory_loss))

    return rollout_traj_list, rollout_loss_list

In [None]:
test_stats_list = train_stats_list
rollout_traj_list, rollout_loss_list = rollout_test(args, test_stats_list, use_traj_size=3)

rollout_traj_list について
- rollout_traj_list[0 \: max_traj] : ロールアウトのトラジェクトリの集合
- rollout_traj_list[0] : トラジェクトリ0のロールアウトデータ(ts=1~600であることに注意)
- rollout_traj_list[0][0] : トラジェクトリ0 のts=1の全ノードのデータ
- rollout_traj_list[0][0][0][:2] : トラジェクトリ0 のts=1のノード0のデータの速度のx成分とy成分

rollout_loss_listについて
- rollout_loss_list[0 \: max_traj] : ロールアウトの「トラジェクトリごとのロス平均」の集合
- rollout_loss_list[0] : ロールアウトのトラジェクトリ0のロスの平均(ts=1~600のロスの平均)

アニメーションについて
- とりあえず，ロスの平均が最小，中央，最大のものについてアニメーション化すると良い...？
- 表示するものは colab と同様に gs, pred, errで良い？
- min, medi, maxあたりで名前を変えてmp4で保存
- 可能なら不要な部分切る