In [1]:
# Standard libraries
import time
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cm as cm
plt.rcParams['figure.dpi'] = 300
import random
import csv
import pandas as pd
import h5py
# Scikit learn libraries
from sklearn.model_selection import train_test_split
# PyTorch libraries
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
from torchvision.transforms import ToTensor, Normalize 
# Own scripts:
%load_ext autoreload
%autoreload 2
import physics
import data
import nnc2p
# from nnc2p import NeuralNetwork # our own architecture
# Get dirs
import os
cwd = os.getcwd()# "Code" folder
master_dir = os.path.abspath(os.path.join(cwd, ".."))

Point towards the folder where we store the eos tables (__Note:__ they are not in the Github as these are very large files)

In [2]:
eos_tables_folder = os.path.join("D:\Coding\eos_tables")

# Introduction

Here, we try to find a way to generalize the NN approach from the first semester to the situation of tabular EOS. More work coming soon!

# Exploring EOS tables

In [3]:
# Put the downloaded EOS tables here
first_table_filename       = "LS180_234r_136t_50y_analmu_20091212_SVNr26.h5"
second_table_filename = "GShen_NL3EOS_rho280_temp180_ye52_version_1.1_20120817.h5"
third_table_filename      = "SLy4_0000_rho391_temp163_ye66.h5"
# Then specify which we are going to use here
eos_table_filename = third_table_filename

Read in the SLy4 EOS table using our py script:

In [9]:
eos_table = physics.read_eos_table(eos_table_filename)
dim_ye, dim_temp, dim_rho = eos_table["pointsye"][()][0], eos_table["pointstemp"][()][0], eos_table["pointsrho"][()][0]
print(f"This EOS table has dimensions {dim_ye} x {dim_temp} x {dim_rho}")

This EOS table has dimensions 66 x 163 x 391


In [11]:
np.shape(eos_table["logenergy"][()])  # same dimension

(66, 163, 391)

In [12]:
# Small test to see the output of the EOS table
test_ye = eos_table["ye"][()][0]
test_temp = eos_table["logtemp"][()][0]
test_rho = eos_table["logrho"][()][0]
test_press = eos_table["logpress"][()][0, 0, 0]
print(f"For ye {test_ye}, log temp {test_temp}, log rho {test_rho}, we have log p: {test_press}.")

For ye 0.005, log temp -3.0, log rho 3.0239960056064277, we have log p: 17.99956975587081.


# Generating training data by sampling from EOS table

To generate new data

In [13]:
# dat = physics.generate_tabular_data(eos_table, number_of_points = 100000, save_name = "SLy4_training_data")
# dat = physics.generate_tabular_data(eos_table, number_of_points = 20000, save_name = "SLy4_test_data")

Load data

In [24]:
df = pd.read_csv(os.path.join(master_dir, "Data/SLy4_training_data.csv"))
df

Unnamed: 0,rho,eps,v,temp,ye,p,D,S,tau
0,13.590663,19.381445,0.683568,-0.333333,0.045,31.985334,18.620245,580.748110,655.358209
1,6.457329,28.786040,0.041886,2.166667,0.545,34.766248,6.463001,13.269992,186.431092
2,11.323996,19.654200,0.558550,0.866667,0.535,30.418591,13.652070,310.329455,393.570454
3,8.723996,19.246855,0.009527,-0.300000,0.095,26.341137,8.724392,2.749496,167.935288
4,5.523996,29.186040,0.334705,2.033333,0.095,34.232914,5.862105,103.365923,195.482542
...,...,...,...,...,...,...,...,...,...
99995,3.323996,25.652376,0.577186,0.600000,0.435,28.498601,4.070468,125.881369,157.178893
99996,9.757329,19.124371,0.376611,-2.900000,0.295,27.371010,10.532847,140.768515,238.842241
99997,9.590663,19.114310,0.266570,-0.833333,0.305,27.176186,9.950723,90.427775,207.064143
99998,14.690663,20.976110,0.476948,2.233333,0.485,35.391058,16.714229,326.155098,461.688397


__TODO__ delete -- Get the mean for normalization

In [39]:
# input_channels = np.array([df["D"], df["S"], df["tau"]]) 
# tabular_mean = np.mean(input_channels, axis=1)
# tabular_std  = np.std(input_channels, axis=1)
# print(f"Mean is {tabular_mean}, std is {tabular_std}")

Mean is [ 10.61740893 171.68079015 283.8404585 ], std is [  4.38641801 155.12454904 147.9843713 ]


In [75]:
# # Normalize the input data, if not done already (check this by checking if mean is smaller than 1e-3)
# if abs(np.mean(df["S"])) > 1e-3:
#     for i, var in enumerate(["D", "S", "tau"]):
#         df[var] = (df[var] - tabular_mean[i])/tabular_std[i]

For comparison, this was the table we trained on for the ideal gas EOS:

In [15]:
old_df = pd.read_csv(os.path.join(master_dir, "Data/ideal_gas_c2p_train_data.csv"))
old_df

Unnamed: 0,rho,eps,v,p,D,S,tau
0,0.662984,0.084146,0.218802,0.037192,0.679448,0.173724,0.077335
1,8.565808,0.205945,0.657351,1.176059,11.366755,13.318537,7.718100
2,4.387112,1.598809,0.021593,4.676103,4.388135,0.347321,7.020631
3,5.337054,0.530803,0.351307,1.888615,5.700396,4.031171,3.885760
4,1.133895,0.786717,0.079475,0.594703,1.137493,0.209600,0.905115
...,...,...,...,...,...,...,...
79995,8.101834,0.428605,0.616897,2.314990,10.294002,13.832316,9.813427
79996,7.841014,1.125480,0.209087,5.883268,8.018242,4.930289,9.678536
79997,4.628822,0.194190,0.237759,0.599248,4.765476,1.544018,1.129323
79998,9.913117,1.152242,0.477216,7.614874,11.280468,17.889657,18.592193


# NNC2P

## Define architecture and prepare data

In [17]:
class NNC2P(nn.Module):
    """
    Implements a neural network for the C2P conversion, using tabulated SLy4 EOS.
    """
    def __init__(self, h: list = [600, 200], reg: bool = False, activation_function = torch.nn.Sigmoid) -> None:
        """
        Initialize the neural network class.
        """
        # Call the super constructor first
        super(NNC2P, self).__init__()
        
        # For convenience, save the sizes of the hidden layers as fields as well
        self.h = h
        # Add visible layers as well: input is 3D and output is 1D
        self.h_augmented = [3] + h + [1]

        # Add field to specify whether or not we do regularization
        self.regularization = reg

        # Define the layers:
        for i in range(len(self.h_augmented)-1):
            if i == len(self.h_augmented)-2:
                setattr(self, f"linear{i+1}", nn.Linear(self.h_augmented[i], self.h_augmented[i+1], bias=False))
            else:
                setattr(self, f"linear{i+1}", nn.Linear(self.h_augmented[i], self.h_augmented[i+1]))
                setattr(self, f"activation{i+1}", activation_function())

    def forward(self, x):
        """
        Computes a forward step given the input x.
        :param x: Input for the neural network.
        :return: x: Output neural network
        """

        for i, module in enumerate(self.modules()):
            # The first module is the whole NNC2P object, continue
            if i == 0:
                continue
            x = module(x)

        return x

Get the training data as DataSet and DataLoader objects:

In [112]:
# Read data as pandas dataframes
train_df = pd.read_csv(os.path.join(master_dir, "Data/SLy4_training_data.csv"))
test_df  = pd.read_csv(os.path.join(master_dir, "Data/SLy4_test_data.csv"))
# Convert to PyTorch Datasets as we defined them
train_dataset = data.CustomDataset(train_df)
test_dataset  = data.CustomDataset(test_df, mean=train_dataset.mean, std=train_dataset.std)
# Then create dataloaders, with batch size 32, from datasets
train_dataloader = DataLoader(train_dataset, batch_size=32)
test_dataloader  = DataLoader(test_dataset, batch_size=32)

In [113]:
train_df

Unnamed: 0,rho,eps,v,temp,ye,p,D,S,tau
0,13.590663,19.381445,0.683568,-0.333333,0.045,31.985334,18.620245,580.748110,655.358209
1,6.457329,28.786040,0.041886,2.166667,0.545,34.766248,6.463001,13.269992,186.431092
2,11.323996,19.654200,0.558550,0.866667,0.535,30.418591,13.652070,310.329455,393.570454
3,8.723996,19.246855,0.009527,-0.300000,0.095,26.341137,8.724392,2.749496,167.935288
4,5.523996,29.186040,0.334705,2.033333,0.095,34.232914,5.862105,103.365923,195.482542
...,...,...,...,...,...,...,...,...,...
99995,3.323996,25.652376,0.577186,0.600000,0.435,28.498601,4.070468,125.881369,157.178893
99996,9.757329,19.124371,0.376611,-2.900000,0.295,27.371010,10.532847,140.768515,238.842241
99997,9.590663,19.114310,0.266570,-0.833333,0.305,27.176186,9.950723,90.427775,207.064143
99998,14.690663,20.976110,0.476948,2.233333,0.485,35.391058,16.714229,326.155098,461.688397


Have a look at normalization procedures

In [114]:
index = 0
example = torch.from_numpy(np.array([train_df["D"][index], train_df["S"][index], train_df["tau"][index]])).float()
print(example)
print(train_dataset.mean, train_dataset.std)
train_dataset.__getitem__(index)

tensor([ 18.6202, 580.7481, 655.3582])
[ 10.61740893 171.68079015 283.8404585 ] [  4.38641801 155.12454904 147.9843713 ]


(tensor([1.8245, 2.6370, 2.5105]), tensor([31.9853]))

Create a new instance of the NNC2P:

In [115]:
model = NNC2P(h=[600, 200], activation_function=nn.ReLU)
model
# for i, module in enumerate(model.modules()):
#     print(i)
#     print(module)

NNC2P(
  (linear1): Linear(in_features=3, out_features=600, bias=True)
  (activation1): ReLU()
  (linear2): Linear(in_features=600, out_features=200, bias=True)
  (activation2): ReLU()
  (linear3): Linear(in_features=200, out_features=1, bias=False)
)

Create a trainer object from it:

In [116]:
trainer = nnc2p.Trainer(model, 1e-2, train_dataloader=train_dataloader, test_dataloader=test_dataloader)

In [117]:
nnc2p.count_parameters(model)

122800

## Train the network

In [118]:
trainer.train()

Training the model for 500 epochs.

 Epoch 0 
 --------------
Train loss: 3.31E-01
Test  loss: 3.25E-01

 Epoch 1 
 --------------
Train loss: 2.84E-01
Test  loss: 2.79E-01

 Epoch 2 
 --------------
Train loss: 3.48E-01
Test  loss: 3.44E-01

 Epoch 3 
 --------------
Train loss: 6.63E-01
Test  loss: 6.63E-01

 Epoch 4 
 --------------
Train loss: 4.68E-01
Test  loss: 4.62E-01

 Epoch 5 
 --------------
Train loss: 3.69E-01
Test  loss: 3.65E-01

 Epoch 6 
 --------------
Train loss: 4.30E-01
Test  loss: 4.26E-01

 Epoch 7 
 --------------
Train loss: 4.41E-01
Test  loss: 4.35E-01

 Epoch 8 
 --------------
Train loss: 4.13E-01
Test  loss: 4.08E-01

 Epoch 9 
 --------------


KeyboardInterrupt: 

__TODO__ do hyperparameter search over learning rate?

__TODO__ is the normalization done correctly?

In [171]:
index = 2
print(train_df["p"][index])
example = torch.from_numpy(np.array([train_df["S"][index], train_df["D"][index], train_df["tau"][index]])).float()
print(example)
# Check the results
print(model(example))

30.41859103563728
tensor([0.8938, 0.6918, 0.7415])
tensor([-0.0249], grad_fn=<SqueezeBackward3>)


# NNE2T

I was wondering if it would be easy to convert in some way from temperature to energy, as that is done as well in the code with rootfinding.

In [34]:
class NNE2T(nn.Module):
    """
    Implements a neural network for the energy to temperature conversion, using tabulated SLy4 EOS.
    """
    def __init__(self, h: list = [10, 20], reg: bool = False, activation_function = torch.nn.Sigmoid) -> None:
        """
        Initialize the neural network class.
        """
        # Call the super constructor first
        super(NNE2T, self).__init__()
        
        # For convenience, save the sizes of the hidden layers as fields as well
        self.h = h
        # Add visible layers as well: input is 3D and output is 1D
        self.h_augmented = [1] + h + [1]

        # Add field to specify whether or not we do regularization
        self.regularization = reg

        # Define the layers:
        for i in range(len(self.h_augmented)-1):
            if i == len(self.h_augmented)-2:
                setattr(self, f"linear{i+1}", nn.Linear(self.h_augmented[i], self.h_augmented[i+1], bias=False))
            else:
                setattr(self, f"linear{i+1}", nn.Linear(self.h_augmented[i], self.h_augmented[i+1]))
                setattr(self, f"activation{i+1}", activation_function())

    def forward(self, x):
        """
        Computes a forward step given the input x.
        :param x: Input for the neural network.
        :return: x: Output neural network
        """

        for i, module in enumerate(self.modules()):
            # The first module is the whole NNC2P object, continue
            if i == 0:
                continue
            x = module(x)

        return x

Get the training data as DataSet and DataLoader objects:

In [100]:
# Read data as pandas dataframes
train_df = pd.read_csv(os.path.join(master_dir, "Data/SLy4_training_data.csv"))
test_df  = pd.read_csv(os.path.join(master_dir, "Data/SLy4_test_data.csv"))
# Convert to PyTorch Datasets as we defined them
train_dataset = data.CustomDataset(train_df, feature_names = ["eps"], label_names = ["temp"], normalize=False)
test_dataset  = data.CustomDataset(test_df, feature_names = ["eps"], label_names = ["temp"], mean = train_dataset.mean, std = test_dataset.std, normalize=False)
# Then create dataloaders, with batch size 32, from datasets
train_dataloader = DataLoader(train_dataset, batch_size=32)
test_dataloader  = DataLoader(test_dataset, batch_size=32)

In [101]:
# sanity check
train_dataset.mean == test_dataset.mean

array([ True])

Check normalization procedure

In [102]:
train_df

Unnamed: 0,rho,eps,v,temp,ye,p,D,S,tau
0,13.590663,19.381445,0.683568,-0.333333,0.045,31.985334,18.620245,580.748110,655.358209
1,6.457329,28.786040,0.041886,2.166667,0.545,34.766248,6.463001,13.269992,186.431092
2,11.323996,19.654200,0.558550,0.866667,0.535,30.418591,13.652070,310.329455,393.570454
3,8.723996,19.246855,0.009527,-0.300000,0.095,26.341137,8.724392,2.749496,167.935288
4,5.523996,29.186040,0.334705,2.033333,0.095,34.232914,5.862105,103.365923,195.482542
...,...,...,...,...,...,...,...,...,...
99995,3.323996,25.652376,0.577186,0.600000,0.435,28.498601,4.070468,125.881369,157.178893
99996,9.757329,19.124371,0.376611,-2.900000,0.295,27.371010,10.532847,140.768515,238.842241
99997,9.590663,19.114310,0.266570,-0.833333,0.305,27.176186,9.950723,90.427775,207.064143
99998,14.690663,20.976110,0.476948,2.233333,0.485,35.391058,16.714229,326.155098,461.688397


In [103]:
index = 0
example = torch.from_numpy(np.array([train_df["eps"][index]])).float()
print(example)
print(train_dataset.mean, train_dataset.std)
train_dataset.__getitem__(index)

tensor([19.3814])
[21.05666875] [2.88130898]


(tensor([19.3814]), tensor([-0.3333]))

Create a new instance of the neural net:

In [108]:
model = NNE2T([100, 200], activation_function=nn.ReLU)
model

NNE2T(
  (linear1): Linear(in_features=1, out_features=100, bias=True)
  (activation1): ReLU()
  (linear2): Linear(in_features=100, out_features=200, bias=True)
  (activation2): ReLU()
  (linear3): Linear(in_features=200, out_features=1, bias=False)
)

Create a trainer object from it:

In [109]:
trainer = nnc2p.Trainer(model, 0.1, train_dataloader=train_dataloader, test_dataloader=test_dataloader)

In [110]:
nnc2p.count_parameters(model)

20600

## Train the network

In [111]:
trainer.train()

Training the model for 500 epochs.

 Epoch 0 
 --------------
Train loss: 2.05E+00
Test  loss: 2.07E+00

 Epoch 1 
 --------------
Train loss: 2.07E+00
Test  loss: 2.08E+00

 Epoch 2 
 --------------
Train loss: 2.06E+00
Test  loss: 2.08E+00

 Epoch 3 
 --------------
Train loss: 2.55E+00
Test  loss: 2.57E+00

 Epoch 4 
 --------------
Train loss: 2.55E+00
Test  loss: 2.57E+00

 Epoch 5 
 --------------
Train loss: 2.55E+00
Test  loss: 2.57E+00

 Epoch 6 
 --------------
Train loss: 2.55E+00
Test  loss: 2.57E+00

 Epoch 7 
 --------------
Train loss: 2.55E+00
Test  loss: 2.57E+00

 Epoch 8 
 --------------
Train loss: 2.55E+00
Test  loss: 2.57E+00

 Epoch 9 
 --------------


KeyboardInterrupt: 

In [None]:
index = 2
print(train_df["eps"][index])
example = torch.from_numpy(np.array([train_df["temp"][index]])).float()
print(example)
# Check the results
print(model(example))

19.65420023247585
tensor([0.8667])
tensor([1.2086], grad_fn=<SqueezeBackward3>)


# Archive

In [70]:
# Open the HDF5 file
with h5py.File(eos_table_filename, 'r') as file:
    eos_table = file
    Abar = file["Abar"][()]
    Albar = file["Albar"][()]
    Xa = file["Xa"][()]
    Xh = file["Xh"][()]
    Xn = file["Xn"][()]
    Xp = file["Xp"][()]
    Zbar = file["Zbar"][()]
    cs2 = file["cs2"][()]
    dedt = file["dedt"][()]
    dpderho = file["dpderho"][()]
    dpdrhoe = file["dpdrhoe"][()]
    energy_shift = file["energy_shift"][()]
    entropy = file["entropy"][()]
    gamma = file["gamma"][()]
    logenergy = file["logenergy"][()]
    logpress = file["logpress"][()]
    logrho = file["logrho"][()]
    logtemp = file["logtemp"][()]
    mu_e = file["mu_e"][()]
    mu_n = file["mu_n"][()]
    muhat = file["muhat"][()]
    munu = file["munu"][()]
    pointsrho = file["pointsrho"][()]
    pointstemp = file["pointstemp"][()]
    pointsye = file["pointsye"][()]
    u = file["u"][()] ## these don't exist???
    r = file["r"][()]
    ye = file["ye"][()]
# Print message
print(f"We successfully loaded the EOS table. We have {pointsrho[0]} rho, {pointstemp[0]} temp, {pointsye[0]} ye points.")
print(f"We have {pointsrho[0]*pointstemp[0]*pointsye[0]} data points.")

We successfully loaded the EOS table. We have 391 rho, 163 temp, 66 ye points.
We have 4206378 data points.
