# Import libraries

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import torch

# Import raw data

In [2]:
#Directory holding the training data; edit this single line to run the notebook on another machine
DATA_DIR = '/Users/samsonmercier/Desktop/Work/PhD/Research/Second_Generals/Data/bt-4500k'

#File containing temperature values
raw_T_data = np.loadtxt(f'{DATA_DIR}/training_data_T.csv', delimiter=',')
#File containing pressure values
raw_P_data = np.loadtxt(f'{DATA_DIR}/training_data_P.csv', delimiter=',')

#The inputs are read from the temperature file only, so both files must have one row per simulation
#in the same order; a differing row count would silently pair profiles from different simulations.
if raw_T_data.shape[0] != raw_P_data.shape[0]:
    raise ValueError(
        f'Temperature file has {raw_T_data.shape[0]} rows but pressure file has {raw_P_data.shape[0]} rows'
    )

#Column layout of both files:
#First 5 columns are the input values (H2 pressure in bar, CO2 pressure in bar, LoD in hours, Obliquity in deg, H2+CO2 pressure),
#last 51 columns are the temperature/pressure values.
#We drop the 5th input (H2+CO2 pressure) since it is the sum of the first two and adds no information.
raw_inputs = raw_T_data[:, :4]
raw_outputs_T = raw_T_data[:, 5:]
raw_outputs_P = raw_P_data[:, 5:]

#Storing useful quantities
N = raw_inputs.shape[0] #Number of data points
D = raw_inputs.shape[1] #Number of features
O = raw_outputs_T.shape[1] #Number of outputs


# Plotting of the T-P profiles

In [None]:
#Draw one temperature-pressure profile per data point (one figure per sample).
#NOTE(review): the title labels the first input as H2O and the LoD in days, whereas the
#data-loading comment describes them as H2 pressure and LoD in hours - confirm which is correct.
for raw_input, raw_output_T, raw_output_P in zip(raw_inputs,raw_outputs_T,raw_outputs_P):
    fig, ax = plt.subplots(1, 1, figsize=[8, 6])
    #Base-10 logarithm so the plotted values match the log10 axis label (np.log is the natural log).
    #The division by 1000 presumably converts the stored pressures to bar - TODO confirm units.
    ax.plot(raw_output_T, np.log10(raw_output_P/1000), color='blue', linewidth=2)
    #Invert the y-axis so that larger pressures are drawn at the bottom of the figure
    ax.invert_yaxis()
    ax.set_xlabel('Temperature (K)')
    ax.set_ylabel(r'log$_{10}$ Pressure (bar)')
    ax.set_title(rf'H$_2$O : {raw_input[0]} bar, CO$_2$ : {raw_input[1]} bar, LoD : {raw_input[2]:.0f} days, Obliquity : {raw_input[3]} deg')
    plt.show()
    #Release the figure so looping over every sample does not accumulate open figures in memory
    plt.close(fig)

# Fitting the training data with a basic deep neural network

## 0th step : Shrink down data so we can work with it locally

In [3]:
#Number of samples to shrink our dataset to (never more than the N samples available)
sample_size = min(100, N)

#Seed of the random sub-sampling, so that the same subset is drawn on every run
subsample_seed = 0

#Draw sample_size distinct row indices from 0..N-1 with a seeded generator.
#NOTE(review): the name 'filter' shadows the Python builtin; it is kept unchanged here
#in case later cells reference it - rename once those usages have been checked.
filter = np.random.default_rng(subsample_seed).choice(N, size=sample_size, replace=False)

#Keep only the selected rows and convert them to float32 tensors.
#This overwrites the full arrays, so re-running this cell sub-samples the already
#shrunk data; re-run the data-loading cell first to start again from the full dataset.
raw_inputs = torch.tensor(raw_inputs[filter, :], dtype=torch.float32)
raw_outputs_T = torch.tensor(raw_outputs_T[filter, :], dtype=torch.float32)
raw_outputs_P = torch.tensor(raw_outputs_P[filter, :], dtype=torch.float32)

#The number of data points is now the size of the sub-sample
N = sample_size

## First step : Define a training, validation, and testing set

In [4]:
#Fractions of the data assigned to 1. training 2. validation and 3. testing (in that order)
data_partitions = [0.7, 0.1, 0.2]

#Seed of the random number generator that decides which sample goes into which partition
partition_seed = 4

#Seeded generator, so the partitioning is identical on every run
generator = torch.Generator().manual_seed(partition_seed)

#Randomly distribute the sample indices 0..N-1 over the three partitions
train_idx, valid_idx, test_idx = torch.utils.data.random_split(
    range(N),
    data_partitions,
    generator=generator,
)

#Select the rows of each partition from the inputs, the temperatures and the pressures.
#The outer loop runs over the partitions, the inner loop over the three data tensors,
#so every partition holds matching rows of inputs / T outputs / P outputs.
(
    (train_inputs, train_outputs_T, train_outputs_P),
    (valid_inputs, valid_outputs_T, valid_outputs_P),
    (test_inputs, test_outputs_T, test_outputs_P),
) = [
    [data[partition_idx] for data in (raw_inputs, raw_outputs_T, raw_outputs_P)]
    for partition_idx in (train_idx, valid_idx, test_idx)
]