## Data rescaling 
----
Artificial neural networks work best when dealing with small-valued data. Some of the inputs of our ANN will be the proton and neutron numbers, which can reach values of up to 200. This is why we need to rescale them, and we will do so in this notebook by rescaling them between 0 and 1. We will then separate the data into two different datasets (and thus two different .csv files) to create a training dataset and a test dataset for our ANN. We also need to save the data that has not been separated, so that the values can be rescaled back in the following notebooks. 

In [None]:
#Libraries for data processing
import numpy as np 
import pandas as pd

#Libraries for plotting
import matplotlib.pyplot as plt
import seaborn as sns 
sns.set(color_codes = True)
sns.set(font_scale=1.5) #fixing font size

#Library for rescaling
from sklearn.preprocessing import MinMaxScaler

In [None]:
from logging import RootLogger
#Mount Google Drive and build the path of the project folder on it.
#After this cell, `project_path` is the folder that the other cells read from
#and write to.
from google.colab import drive #import drive from google colab

root = "/content/drive"     #default location for the drive

drive.mount(root)           #we mount the google drive at /content/drive

#import join, used to join the root path and my_google_drive_path
from os.path import join  

#path to your project on Google Drive
my_google_drive_path = "MyDrive/StudentProject2023"

project_path = join(root, my_google_drive_path)

In [None]:
# Load the ";"-separated merged table from the project's processed_data folder.
merged_data = pd.read_csv(
    join(project_path, "processed_data/merged_data.csv"),
    sep=";",
)

In [None]:
# Shared min-max scaler mapping every fitted column onto the interval [0, 1].
scaler = MinMaxScaler(feature_range=(0, 1))


def rescale(list):
    """Add a min-max rescaled copy of each requested column to ``merged_data``.

    For every name in ``list`` the module-level ``merged_data`` dataframe gets
    a new column called ``"rescaled_" + name``. ``scaler.fit_transform`` is
    called once per column, so each column is scaled on its own.

    Parameters
    ----------
    list : iterable of str
        Names of the ``merged_data`` columns to rescale.

    Returns
    -------
    None
        ``merged_data`` is modified in place.
    """
    for column in list:
        # The scaler expects a 2-D input, hence the (n_rows, 1) column vector.
        column_vector = merged_data[column].to_numpy().reshape(-1, 1)
        scaled_vector = scaler.fit_transform(column_vector)
        merged_data["rescaled_" + column] = scaled_vector


In [None]:
# Columns of merged_data that get a "rescaled_<name>" counterpart.
columns = [
    "ame_BE",
    "N",
    "Z",
    "Surf",
    "Asym",
    "Coul",
    "Pair",
    "Z_parity",
    "N_parity",
    "Z_distance",
    "N_distance",
    "ame_S1p",
    "ame_S1n",
    "ame_S2p",
    "ame_S2n",
]

rescale(columns)

### We save this merged dataframe in .csv format

In [None]:
# Write the full table (original and rescaled columns) to a ";"-separated
# file, without the dataframe index.
rescaled_csv_path = join(project_path, "rescaled_data/rescaled_data.csv")
merged_data.to_csv(rescaled_csv_path, sep=";", index=False)

### Before separating the previous merged dataframe into a training dataset and a test dataset, we will get rid of some nuclei: those for which any of `ame_S1n`, `ame_S1p`, `ame_S2n` or `ame_S2p` is negative. 

In [None]:
# Remove, in place, every row of merged_data in which one of the four ame_S*
# columns is negative. Rows with a missing value in these columns are kept.
for energy_column in ("ame_S2n", "ame_S2p", "ame_S1n", "ame_S1p"):
    negative_rows = merged_data.index[merged_data[energy_column] < 0]
    merged_data.drop(index=negative_rows, inplace=True)

In [None]:
#From the merged table, create one training dataset and a validation one.
#Rows whose proton number Z is listed in VALIDATION_Z form the validation set;
#all the other rows form the training set.
VALIDATION_Z = [10, 38, 54, 68, 82]
MIN_TRAIN_A = 16
SEPARATION_COLUMNS = ["ame_S2n", "ame_S2p", "ame_S1n", "ame_S1p"]

#Column layout of the saved files: these base columns first, then every other
#column of merged_data (the "rescaled_*" ones) in sorted order. This is the
#layout the previous row-by-row DataFrame.append implementation produced.
base_columns = ["Z","N","dz_BE/A","dz_ME","A","dz_BE","dz_S1n","dz_S1p","dz_S2p", "dz_S2n","ame_ME", "ame_BE/A", "ame_AM", "ame_BE", "ame_S1p", "ame_S1n", "ame_S2p", "ame_S2n", "BE_diff_dz_ame","Surf","Asym","Coul","Pair","Z_parity","N_parity","Z_distance","N_distance"]
extra_columns = sorted(merged_data.columns.difference(base_columns))
split_source = merged_data.reindex(columns=base_columns + extra_columns)


def _drop_negative(frame, column_names):
    """Return `frame` without the rows where any of `column_names` is negative.

    Rows holding a missing value in these columns are kept, and the index of
    the remaining rows is left untouched.
    """
    has_negative = (frame[column_names] < 0).any(axis=1)
    return frame[~has_negative]


#We separate the merged dataframe into training and validation datasets.
#DataFrame.append was removed in pandas 2.0 and copied the whole frame on every
#call; a boolean mask selects the same rows in a single vectorised pass and
#keeps the numeric dtypes of the columns.
is_validation = split_source["Z"].isin(VALIDATION_Z)

#reset_index renumbers each dataset from 0, as ignore_index=True used to do
validation_data = split_source[is_validation].reset_index(drop=True)
train_data = split_source[~is_validation].reset_index(drop=True)

#We don't use training data with A<16 because these light nuclei experience
#physics phenomena that are very far from trivial (halo etc)
train_data = train_data[~(train_data["A"] < MIN_TRAIN_A)]

#Safety net: rows with a negative ame_S* value are removed from both datasets
train_data = _drop_negative(train_data, SEPARATION_COLUMNS)
validation_data = _drop_negative(validation_data, SEPARATION_COLUMNS)

#The dataframe index is written as the first column of both files, exactly as
#before. NOTE(review): rescaled_data.csv is saved with index=False; confirm
#what the notebooks reading these two files expect before aligning them.
#to_csv returns None when given a path, so its result is not kept.
train_data.to_csv(join(project_path,"rescaled_data/train_rescaled_data.csv"),sep=";")
validation_data.to_csv(join(project_path,"rescaled_data/validation_rescaled_data.csv"),sep=";")

In [None]:
# Show the training dataframe as the output of this cell.
train_data

Unnamed: 0,Z,N,dz_BE/A,dz_ME,A,dz_BE,dz_S1n,dz_S1p,dz_S2p,dz_S2n,...,rescaled_Pair,rescaled_Surf,rescaled_Z,rescaled_Z_distance,rescaled_Z_parity,rescaled_ame_BE,rescaled_ame_S1n,rescaled_ame_S1p,rescaled_ame_S2n,rescaled_ame_S2p
29,6.0,10.0,6.898167,14.077316,16.0,110.370667,4.507875,23.188839,42.117482,5.902274,...,0.203634,0.123505,0.050847,0.090909,1.0,0.056133,0.934231,0.963426,0.898377,0.946441
30,6.0,11.0,6.508854,21.868843,17.0,110.650516,0.27985,23.692936,44.979583,4.787725,...,0.195708,0.129552,0.050847,0.090909,1.0,0.056483,0.924373,0.966567,0.897059,0.955251
31,6.0,12.0,6.349189,26.305328,18.0,114.285398,3.634882,26.230468,48.032087,3.914732,...,0.188452,0.135480,0.050847,0.090909,1.0,0.058481,0.934045,0.977019,0.896878,0.968236
36,7.0,9.0,7.419595,4.95211,16.0,118.713516,3.493556,12.850725,33.43796,13.427935,...,0.203634,0.123505,0.059322,0.045455,0.0,0.059584,0.929293,0.920830,0.919734,0.917646
37,7.0,10.0,7.341997,6.923058,17.0,124.813943,6.100427,14.443277,37.632115,9.593983,...,0.195708,0.129552,0.059322,0.045455,0.0,0.062394,0.938814,0.927118,0.906278,0.928452
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2710,116.0,176.0,7.135197,182.612793,292.0,2083.477571,7.381357,2.864731,4.224426,13.380959,...,0.000317,0.993052,0.983051,0.454545,1.0,0.995441,0.942264,0.889499,0.919053,0.822659
2711,116.0,177.0,7.130592,184.898193,293.0,2089.263418,5.785847,2.8804,4.649166,13.167204,...,0.000211,0.995371,0.983051,0.454545,1.0,0.998139,0.938157,0.889430,0.918224,0.823528
2712,117.0,176.0,7.11299,189.273193,293.0,2084.106181,7.416527,0.628611,3.493342,13.816363,...,0.000211,0.995371,0.991525,0.409091,0.0,0.995901,0.942205,0.880383,0.919720,0.819369
2713,117.0,177.0,7.109862,191.151123,294.0,2090.299525,6.193344,1.036107,3.916508,13.609871,...,0.000105,0.997687,0.991525,0.409091,0.0,0.998867,0.939733,0.882545,0.919695,0.821261
