## Data rescaling 
----
Artificial neural networks (ANNs) work best when their inputs have small values. Some of the inputs to our ANN will be the proton and neutron numbers, which can reach values of up to 200. This is why we need to rescale them, and in this notebook we do so by rescaling them to the range between 0 and 1. We will then separate the data into two different datasets (and thus two different .csv files) to create a training dataset and a test dataset for our ANN. We also need to save the data that has not been separated, so that the values can be rescaled back in the following notebooks. 

In [1]:
#Libraries for data processing
import numpy as np 
import pandas as pd

#Libraries for plotting
import matplotlib.pyplot as plt
import seaborn as sns 
sns.set(color_codes = True)
sns.set(font_scale=1.5) #fixing font size

#Library for rescaling
from sklearn.preprocessing import MinMaxScaler

In [2]:
# Load the merged table; the file uses ";" as its field separator.
merged_data = pd.read_csv("2_processed_data/merged_data.csv", sep=";")

In [3]:
# A single shared scaler mapping values onto [0, 1]. rescale() re-fits it on
# every column, so after a call it only holds the fit of the last column.
scaler = MinMaxScaler(feature_range=(0, 1))


def rescale(list):
    """Add a min-max rescaled copy of each named column to ``merged_data``.

    For every name in ``list`` the shared ``scaler`` is fitted on that column
    of the module-level ``merged_data`` frame, and the transformed values are
    stored in a new column named ``"rescaled_" + name``.

    Parameters
    ----------
    list : iterable of str
        Names of the ``merged_data`` columns to rescale. (The parameter name
        shadows the built-in ``list``; it is kept so existing calls keep
        working.)

    Returns
    -------
    None
        ``merged_data`` is modified in place.
    """
    for name in list:
        raw_values = merged_data[name].to_numpy()
        # Present the column as a single-feature 2-D array for the scaler.
        single_feature = raw_values.reshape(-1, 1)
        merged_data["rescaled_" + name] = scaler.fit_transform(single_feature)

In [4]:
# Columns that receive a "rescaled_<name>" counterpart in merged_data.
columns = [
    "ame_BE",
    "N", "Z",
    "Surf", "Asym", "Coul", "Pair",
    "Z_parity", "N_parity",
    "Z_distance", "N_distance",
    "ame_S1p", "ame_S1n", "ame_S2p", "ame_S2n",
]

rescale(columns)

### We save this merged dataframe to .csv format

In [5]:
# Written before any rows are removed further down, so this file holds the
# complete table (original and rescaled columns); the row index is not saved.
merged_data.to_csv("3_rescaled_data/rescaled_data.csv",sep=";", index=False)

### Before separating the previous merged dataframe into a training dataset and a test dataset, we remove the nuclei for which any of `ame_S1n`, `ame_S1p`, `ame_S2n` or `ame_S2p` is negative. 

In [6]:
# Remove, one column at a time, every row whose value in that column is
# negative. Rows where the value is missing (NaN) are kept, because the
# comparison "< 0" is False for them.
for filter_column in ("ame_S2n", "ame_S2p", "ame_S1n", "ame_S1p"):
    negative_labels = merged_data.index[merged_data[filter_column] < 0]
    merged_data.drop(negative_labels, inplace=True)

In [7]:
#From the merged table, create one training dataset and a test dataset.
#
#The split is done with boolean masks instead of growing two frames one row
#at a time: DataFrame.append was removed in pandas 2.0, and each call copied
#the whole frame. No pre-built empty frames are needed any more.

#Columns that come first in the output tables, in this order. Any other
#column of merged_data (e.g. the "rescaled_*" ones) follows in its existing
#order, which is the layout the row-wise append used to produce.
LEADING_COLUMNS = ["Z","N","dz_BE/A","dz_ME","A","dz_BE","dz_S1n","dz_S1p","dz_S2p", "dz_S2n","ame_ME", "ame_BE/A", "ame_AM", "ame_BE", "ame_S1p", "ame_S1n", "ame_S2p", "ame_S2n", "BE_diff_dz_ame","Surf","Asym","Coul","Pair","Z_parity","N_parity","Z_distance","N_distance"]

#Rows whose Z is one of these values form the test set; all others are training data
TEST_Z_VALUES = [10,38,54,68,82]

#Rows with a negative value in any of these columns are discarded
NON_NEGATIVE_COLUMNS = ["ame_S2n", "ame_S2p", "ame_S1n", "ame_S1p"]

ordered_columns = LEADING_COLUMNS + [
    name for name in merged_data.columns if name not in LEADING_COLUMNS
]
#reindex (rather than plain selection) so that a leading column missing from
#merged_data shows up as an all-NaN column instead of raising
ordered_data = merged_data.reindex(columns=ordered_columns)

#We separate the merged dataframe into training and test datasets.
#Each part is renumbered from 0 so the index written to the .csv files
#matches what append(..., ignore_index=True) used to give.
is_test_row = ordered_data["Z"].astype(int).isin(TEST_Z_VALUES)
test_data = ordered_data[is_test_row].reset_index(drop=True)
train_data = ordered_data[~is_test_row].reset_index(drop=True)

#We don't use training data with A<16 because these light nuclei experience
#physics phenomena that are very far from trivial (halo etc)
train_data = train_data[~(train_data["A"] < 16)]

#Discard rows with a negative value in any of the NON_NEGATIVE_COLUMNS.
#Written as "not (< 0)" so that rows with missing values are kept, as before.
train_data = train_data[~(train_data[NON_NEGATIVE_COLUMNS] < 0).any(axis=1)].copy()
test_data = test_data[~(test_data[NON_NEGATIVE_COLUMNS] < 0).any(axis=1)].copy()

#NOTE(review): column dtypes now follow merged_data. The old row-wise append
#could upcast them, so number formatting in the .csv files may differ
#(e.g. 10 instead of 10.0) -- confirm downstream readers don't depend on it.

#to_csv returns None when given a path; the two names are only kept in case
#a later cell refers to them.
train_merged_csv = train_data.to_csv("3_rescaled_data/train_rescaled_data.csv",sep=";")
test_merged_csv = test_data.to_csv("3_rescaled_data/test_rescaled_data.csv",sep=";")