## Data processing
----
In this notebook, we will compute different quantities which will be useful to the training of our artificial neural network as well as for plotting. 
We will also change the units of some data to be sure everything is uniform. 
In the end, we will create a merged .csv file which will contain all of the useful data for the next part of this project.


In [None]:
#Libraries for data processing
import pandas as pd
import numpy as np
from tqdm import tqdm

In [None]:
# --- Google Drive access (Google Colab only) ---
from logging import RootLogger  # NOTE(review): not used in the cells visible here
from os.path import join        # joins the mount point and the project folder

from google.colab import drive

# Location at which Colab exposes the mounted drive
root = "/content/drive"

# Project folder, relative to the mount point
my_google_drive_path = "MyDrive/StudentProject2023"

# Mount Google Drive at `root`
drive.mount(root)

# Base folder from which the data file paths below are built
project_path = join(root, my_google_drive_path)

### We will calculate some more data for DZ10
- Binding Energy

- S1n & S1p

- S2p & S2n

Data is already in MeV

#### A and Binding Energy

In [None]:
# Load the DZ10 table (";"-separated CSV) into the dataframe `dz_data`
dz_csv_path = join(project_path, "2_processed_data/dz_data.csv")
dz_data = pd.read_csv(dz_csv_path, sep=";")

# Mass number: A = Z + N
dz_data["A"] = dz_data["Z"] + dz_data["N"]

# Total binding energy: the file stores BE/A, so multiply back by A
dz_data["dz_BE"] = dz_data["dz_BE/A"] * dz_data["A"]

#### Computation of S1n and S1p


In [None]:
# One-neutron separation energy: S1n(Z, N) = BE(Z, N) - BE(Z, N-1).
# The rows are ordered along each isotopic chain (fixed Z, increasing N) and
# the shift is applied per chain, so the lightest isotope of an element is
# never differenced against the heaviest isotope of the previous element.
# (sort_values returns a new frame; its result has to be assigned.)
dz_data = dz_data.sort_values(by=['Z', 'N'], ascending=True)
isotopes = dz_data.groupby('Z')
# True only where the previous row of the chain really is the N-1 neighbour
has_n_minus_1 = (dz_data['N'] - isotopes['N'].shift(1)) == 1
# Rows without an N-1 neighbour in the table get NaN instead of a bogus value
dz_data['dz_S1n'] = (dz_data['dz_BE'] - isotopes['dz_BE'].shift(1)).where(has_n_minus_1)

# One-proton separation energy: S1p(Z, N) = BE(Z, N) - BE(Z-1, N).
# Same procedure along each isotonic chain (fixed N, increasing Z; sorting by
# A within a fixed N is the same as sorting by Z because A = Z + N).
# dz_data is left sorted by (N, A), as before.
dz_data = dz_data.sort_values(by=['N', 'A'], ascending=True)
isotones = dz_data.groupby('N')
has_z_minus_1 = (dz_data['Z'] - isotones['Z'].shift(1)) == 1
dz_data['dz_S1p'] = (dz_data['dz_BE'] - isotones['dz_BE'].shift(1)).where(has_z_minus_1)

#### Computation of S2n and S2p

In [None]:
# Two-proton separation energy: S2p(Z, N) = BE(Z, N) - BE(Z-2, N).
# Sort explicitly (do not rely on the order left by another cell) and shift
# inside each isotonic chain (fixed N) so that values never leak from one
# chain into the next.
dz_data = dz_data.sort_values(by=['N', 'Z'], ascending=True)
isotones = dz_data.groupby('N')
# True only where the row two places earlier in the chain is the Z-2 nucleus
has_z_minus_2 = (dz_data['Z'] - isotones['Z'].shift(2)) == 2
# Rows for which that check fails get NaN instead of a bogus value
dz_data['dz_S2p'] = (dz_data['dz_BE'] - isotones['dz_BE'].shift(2)).where(has_z_minus_2)

# Two-neutron separation energy: S2n(Z, N) = BE(Z, N) - BE(Z, N-2).
# Same procedure along each isotopic chain (fixed Z; sorting by A within a
# fixed Z is the same as sorting by N). dz_data is left sorted by (Z, A).
dz_data = dz_data.sort_values(by=['Z', 'A'], ascending=True)
isotopes = dz_data.groupby('Z')
has_n_minus_2 = (dz_data['N'] - isotopes['N'].shift(2)) == 2
dz_data['dz_S2n'] = (dz_data['dz_BE'] - isotopes['dz_BE'].shift(2)).where(has_n_minus_2)

### We will calculate some data for AME

- Binding energy

- S1n, S1p, S2n & S2p

We will also drop some data that we finally don't use

#### Binding Energy

In [None]:
# Load the AME table (";"-separated CSV) into the dataframe `ame_data`
ame_csv_path = join(project_path, "2_processed_data/mass_data.csv")
ame_data = pd.read_csv(ame_csv_path, sep=";")

# Total binding energy: the file stores BE/A, so multiply back by A
ame_data['ame_BE'] = ame_data['ame_BE/A'] * ame_data['A']

#### Computation of S1n, S1p, S2n and S2p

In [None]:
# Proton separation energies along each isotonic chain (fixed N, increasing Z):
#   S1p(Z, N) = BE(Z, N) - BE(Z-1, N)      S2p(Z, N) = BE(Z, N) - BE(Z-2, N)
# The shift is applied per chain so that the first nuclei of a chain are not
# differenced against nuclei of the previous chain; rows whose Z-1 / Z-2
# partner is not where the shift expects it get NaN instead of a bogus value.
ame_data = ame_data.sort_values(by=['N', 'Z'], ascending=True)
isotones = ame_data.groupby('N')
has_z_minus_2 = (ame_data['Z'] - isotones['Z'].shift(2)) == 2
has_z_minus_1 = (ame_data['Z'] - isotones['Z'].shift(1)) == 1
ame_data['ame_S2p'] = (ame_data['ame_BE'] - isotones['ame_BE'].shift(2)).where(has_z_minus_2)
ame_data['ame_S1p'] = (ame_data['ame_BE'] - isotones['ame_BE'].shift(1)).where(has_z_minus_1)

# Neutron separation energies along each isotopic chain (fixed Z, increasing N):
#   S1n(Z, N) = BE(Z, N) - BE(Z, N-1)      S2n(Z, N) = BE(Z, N) - BE(Z, N-2)
# ame_data is left sorted by (Z, N), as before.
ame_data = ame_data.sort_values(by=['Z', 'N'], ascending=True)
isotopes = ame_data.groupby('Z')
has_n_minus_2 = (ame_data['N'] - isotopes['N'].shift(2)) == 2
has_n_minus_1 = (ame_data['N'] - isotopes['N'].shift(1)) == 1
ame_data['ame_S2n'] = (ame_data['ame_BE'] - isotopes['ame_BE'].shift(2)).where(has_n_minus_2)
ame_data['ame_S1n'] = (ame_data['ame_BE'] - isotopes['ame_BE'].shift(1)).where(has_n_minus_1)

### From these two documents we will merge all the data we need in one dataframe and .csv file

#### First of all, we will change units and calculate some quantities that may be useful to train our artificial neural networks

In [None]:
# Keep only the nuclei present in both tables (default inner join on Z, N, A)
merged_data = pd.merge(dz_data, ame_data, on=["Z", "N", "A"])

# Unit conversion: the AME file gives energies in keV, the DZ10 file in MeV.
# Everything is expressed in MeV at the end, so the AME energy columns are
# divided by 1000.
kev_columns = [
    "ame_BE/A",
    "ame_ME",
    "ame_BE",
    "ame_S2p",
    "ame_S2n",
    "ame_S1p",
    "ame_S1n",
]
for column in kev_columns:
    merged_data[column] = merged_data[column] / 1000

# Difference in binding energy, AME - DZ10
merged_data["BE_diff_dz_ame"] = merged_data["ame_BE"] - merged_data["dz_BE"]


#### We compute some quantities related to the Liquid Drop Model


In [None]:
# Coefficients multiplying the A, Surf, Coul and Asym terms of ldm_BE below.
# NOTE(review): alphaP (presumably the pairing coefficient) is defined but
# does not enter ldm_BE - confirm whether the pairing term is left out on purpose.
alphaV, alphaS, alphaC, alphaA, alphaP = 15.49, 17.23, 0.697, 23.285, 12

# Short aliases for the columns used repeatedly below
mass_number = merged_data["A"]
proton_number = merged_data["Z"]
neutron_number = merged_data["N"]

# Per-nucleus quantities stored as new columns
merged_data["Surf"] = np.power(mass_number, 2/3)                                        # A^(2/3)
merged_data["Asym"] = ((neutron_number - proton_number)**2) / mass_number               # (N-Z)^2 / A
merged_data["Coul"] = (proton_number*(proton_number - 1)) / np.power(mass_number, 1/3)  # Z(Z-1) / A^(1/3)
merged_data["Pair"] = np.power(mass_number, -1/2)                                       # A^(-1/2), stored only
merged_data["Z_parity"] = np.power(-1, proton_number)                                   # (-1)^Z
merged_data["N_parity"] = np.power(-1, neutron_number)                                  # (-1)^N

# Liquid-drop binding energy built from the four terms above
volume_term = alphaV * mass_number
surface_term = alphaS * merged_data["Surf"]
coulomb_term = alphaC * merged_data["Coul"]
asymmetry_term = alphaA * merged_data["Asym"]
merged_data["ldm_BE"] = volume_term - surface_term - coulomb_term - asymmetry_term

#### Then we compute other quantities that carry more physical information, such as the parity of N and Z and their distance to the nearest magic number

In [None]:
# Parity of the proton and neutron numbers: (-1)^Z and (-1)^N
merged_data["Z_parity"] = np.power(-1, merged_data["Z"])
merged_data["N_parity"] = np.power(-1, merged_data["N"])

magic_numbers = [2, 8, 20, 28, 50, 82, 126, 184]

# Distance of Z and of N to the nearest magic number.
# Computed for all rows at once: broadcasting the column (n_rows, 1) against
# the magic numbers (n_magic,) gives an (n_rows, n_magic) table of distances,
# and the row-wise minimum is the distance to the closest magic number.
# This replaces a Python-level row loop that filled object-dtype columns;
# the columns are now plain numeric columns.
magic_array = np.asarray(magic_numbers)
for nucleon in ("Z", "N"):
    counts = merged_data[nucleon].to_numpy()
    merged_data[f"{nucleon}_distance"] = np.abs(counts[:, None] - magic_array).min(axis=1)

In [None]:
# Write the merged dataframe to a ";"-separated .csv file, without the index column
merged_csv_path = join(project_path, "2_processed_data/merged_data.csv")
merged_data.to_csv(merged_csv_path, index=False, sep=";")

In [None]:
# NOTE(review): `train_data` is not defined in any cell visible in this
# notebook, so this cell presumably only worked because of leftover kernel
# state and would raise NameError after Restart & Run All - confirm where
# `train_data` comes from, or display the dataframe built above instead.
train_data

Unnamed: 0,Z,N,dz_BE/A,dz_ME,A,dz_BE,dz_S1n,dz_S1p,dz_S2p,dz_S2n,...,N_distance,N-Z,ame_AM_unc,ame_BDE,ame_BDE_unc,ame_BE/A_unc,ame_ME_unc,ame_S1n,ame_S1p,ldm_BE
29,6,10,6.898167,14.077316,16,110.370667,4.507875,23.188839,42.117482,5.902274,...,2,4.0,3.840,8010.2260,4.2540,0.2236,3.578,4.250330,22.552217,106.853180
30,6,11,6.508854,21.868843,17,110.650516,0.27985,23.692936,44.979583,4.787725,...,3,5.0,18.641,13161.8007,22.9464,1.0215,17.365,0.733572,23.368789,107.039266
31,6,12,6.349189,26.305328,18,114.285398,3.634882,26.230468,48.032087,3.914732,...,4,6.0,32.206,11806.0982,35.2821,1.6667,30.000,4.183932,26.086027,105.930783
36,7,9,7.419595,4.95211,16,118.713516,3.493556,12.850725,33.43796,13.427935,...,1,2.0,2.470,10420.9094,2.3014,0.1438,2.301,2.488849,11.478210,120.997675
37,7,10,7.341997,6.923058,17,124.813943,6.100427,14.443277,37.632115,9.593983,...,2,3.0,16.103,8678.8430,15.0000,0.8824,15.000,5.885146,13.113026,125.701707
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2710,116,176,7.135197,182.612793,292,2083.477571,7.381357,2.864731,4.224426,13.380959,...,8,60.0,819.000,-5488.0000,1014.0000,3.0000,763.000,7.116000,3.333000,2076.143461
2711,116,177,7.130592,184.898193,293,2089.263418,5.785847,2.8804,4.649166,13.167204,...,7,61.0,553.000,-3860.0000,933.0000,2.0000,515.000,5.651000,3.315000,2082.863076
2712,117,176,7.11299,189.273193,293,2084.106181,7.416527,0.628611,3.493342,13.816363,...,8,59.0,835.000,-4374.0000,1053.0000,3.0000,778.000,7.095000,0.963000,2077.589892
2713,117,177,7.109862,191.151123,294,2090.299525,6.193344,1.036107,3.916508,13.609871,...,7,60.0,637.000,-2923.0000,811.0000,2.0000,593.000,6.213000,1.525000,2084.484138
