First, we import the necessary libraries and mount Google Drive in this notebook.


In [10]:
#Libraries for data processing
import numpy as np 
import pandas as pd
import csv

In [11]:
# --- Imports used by this cell ---
# NOTE(review): RootLogger is not referenced anywhere in this cell; it looks
# like an accidental auto-import. Confirm it is unused elsewhere before removing.
from logging import RootLogger
from os.path import join            # joins the Drive root with the project sub-path
from google.colab import drive      # Colab helper used to mount Google Drive

# --- Locations ---
# Default mount point of Google Drive inside the Colab runtime.
root = "/content/drive"

# Path of the project folder, relative to the mount point.
my_google_drive_path = "MyDrive/StudentProject2023"

# --- Mount the drive at `root` ---
drive.mount(root)

# Full path to the project folder; the other cells build their file paths from it.
project_path = join(root, my_google_drive_path)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


We will process and extract the data we need from the AME mass table

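Strangely, the first time this cell is run, only 2570 entries are obtained when there should be 3558; if the cell is run again, all is fine. This symptom is typical of an output file that has not been closed (and therefore not fully flushed to disk) before being read back.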


In [12]:
# Convert the raw AME mass table into a ";"-separated CSV file.
#
# Both files are handled with `with` blocks so that they are closed (and the
# CSV is completely flushed to disk) as soon as this cell finishes. Leaving
# the output file open means that a later read of the CSV can see only part
# of the rows.

# Column names of the produced CSV file (first line of the file).
ame_csv_header_row = "N-Z;N;Z;A;ame_ME;ame_ME_unc;ame_BE/A;ame_BE/A_unc;ame_BDE;ame_BDE_unc;ame_AM;ame_AM_unc\n"


def _standardize_ame_line(line):
    """Turn one whitespace-separated AME mass-table row into 12 clean fields.

    The raw rows do not all have the same number of tokens (some values are
    empty), so the row is normalised step by step. The returned list follows
    the order of ``ame_csv_header_row``: N-Z, N, Z, A, mass excess and its
    uncertainty, binding energy per nucleon and its uncertainty, beta-decay
    energy and its uncertainty, atomic mass and its uncertainty.

    Parameters
    ----------
    line : str
        One raw line of the mass table.

    Returns
    -------
    list of str
        The normalised fields, with every "#" removed from the first 12.

    Raises
    ------
    ValueError
        If the row contains no "B-" token, or if a token expected to be an
        integer cannot be parsed.
    IndexError
        If the row has fewer tokens than the layout handled here.
    """
    fields = line.split()  # split on any run of whitespace

    # Every row contains a token that is exactly "B-". Its position tells how
    # many optional tokens come before it; the layout we want has it at index 9.
    # NOTE(review): the optional tokens are presumably a leading control
    # character and a flag after the element symbol -- confirm against the
    # AME file format description.
    if fields.index("B-") == 11:
        # Two optional tokens present: drop original indices 0 and 6.
        fields.pop(0)
        fields.pop(5)  # 5 because index 0 has already been removed

    if fields.index("B-") == 10:
        # One optional token present. If the first four tokens already satisfy
        # N - Z == (N-Z) and N + Z == A, the row starts with N-Z and the extra
        # token is the one at index 5; otherwise the extra token is the leading one.
        starts_with_n_minus_z = (
            int(fields[1]) - int(fields[2]) == int(fields[0])
            and int(fields[1]) + int(fields[2]) == int(fields[3])
        )
        if starts_with_n_minus_z:
            fields.pop(5)
        else:
            fields.pop(0)

    if len(fields) != 15:
        # The beta-decay energy uncertainty is sometimes empty: insert a "0".
        fields.insert(11, "0")

    # A beta-decay energy containing "*" is replaced by "0".
    if "*" in fields[10]:
        fields[10] = "0"

    fields.pop(4)  # remove the element symbol
    fields.pop(8)  # remove the "B-" token (index 9 before the previous pop)

    # The atomic mass is spread over two tokens (indices 10 and 11): the second
    # token becomes the fractional part, with its own "." removed.
    atomic_mass_fraction = fields.pop(11)
    fields[10] = fields[10] + "." + atomic_mass_fraction.replace(".", "")

    # 12 fields are expected at this point; strip the "#" marks so that the
    # values can later be converted to numbers.
    for i in range(12):
        fields[i] = fields[i].replace("#", "")

    return fields


# Read every line of the AME mass table.
with open(join(project_path, "raw_data/mass_1.mas20.txt"), "r") as mass_file:
    element_list = mass_file.readlines()

# Write the header followed by one normalised row per input line.
# NOTE(review): the original comment expected 3378 lines in the CSV file
# (header + 3377 rows) while the notebook text quotes 3558 entries --
# confirm the expected count.
with open(join(project_path, "processed_data/mass_data.csv"), "w") as mass_csv:
    mass_csv.write(ame_csv_header_row)

    for element in element_list:
        splitted_line = _standardize_ame_line(element)
        mass_csv.write(";".join(splitted_line) + "\n")
    


We will now process the DZ10 data produced by the Fortran program.

The same problem occurs here: the first run yields 13685 of the 16042 entries, and running the cell again yields 16040.


In [14]:
# Convert the raw DZ10 output into a ";"-separated CSV file.
#
# Both files are handled with `with` blocks so that they are closed (and the
# CSV is completely flushed to disk) as soon as this cell finishes. Leaving
# the output file open means that a later read of the CSV can see only part
# of the rows.

# Read every line of the DZ10 document.
with open(join(project_path, "raw_data/duzu.txt"), "r") as duzu_file:
    dz_element_list = duzu_file.readlines()

# Column names of the produced CSV file (first line of the file).
dz_csv_header_row = "Z;N;dz_BE/A;dz_ME\n"

with open(join(project_path, "processed_data/dz_data.csv"), "w") as dz_csv:
    dz_csv.write(dz_csv_header_row)

    for element in dz_element_list:
        dz_split_line = element.split()

        # Remove the original tokens 0, 2, 4 and 6. Each pop shifts the
        # remaining tokens one place to the left, hence pop(0) ... pop(3).
        # NOTE(review): these are presumably the text labels placed before
        # each value -- confirm against the layout of duzu.txt.
        for drop_index in range(4):
            dz_split_line.pop(drop_index)

        # Skip rows whose dz_BE/A or dz_ME value is NaN.
        if "NaN" in dz_split_line[2] or "NaN" in dz_split_line[3]:
            continue

        # Skip rows with a negative binding energy. The built-in float is
        # enough for a sign test and, unlike np.float128, exists on every
        # platform.
        if float(dz_split_line[2]) < 0:
            continue

        dz_csv.write(";".join(dz_split_line) + "\n")
