Reading Data

In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive so the
# dataset stored on Drive can be read with ordinary file paths.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Location of the nuclide table on the mounted Drive.
RAW_CSV_PATH = '/content/drive/MyDrive/Projects/Nuclear Physics GPT/Data/nuclear_physics_df_web.csv'

# Parse the file using its first column as the index, then discard that index
# in favour of a fresh 0..n-1 RangeIndex.
raw_df = pd.read_csv(RAW_CSV_PATH, index_col=0)
raw_df = raw_df.reset_index(drop=True)

# Preview the first rows.
raw_df.head()

Unnamed: 0,Protons,Neutrons,Atomic Weight,Mass Excess,Binding Energy,Half-life
0,1,0,1.007825,7.2889705MeV,0MeV,Stable
1,1,1,2.014102,13.13572158MeV,1.112283MeV,Stable
2,1,2,3.016049,14.949806MeV,2.827266MeV,12.32 y
3,1,3,4.027806,25.901518MeV,1.400351MeV,9.917391304348×10-14 ns
4,1,4,5.035311,32.89244MeV,1.33636MeV,8.00350877193×10-14 ns


Cleaning the DataFrame

In [None]:
# Multiplier that converts one unit of each recognised symbol into seconds.
# Both look-alike code points for the micro prefix are accepted, and the year is
# 365 days so that it agrees with the 31536000 s year boundary used by
# map_to_continuous_label.
_HALF_LIFE_UNIT_SECONDS = {
    'ns': 1e-9,
    '\u00b5s': 1e-6,  # MICRO SIGN
    '\u03bcs': 1e-6,  # GREEK SMALL LETTER MU
    'ms': 1e-3,
    's': 1,
    'm': 60,
    'h': 3600,
    'd': 86400,
    'y': 365 * 86400,
}


def convert_half_life(value):
    """Convert a half-life string such as ``'12.32 y'`` into seconds.

    Parameters
    ----------
    value : str or missing
        A missing value, the literal ``'Stable'``, or text of the form
        ``'<number> <unit>'``. ``<number>`` may be written as
        ``'<base>×10<exponent>'`` (for example ``'9.9×10-14 ns'``).

    Returns
    -------
    float
        ``np.nan`` for missing or blank input, ``np.inf`` for ``'Stable'``,
        otherwise the half-life in seconds. A missing or unrecognised unit
        leaves the number unscaled (i.e. it is treated as seconds).

    Raises
    ------
    ValueError
        If the numeric part cannot be parsed by ``float``/``int``.
    """
    if pd.isnull(value):
        return np.nan

    # split() with no argument tolerates repeated spaces, tabs and
    # non-breaking spaces, which a plain split(' ') does not.
    tokens = str(value).split()
    if not tokens:
        return np.nan
    if tokens == ['Stable']:
        return np.inf

    # int() rejects the Unicode minus sign, so normalise it to ASCII '-'.
    number_text = tokens[0].replace('\u2212', '-')
    unit = tokens[1] if len(tokens) > 1 else ''

    # Handle scientific notation written as '<base>×10<exponent>'.
    if '×10' in number_text:
        base, exponent = number_text.split('×10')
        magnitude = float(base) * 10**int(exponent)
    else:
        magnitude = float(number_text)

    # Unknown or absent units keep the historical fallback: no scaling.
    return magnitude * _HALF_LIFE_UNIT_SECONDS.get(unit, 1)



# (lower, upper) edges in seconds of the logarithmic bands; band k (1-based)
# maps [lower, upper) onto [k, k + 1).
_LABEL_BANDS = (
    (1e-6, 1e-3),
    (1e-3, 1),
    (1, 60),
    (60, 3600),
    (3600, 86400),
    (86400, 31536000),
)


def map_to_continuous_label(x, upper_bound=None):
    """Map a half-life in seconds onto a continuous, piecewise scale.

    Values below 1e-6 s are scaled linearly (``x / 1e-6``). Each following
    band between consecutive edges 1e-6, 1e-3, 1, 60, 3600, 86400 and
    31536000 seconds is mapped logarithmically onto one unit interval
    starting at 1, 2, ..., 6. Values of 31536000 s or more are mapped
    logarithmically onto ``7 + log10(x / 31536000) / log10(upper_bound / 31536000)``.

    Parameters
    ----------
    x : float
        Half-life in seconds.
    upper_bound : float, optional
        Value that should map to 8 in the top band. When omitted, the
        module-level ``max_value`` is used, so existing single-argument
        callers such as ``Series.apply(map_to_continuous_label)`` keep
        working; ``max_value`` must then be defined before the top band is hit.

    Returns
    -------
    float
        The continuous label. NaN input falls through every comparison and
        yields NaN from the top-band formula.
    """
    if x < 1e-6:
        return x / 1e-6

    for offset, (low, high) in enumerate(_LABEL_BANDS, start=1):
        if x < high:
            return offset + (np.log10(x / low) / np.log10(high / low))

    if upper_bound is None:
        # Backward-compatible fallback to the notebook-level global.
        upper_bound = max_value
    return 7 + (np.log10(x / 31536000) / np.log10(upper_bound / 31536000))


In [None]:
# Work on a copy so raw_df stays untouched and this cell can be re-run.
clean_df = raw_df.copy()

# Strip the literal 'MeV' suffix from the energy columns and convert to float.
# regex=False pins literal matching instead of relying on the pandas default,
# which differs between pandas versions.
clean_df['Mass Excess'] = clean_df['Mass Excess'].str.replace('MeV', '', regex=False).astype(float)
clean_df['Binding Energy'] = clean_df['Binding Energy'].str.replace('MeV', '', regex=False).astype(float)

# Parse the half-life strings with convert_half_life.
clean_df['Half-life'] = clean_df['Half-life'].apply(convert_half_life)

# Replace np.inf values with the SQUARE of the largest finite half-life, so the
# replaced entries stay finite but lie far above every other value.
max_halflife = clean_df.loc[clean_df['Half-life'] != np.inf, 'Half-life'].max()
clean_df['Half-life'] = clean_df['Half-life'].replace(np.inf, max_halflife**2)

# Create Different Half-life Metrics
# Order of magnitude: floor(log10(|x|)); zero has no logarithm, so it is pinned to -60.
clean_df['Half-life Magnitude'] = clean_df['Half-life'].apply(lambda x: np.floor(np.log10(np.abs(x))) if x != 0 else -60)

# map_to_continuous_label reads the module-level max_value for its top band,
# so it must be assigned before the apply below.
max_value = clean_df['Half-life'].max()  # Get the maximum value in the 'Half-life' column
clean_df['Half-life Time'] = clean_df['Half-life'].apply(map_to_continuous_label)

# Display cleaned data
clean_df.head()

Unnamed: 0,Protons,Neutrons,Atomic Weight,Mass Excess,Binding Energy,Half-life,Half-life Magnitude,Half-life Time
0,1,0,1.007825,7.28897,0.0,2.486929e+62,62.0,8.0
1,1,1,2.014102,13.135722,1.112283,2.486929e+62,62.0,8.0
2,1,2,3.016049,14.949806,2.827266,388572800.0,8.0,7.019868
3,1,3,4.027806,25.901518,1.400351,9.917391000000001e-23,-23.0,9.917391e-17
4,1,4,5.035311,32.89244,1.33636,8.003509000000001e-23,-23.0,8.003509000000001e-17


In [None]:
# Write the cleaned table to 'nuclear_physics_clean_df.csv', a relative path
# resolved against the current working directory. The DataFrame index is
# written as the first column (to_csv default). Columns added to clean_df in
# later cells are not part of this file.
clean_df.to_csv('nuclear_physics_clean_df.csv')

In [None]:
# Decay Mode
# One distinct integer code per decay-mode label.
decay_mapping = {'Stable': 0,
                 'β-': 1, 'β+': 2, 'α': 3, 'n': 4, 'p': 5, 'ϵ': 6, 'e+': 7,
                 '2n': 8, '2p': 9, '2β+': 10, '2β-': 11,
                 'β-α': 12, 'β-n': 13, 'β+α': 14, 'β+p': 15, 'β-2n': 16,
                 'SF': 17, 'β+SF': 18}

# Coarse grouping of the same labels by kind of decay.
decay_type_mapping = {'Stable': 0, # Stable Isotopes
                          'β-': 1, 'β+': 1, 'α': 1, 'n': 1, 'p': 1, 'ϵ': 1, 'e+': 1, # Single Decay Modes
                          '2n': 2, '2p': 2, '2β+': 2, '2β-': 2, # Double Decay Modes
                          'β-α': 3, 'β-n': 3, 'β+α': 3, 'β+p': 3, 'β-2n': 3, # Composite Decay Modes
                          'SF': 4, 'β+SF': 4} # Special Modes

# Grouping by emitted particle. Each label appears exactly once: the previous
# literal listed 'β-', 'β+', 'β-n' and 'β-2n' twice, and because the last
# occurrence of a duplicate dict key wins, the earlier values (2, 2, 3, 3) were
# never used. The effective values (3, 3, 5, 5) are kept here.
# NOTE(review): confirm that 'β-'/'β+' belong in group 3 and 'β-n'/'β-2n' in
# group 5 rather than the groups their first occurrences suggested.
particle_decay_mapping = {'Stable': 0, # Stable
                          'α': 1, # Alpha
                          'ϵ': 2, 'e+': 2, # Electron Capture / e+
                          'β-': 3, 'β+': 3, 'β-α': 3, 'β+α': 3, 'β+p': 3, '2β+': 3, '2β-': 3, # Beta-Related
                          'p': 4, '2p': 4, # Proton
                          'n': 5, '2n': 5, 'β-n': 5, 'β-2n': 5, # Neutron
                          'SF': 6, 'β+SF': 6} # SF-based modes

# Apply the mapping to create a new numerical column
# NOTE(review): the raw_df preview in this notebook lists no 'Decay Mode'
# column; confirm it exists in clean_df before running this cell.
# The two derived columns must be computed first, because the last line
# overwrites the original 'Decay Mode' labels with integer codes. Labels absent
# from a mapping become NaN, so re-running this cell on the already-encoded
# column turns all three columns into NaN.
clean_df['Decay Mode Particle'] = clean_df['Decay Mode'].map(particle_decay_mapping)
clean_df['Decay Mode Type'] = clean_df['Decay Mode'].map(decay_type_mapping)
clean_df['Decay Mode'] = clean_df['Decay Mode'].map(decay_mapping)