In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from scipy import stats
import pandas as pd

df = pd.read_csv('/content/drive/MyDrive/Projects/Nuclear Physics GPT/Data/nuclear_physics_clean_df.csv', index_col=0).reset_index(drop=True)

df = df.dropna()
df.head()

Unnamed: 0,Protons,Neutrons,Atomic Weight,Mass Excess,Binding Energy,Half-life,Half-life Magnitude,Half-life Time
0,1,0,1.007825,7.28897,0.0,2.486929e+62,62.0,8.0
1,1,1,2.014102,13.135722,1.112283,2.486929e+62,62.0,8.0
2,1,2,3.016049,14.949806,2.827266,388572800.0,8.0,7.019868
3,1,3,4.027806,25.901518,1.400351,9.917391000000001e-23,-23.0,9.917391e-17
4,1,4,5.035311,32.89244,1.33636,8.003509000000001e-23,-23.0,8.003509000000001e-17


In [3]:
def calculate_atomic_weight(proton, neutron):
  PROTON_MASS = 1.007276 # in atomic mass units
  NEUTRON_MASS = 1.008664 # in atomic mass units

  atomic_weight = proton * PROTON_MASS + neutron * NEUTRON_MASS
  return atomic_weight

def create_border_elements(df):
  # Identify the "top" border elements
  unique_protons = df['Protons'].unique()
  top_border_neutrons = {proton: df[df['Protons'] == proton]['Neutrons'].max() + 1 for proton in unique_protons}
  # Identify the "bottom" border elements
  bottom_border_neutrons = {proton: df[df['Protons'] == proton]['Neutrons'].min() - 1 for proton in unique_protons if df[df['Protons'] == proton]['Neutrons'].min() - 1 >= 0}

  border_elements = []
  for proton, neutron in list(top_border_neutrons.items()) + list(bottom_border_neutrons.items()):
    atomic_weight = calculate_atomic_weight(proton, neutron)
    border_elements.append([proton, neutron, atomic_weight, 0, 0, 0, 0, 0, 1])

  border_df = pd.DataFrame(border_elements, columns=df.columns)
  return pd.concat([df, border_df]).reset_index(drop=True)

In [4]:
df['Artificial'] = 0
df = create_border_elements(df)
df = create_border_elements(df)

df

Unnamed: 0,Protons,Neutrons,Atomic Weight,Mass Excess,Binding Energy,Half-life,Half-life Magnitude,Half-life Time,Artificial
0,1,0,1.007825,7.288970,0.000000,2.486929e+62,62.0,8.000000e+00,0
1,1,1,2.014102,13.135722,1.112283,2.486929e+62,62.0,8.000000e+00,0
2,1,2,3.016049,14.949806,2.827266,3.885728e+08,8.0,7.019868e+00,0
3,1,3,4.027806,25.901518,1.400351,9.917391e-23,-23.0,9.917391e-17,0
4,1,4,5.035311,32.892440,1.336360,8.003509e-23,-23.0,8.003509e-17,0
...,...,...,...,...,...,...,...,...,...
3612,114,169,285.293680,0.000000,0.000000,0.000000e+00,0.0,0.000000e+00,1
3613,115,170,287.309620,0.000000,0.000000,0.000000e+00,0.0,0.000000e+00,1
3614,116,171,289.325560,0.000000,0.000000,0.000000e+00,0.0,0.000000e+00,1
3615,117,172,291.341500,0.000000,0.000000,0.000000e+00,0.0,0.000000e+00,1


In [5]:
df['N-Z'] = df['Neutrons'] - df['Protons']
df['Z-N'] = df['Protons'] - df['Neutrons']

df['Abs(Z-N)'] = abs(df['Protons'] - df['Neutrons'])

df['N/Z'] = np.where(df['Protons'] > 0, df['Neutrons'] / df['Protons'], 0)
df['Z/N'] = np.where(df['Neutrons'] > 0, df['Protons'] / df['Neutrons'], 0)

df['Z/Atomic Weight'] = df['Protons'] / df['Atomic Weight']
df['N/Atomic Weight'] = df['Neutrons'] / df['Atomic Weight']

df['Abs(Z-N)/Atomic Weight'] = df['Abs(Z-N)'] / df['Atomic Weight']

In [6]:
def add_transformed_columns(df):
    new_cols = pd.DataFrame()
    for col in df.columns:
        if 'Half-life' not in col and 'Artifical' not in col:
            # Square root transform
            new_cols[f'{col}_sqrt'] = df[col].apply(lambda x: np.sqrt(x) if x > 0 else 0)
            # Log transform
            new_cols[f'{col}_log'] = df[col].apply(lambda x: np.log1p(x) if x > 0 else 0) #log(1+x)
            # Exponential transform
            new_cols[f'{col}_exp'] = df[col].apply(lambda x: np.exp(x) if x > 0 else 0)
            # Inverse (reciprocal) transform
            new_cols[f'{col}_reciprocal'] = df[col].apply(lambda x: 1/x if x!=0 else 0)
            # Power transform
            new_cols[f'{col}_power'] = df[col].apply(lambda x: x**2 if x > 0 else 0)
            # Hyperbolic Tangent transform
            new_cols[f'{col}_tanh'] = df[col].apply(np.tanh)
    return pd.concat([df, new_cols], axis=1)

# Applying the transformation function
df = add_transformed_columns(df)
df.head()

Unnamed: 0,Protons,Neutrons,Atomic Weight,Mass Excess,Binding Energy,Half-life,Half-life Magnitude,Half-life Time,Artificial,N-Z,...,N/Atomic Weight_exp,N/Atomic Weight_reciprocal,N/Atomic Weight_power,N/Atomic Weight_tanh,Abs(Z-N)/Atomic Weight_sqrt,Abs(Z-N)/Atomic Weight_log,Abs(Z-N)/Atomic Weight_exp,Abs(Z-N)/Atomic Weight_reciprocal,Abs(Z-N)/Atomic Weight_power,Abs(Z-N)/Atomic Weight_tanh
0,1,0,1.007825,7.28897,0.0,2.486929e+62,62.0,8.0,0,-1,...,0.0,0.0,0.0,0.0,0.99611,0.689257,2.697258,1.007825,0.984532,0.758314
1,1,1,2.014102,13.135722,1.112283,2.486929e+62,62.0,8.0,0,0,...,1.64296,2.014102,0.246511,0.45936,0.0,0.0,0.0,0.0,0.0,0.0
2,1,2,3.016049,14.949806,2.827266,388572800.0,8.0,7.019868,0,1,...,1.940837,1.508025,0.439727,0.580435,0.575812,0.286351,1.393139,3.016049,0.109932,0.319921
3,1,3,4.027806,25.901518,1.400351,9.917391000000001e-23,-23.0,9.917391e-17,0,2,...,2.106067,1.342602,0.55476,0.63205,0.704662,0.403161,1.64304,2.013903,0.24656,0.459398
4,1,4,5.035311,32.89244,1.33636,8.003509000000001e-23,-23.0,8.003509000000001e-17,0,3,...,2.21309,1.258828,0.631055,0.660889,0.771876,0.46737,1.814468,1.678437,0.354969,0.534049


In [7]:
magic_protons = [2, 8, 20, 28, 50, 82, 126]

df['Is Magic Number'] = df['Protons'].isin(magic_protons)
df['Magic Number'] = df['Is Magic Number']*df['Protons']
df['Closest Magic Number'] = df['Protons'].apply(lambda x: min(magic_protons, key=lambda y: abs(x - y)))
df['Distance To Magic Number_abs'] = abs(df['Closest Magic Number'] - df['Protons'])
df['Distance To Magic Number'] = df['Protons'] - df['Closest Magic Number']

df

Unnamed: 0,Protons,Neutrons,Atomic Weight,Mass Excess,Binding Energy,Half-life,Half-life Magnitude,Half-life Time,Artificial,N-Z,...,Abs(Z-N)/Atomic Weight_log,Abs(Z-N)/Atomic Weight_exp,Abs(Z-N)/Atomic Weight_reciprocal,Abs(Z-N)/Atomic Weight_power,Abs(Z-N)/Atomic Weight_tanh,Is Magic Number,Magic Number,Closest Magic Number,Distance To Magic Number_abs,Distance To Magic Number
0,1,0,1.007825,7.288970,0.000000,2.486929e+62,62.0,8.000000e+00,0,-1,...,0.689257,2.697258,1.007825,0.984532,0.758314,False,0,2,1,-1
1,1,1,2.014102,13.135722,1.112283,2.486929e+62,62.0,8.000000e+00,0,0,...,0.000000,0.000000,0.000000,0.000000,0.000000,False,0,2,1,-1
2,1,2,3.016049,14.949806,2.827266,3.885728e+08,8.0,7.019868e+00,0,1,...,0.286351,1.393139,3.016049,0.109932,0.319921,False,0,2,1,-1
3,1,3,4.027806,25.901518,1.400351,9.917391e-23,-23.0,9.917391e-17,0,2,...,0.403161,1.643040,2.013903,0.246560,0.459398,False,0,2,1,-1
4,1,4,5.035311,32.892440,1.336360,8.003509e-23,-23.0,8.003509e-17,0,3,...,0.467370,1.814468,1.678437,0.354969,0.534049,False,0,2,1,-1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3612,114,169,285.293680,0.000000,0.000000,0.000000e+00,0.0,0.000000e+00,1,55,...,0.176290,1.212621,5.187158,0.037166,0.190430,False,0,126,12,-12
3613,115,170,287.309620,0.000000,0.000000,0.000000e+00,0.0,0.000000e+00,1,55,...,0.175155,1.210981,5.223811,0.036646,0.189127,False,0,126,11,-11
3614,116,171,289.325560,0.000000,0.000000,0.000000e+00,0.0,0.000000e+00,1,55,...,0.174035,1.209367,5.260465,0.036137,0.187840,False,0,126,10,-10
3615,117,172,291.341500,0.000000,0.000000,0.000000e+00,0.0,0.000000e+00,1,55,...,0.172929,1.207777,5.297118,0.035639,0.186571,False,0,126,9,-9


In [8]:
# Quantum number mappings for each type of atomic orbital
orbital_type_mappings = {'s': 0, 'p': 1, 'd': 2, 'f': 3, 'g': 4, 'h': 5}

# Maximum electron capacities for each type of atomic orbital
max_electron_capacity = {'s': 2, 'p': 6, 'd': 10, 'f': 14, 'g': 18, 'h': 22}

# Defined energy levels, including possible orbitals for superheavy elements
# These represent the maximum number of electrons that can be held at each energy level.
energy_levels = {
    '1s': 2, '2s': 2, '2p': 6, '3s': 2, '3p': 6, '4s': 2,
    '3d': 10, '4p': 6, '5s': 2, '4d': 10, '5p': 6, '6s': 2,
    '4f': 14, '5d': 10, '6p': 6, '7s': 2, '5f': 14, '6d': 10,
    '7p': 6, '8s': 2, '5g': 18, '8p': 6, '6f': 14, '5h': 22,
    '9s': 2, '6g': 24, '9p': 6, '7f': 14, '6h': 26, '10s': 2
}

def get_experimental_electron_config(protons):
    """
    Given a number of protons, return the electron configuration of the atom.
    """
    electron_config = {}
    for level in energy_levels:
        if protons - energy_levels[level] >= 0:
            electron_config[level] = energy_levels[level]
            protons -= energy_levels[level]
        else:
            electron_config[level] = protons
            break


    if electron_config[list(electron_config.keys())[-1]]==0:
      electron_config.popitem()
    return electron_config


In [9]:

def is_last_shell_filled(protons):
    """
    Given a number of protons, return whether the last energy level is fully filled.
    """
    electron_config = get_experimental_electron_config(protons)
    last_level, last_level_electrons = list(electron_config.items())[-1]
    last_level_type = last_level[-1]
    return int(last_level_electrons == max_electron_capacity[last_level_type])

def last_shell_fill_ratio(protons):
    """
    Given a number of protons, return the fill ratio of the last energy level.
    """
    electron_config = get_experimental_electron_config(protons)
    last_level, last_level_electrons = list(electron_config.items())[-1]
    last_level_type = last_level[-1]
    return last_level_electrons / max_electron_capacity[last_level_type]

def get_last_level_encoded(protons):
    """
    Given a number of protons, return an encoded value representing the last energy level.
    """
    electron_config = get_experimental_electron_config(protons)
    last_level = list(electron_config.keys())[-1]
    n = int(last_level[:-1])
    l = last_level[-1]
    return 10 * n + orbital_type_mappings[l]

def get_last_level_encoded_simple(protons):

    # Get the electron configuration
    electron_config = get_experimental_electron_config(protons)

    # Define the energy levels in the order of filling
    energy_levels_ordered = ['1s', '2s', '2p', '3s', '3p', '4s', '3d', '4p', '5s', '4d', '5p', '6s', '4f', '5d', '6p',
                             '7s', '5f', '6d', '7p', '8s', '5g', '8p', '6f', '5h', '9s', '6g', '9p', '7f', '6h', '10s']

    # Define a mapping from energy level to its order of filling
    energy_level_encoding_simple = {level: i for i, level in enumerate(energy_levels_ordered)}

    # Get the last energy level
    last_level = list(electron_config.keys())[-1]

    # Return the simple encoded value
    return energy_level_encoding_simple[last_level]


def get_last_principal_quantum_number(protons):
    # Get the electron configuration
    electron_config = get_experimental_electron_config(protons)

    # Extract the last principal quantum number from the keys of the electron configuration
    return int(list(electron_config.keys())[-1][:-1])

def get_last_azimuthal_quantum_number_encoded(protons):
    # Get the electron configuration
    electron_config = get_experimental_electron_config(protons)

    # Extract the last azimuthal quantum number from the keys of the electron configuration
    last_azimuthal_quantum_number = list(electron_config.keys())[-1][-1]

    # Return the encoded value
    return orbital_type_mappings[last_azimuthal_quantum_number]

In [10]:
# Create a new DataFrame containing the new columns
new_columns = pd.DataFrame({
    #'Electron_Config': df['Protons'].apply(get_experimental_electron_config),
    'Last Shell Filled': df['Protons'].apply(is_last_shell_filled),
    'Last Shell Fill Ratio': df['Protons'].apply(last_shell_fill_ratio),
    'Last Orbital Label Encoded (Complex)': df['Protons'].apply(get_last_level_encoded),
    'Last Orbital Label Encoded (Simple)': df['Protons'].apply(get_last_level_encoded_simple) + 1,
    'Last Principal Quantum Number': df['Protons'].apply(get_last_principal_quantum_number),
    'Last Azimuthal Quantum Number Encoded': df['Protons'].apply(get_last_azimuthal_quantum_number_encoded)
})

# Concatenate the original DataFrame with the new columns
df = pd.concat([df, new_columns], axis=1)

df['Even Z'] = df['Protons'] % 2
df['Even N'] = df['Neutrons'] % 2
df['Even N-Z'] = df['Even Z'] + 2*df['Even N']

df

  df['Even Z'] = df['Protons'] % 2
  df['Even N'] = df['Neutrons'] % 2
  df['Even N-Z'] = df['Even Z'] + 2*df['Even N']


Unnamed: 0,Protons,Neutrons,Atomic Weight,Mass Excess,Binding Energy,Half-life,Half-life Magnitude,Half-life Time,Artificial,N-Z,...,Distance To Magic Number,Last Shell Filled,Last Shell Fill Ratio,Last Orbital Label Encoded (Complex),Last Orbital Label Encoded (Simple),Last Principal Quantum Number,Last Azimuthal Quantum Number Encoded,Even Z,Even N,Even N-Z
0,1,0,1.007825,7.288970,0.000000,2.486929e+62,62.0,8.000000e+00,0,-1,...,-1,0,0.500000,10,1,1,0,1,0,1
1,1,1,2.014102,13.135722,1.112283,2.486929e+62,62.0,8.000000e+00,0,0,...,-1,0,0.500000,10,1,1,0,1,1,3
2,1,2,3.016049,14.949806,2.827266,3.885728e+08,8.0,7.019868e+00,0,1,...,-1,0,0.500000,10,1,1,0,1,0,1
3,1,3,4.027806,25.901518,1.400351,9.917391e-23,-23.0,9.917391e-17,0,2,...,-1,0,0.500000,10,1,1,0,1,1,3
4,1,4,5.035311,32.892440,1.336360,8.003509e-23,-23.0,8.003509e-17,0,3,...,-1,0,0.500000,10,1,1,0,1,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3612,114,169,285.293680,0.000000,0.000000,0.000000e+00,0.0,0.000000e+00,1,55,...,-12,0,0.333333,71,19,7,1,0,1,2
3613,115,170,287.309620,0.000000,0.000000,0.000000e+00,0.0,0.000000e+00,1,55,...,-11,0,0.500000,71,19,7,1,1,0,1
3614,116,171,289.325560,0.000000,0.000000,0.000000e+00,0.0,0.000000e+00,1,55,...,-10,0,0.666667,71,19,7,1,0,1,2
3615,117,172,291.341500,0.000000,0.000000,0.000000e+00,0.0,0.000000e+00,1,55,...,-9,0,0.833333,71,19,7,1,1,0,1


In [11]:
final_df = df.dropna()

final_df

Unnamed: 0,Protons,Neutrons,Atomic Weight,Mass Excess,Binding Energy,Half-life,Half-life Magnitude,Half-life Time,Artificial,N-Z,...,Distance To Magic Number,Last Shell Filled,Last Shell Fill Ratio,Last Orbital Label Encoded (Complex),Last Orbital Label Encoded (Simple),Last Principal Quantum Number,Last Azimuthal Quantum Number Encoded,Even Z,Even N,Even N-Z
0,1,0,1.007825,7.288970,0.000000,2.486929e+62,62.0,8.000000e+00,0,-1,...,-1,0,0.500000,10,1,1,0,1,0,1
1,1,1,2.014102,13.135722,1.112283,2.486929e+62,62.0,8.000000e+00,0,0,...,-1,0,0.500000,10,1,1,0,1,1,3
2,1,2,3.016049,14.949806,2.827266,3.885728e+08,8.0,7.019868e+00,0,1,...,-1,0,0.500000,10,1,1,0,1,0,1
3,1,3,4.027806,25.901518,1.400351,9.917391e-23,-23.0,9.917391e-17,0,2,...,-1,0,0.500000,10,1,1,0,1,1,3
4,1,4,5.035311,32.892440,1.336360,8.003509e-23,-23.0,8.003509e-17,0,3,...,-1,0,0.500000,10,1,1,0,1,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3612,114,169,285.293680,0.000000,0.000000,0.000000e+00,0.0,0.000000e+00,1,55,...,-12,0,0.333333,71,19,7,1,0,1,2
3613,115,170,287.309620,0.000000,0.000000,0.000000e+00,0.0,0.000000e+00,1,55,...,-11,0,0.500000,71,19,7,1,1,0,1
3614,116,171,289.325560,0.000000,0.000000,0.000000e+00,0.0,0.000000e+00,1,55,...,-10,0,0.666667,71,19,7,1,0,1,2
3615,117,172,291.341500,0.000000,0.000000,0.000000e+00,0.0,0.000000e+00,1,55,...,-9,0,0.833333,71,19,7,1,1,0,1


In [12]:
final_df.to_csv('nuclear_physics_complete_df.csv')

In [13]:
### Additional Features like N+P/ N/P, etc.
### Transformations - Compare Origina Feature Vs. Feature with all Transformations
### All Chemistry Shit like azumuthal, shells, etc.
### Even Numbers + Magic Numbers Shit; Help visualize
### Compare w/ + w/out Artificial Elements

In [14]:
df['Distance To Magic Number']

0       -1
1       -1
2       -1
3       -1
4       -1
        ..
3612   -12
3613   -11
3614   -10
3615    -9
3616    -8
Name: Distance To Magic Number, Length: 3617, dtype: int64