In [7]:
#import libraries and google drive data
import pandas as pd
import re
from google.colab import drive
# Mount Google Drive into the Colab filesystem at /content/drive so the CSV below is reachable.
drive.mount('/content/drive')
# Location of the dataset inside the mounted Drive.
file_path = '/content/drive/MyDrive/Official_Data.csv'
# Load the CSV into the DataFrame `df`.
df = pd.read_csv(file_path)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [8]:
# Parse the start/end timestamp columns as datetimes (only those present in df);
# errors='coerce' turns entries that cannot be parsed into NaT instead of raising.
timestamp_cols = ["Time Start", "Time End"]
for col in timestamp_cols:
    if col not in df.columns:
        continue
    df[col] = pd.to_datetime(df[col], errors='coerce')

# Build df_cleaned: keep only the rows where both spectral-range bounds are present,
# then discard the "Unnamed: 5" column if the frame has one.
df_cleaned = (
    df.dropna(subset=["Max Spectral Range (eV)", "Min Spectral Range (eV)"])
      .drop(columns=["Unnamed: 5"], errors='ignore')
)

In [12]:
#conversion factors needed
keV_to_eV = 1e3  # 1 keV = 1000 eV
MeV_to_eV = 1e6  # 1 MeV = 1,000,000 eV
h_c = 12398.42  # planck's constant * speed of light in eV * angstrom

#function to convert all values to eV
def convert_to_eV(value, unit):
    """Convert ``value`` expressed in ``unit`` to electron-volts.

    Parameters
    ----------
    value : number
        Magnitude to convert.
    unit : str or None
        Unit label. Matching ignores case and surrounding whitespace.
        Recognised labels are 'keV', 'MeV' and 'angstrom' (a wavelength,
        converted with E = hc / lambda).

    Returns
    -------
    number
        The value in eV. Any other unit label (including 'eV', None or a
        non-string) returns ``value`` unchanged. A wavelength of 0 angstrom
        returns NaN, because no finite energy corresponds to it.
    """
    # A missing or non-text unit cannot be interpreted; pass the value through
    # instead of failing on .lower().
    if not isinstance(unit, str):
        return value

    # NOTE(review): matching is case-insensitive, so 'meV' (milli-eV) would be
    # treated as MeV - confirm the data never uses milli-eV.
    unit_key = unit.strip().lower()
    if unit_key == 'kev':
        return value * keV_to_eV
    if unit_key == 'mev':
        return value * MeV_to_eV
    if unit_key == 'angstrom':
        if value == 0:
            # hc / 0 would raise ZeroDivisionError; report "no valid energy" instead.
            return float('nan')
        return h_c / value  # E (eV) = hc / lambda
    return value

#function to extract the numeric value and unit from the spectral range string
def extract_value_and_unit(spectral_range):
    """Split a spectral-range entry into ``(value, unit)``.

    Parameters
    ----------
    spectral_range : int, float, or any object
        An int/float is returned as-is with unit ``None``. Anything else is
        converted with ``str()`` and parsed as a number at the start of the
        text (leading whitespace allowed, optional decimal part and
        scientific-notation exponent), optionally followed by a unit that
        starts with a letter. Text after the unit is ignored.

    Returns
    -------
    tuple
        ``(value, unit)``. ``unit`` is ``None`` when the text has a number but
        no unit. ``(None, None)`` when the text does not start with a number.
    """
    if isinstance(spectral_range, (int, float)):
        return spectral_range, None

    # The unit group is optional and must begin with a letter. This keeps the
    # trailing digits of a bare number ("100") from being split off and
    # reported as the unit, and lets "1e3 eV" parse as 1000.0 with unit "eV".
    match = re.match(
        r"\s*((?:\d+(?:\.\d*)?|\.\d+)(?:[eE][+-]?\d+)?)\s*([^\W\d_]\w*)?",
        str(spectral_range),
    )
    if match:
        return float(match.group(1)), match.group(2)
    return None, None

#apply conversion to both the "Max Spectral Range (eV)" and the "Min Spectral Range (eV)"
def convert_row(row):
    """Return a copy of ``row`` with both spectral-range cells converted to eV.

    Parameters
    ----------
    row : pandas.Series
        One DataFrame row; it must contain the labels
        'Max Spectral Range (eV)' and 'Min Spectral Range (eV)'
        (a missing label raises KeyError).

    Returns
    -------
    pandas.Series
        A new row. For each of the two cells: if no number could be extracted
        the cell is left untouched; if a number and a unit were extracted the
        cell becomes the value converted by ``convert_to_eV``; if only a
        number was extracted the cell becomes that number.
    """
    # Work on a copy: pandas does not support functions passed to
    # DataFrame.apply mutating the object they are given.
    row = row.copy()
    for column in ('Max Spectral Range (eV)', 'Min Spectral Range (eV)'):
        value, unit = extract_value_and_unit(row[column])
        if value is None:
            continue
        # No unit: the column is labelled in eV, so keep the number as-is.
        row[column] = value if unit is None else convert_to_eV(value, unit)
    return row

# Run convert_row over every row, then remove the 'Max Spectral Range' and
# 'Min Spectral Range' columns when they exist (errors='ignore' skips absent ones).
# NOTE(review): this operates on `df`, not on the `df_cleaned` frame built in the
# previous cell - confirm that skipping the cleaned frame is intentional.
df = (
    df.apply(convert_row, axis=1)
      .drop(columns=['Max Spectral Range', 'Min Spectral Range'], errors='ignore')
)