#### Data loading and type checking

In [9]:
import pandas as pd

# The file has no header row, so pandas labels the columns with integers.
df = pd.read_csv('HIGGS_8K.csv', header=None)

# Column 17 is inspected by hand: show its first rows (and its dtype) to
# reveal entries that were not parsed as numbers.
print("Column 17 sample values:", df[17].head(), sep="\n")

Column 17 sample values:
0    0.000000000000000000e+00.1
1                           0.0
2             2.548224449157715
3                           0.0
4                           0.0
Name: 17, dtype: object


#### Fix Scientific Notation in Column 17

In [10]:
# Clean column 17.
# The stray value "0.000000000000000000e+00.1" looks like a valid number followed
# by the ".N" suffix pandas appends to duplicate column labels -- presumably a
# data row was once read as a header and written back (TODO confirm against the
# original file). The repair therefore strips the trailing ".N" after the
# exponent ("e+00.1" -> "e+00") instead of rewriting the exponent: turning it
# into "e-1" would silently scale any non-zero value by 10**-1.
# astype(str) makes the cell safe to re-run once the column is already float64
# (the .str accessor raises on a numeric column).
df[17] = (
    df[17]
    .astype(str)
    .str.replace(r'([eE][+-]?\d+)\.\d+$', r'\1', regex=True)
    .astype(float)
)

# Verify: the column should now be float64 with no string leftovers.
print("Column 17 after cleaning:")
print(df[17].head())

Column 17 after cleaning:
0    0.000000
1    0.000000
2    2.548224
3    0.000000
4    0.000000
Name: 17, dtype: float64


#### Saving Cleaned Data

In [11]:
# Save the cleaned frame without index or header row, mirroring the header-less
# input layout. header=False is the documented way to suppress the header
# (header=None only works because it is falsy). float_format='%.18e' writes
# every float column in 18-digit scientific notation.
df.to_csv('HIGGS_8K_cleaned.csv', index=False, header=False, float_format='%.18e')
print("Saved to 'HIGGS_8K_cleaned.csv'")

Saved to 'HIGGS_8K_cleaned.csv'
