In [20]:
import pandas as pd
from tensorflow import keras
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer

In [42]:
# Load model
# Restore the previously saved Keras model from "rizzlers_model.h5". The path is relative,
# so the file must be reachable from the kernel's current working directory.
# NOTE(review): the ".h5" extension suggests the HDF5 save format — confirm that the installed
# Keras version still reads it.
loaded_model = keras.models.load_model("rizzlers_model.h5")

In [39]:
# Load and preprocess data
def load_data(file_path):
    """Read a header-less CSV file and split it into features and labels.

    Every cell is first read as text (with the literal "error" treated as a
    missing value) and then converted to a number; anything that cannot be
    parsed as a number becomes NaN.

    Parameters
    ----------
    file_path
        Location of the CSV file, forwarded unchanged to ``pd.read_csv``.

    Returns
    -------
    tuple
        ``(features, labels)`` where ``labels`` holds the values of the first
        column and ``features`` holds the values of all remaining columns.
    """
    # Read everything as strings so that the numeric conversion below is the
    # single place where parsing (and coercion to NaN) happens.
    raw_table = pd.read_csv(file_path, header=None, na_values="error", dtype=str)

    # Column-wise numeric conversion; unparseable entries turn into NaN.
    numeric_table = raw_table.apply(pd.to_numeric, errors='coerce')

    # First column -> labels, everything after it -> features.
    label_column = numeric_table.iloc[:, 0]
    feature_columns = numeric_table.iloc[:, 1:]

    return feature_columns.values, label_column.values

def preprocess_data(features, labels, imputer=None, scaler=None):
    """Impute missing feature values and standardise the features.

    With the default arguments a new mean-strategy ``SimpleImputer`` and a new
    ``StandardScaler`` are fitted on ``features`` itself, exactly as before.
    Fitting on the data being evaluated means the imputation means and the
    scaling statistics come from that data rather than from the data the
    model was trained on. To apply the training-time preprocessing instead,
    pass the already-fitted transformers via ``imputer`` and ``scaler``; they
    are then only used through ``transform`` and are never re-fitted.

    Parameters
    ----------
    features
        2-D array-like of feature values; may contain NaN.
    labels
        Labels belonging to ``features``. They are returned untouched (missing
        labels are not imputed or removed here).
    imputer : optional
        Already-fitted object exposing ``transform(features)``. When ``None``
        a fresh ``SimpleImputer(strategy='mean')`` is fitted on ``features``.
    scaler : optional
        Already-fitted object exposing ``transform(features)``. When ``None``
        a fresh ``StandardScaler`` is fitted on the imputed ``features``.

    Returns
    -------
    tuple
        ``(features, labels)`` with the preprocessed features and the original
        labels.
    """
    # Fill missing values: reuse the caller's fitted imputer when one is
    # supplied, otherwise fall back to column means of this data set.
    if imputer is None:
        features = SimpleImputer(strategy='mean').fit_transform(features)
    else:
        features = imputer.transform(features)

    # Standardise: reuse the caller's fitted scaler when one is supplied,
    # otherwise scale this data set to mean 0 / standard deviation 1.
    if scaler is None:
        features = StandardScaler().fit_transform(features)
    else:
        features = scaler.transform(features)

    # Return the preprocessed features and the original labels
    return features, labels

In [40]:
# Read the CSV and run it straight through preprocessing; the (features, labels)
# pair returned by load_data is unpacked into preprocess_data's two arguments.
# NOTE(review): the file name says "train" while the variables are named *_test —
# confirm this is the data set the evaluation is meant to use.
X_test, y_test = preprocess_data(*load_data('HIGGS_train.csv'))

In [43]:
# Evaluate the model
# The result of evaluate() is unpacked into exactly two values, so the loaded model is
# assumed to report a loss plus a single metric (taken here to be accuracy) —
# TODO confirm against how the model was compiled.
loss, accuracy = loaded_model.evaluate(X_test, y_test)
print(f"Loss: {loss}, Accuracy: {accuracy}")

# Make predictions
# Run the model on the same preprocessed features that were just evaluated and keep
# whatever predict() returns in y_pred for later use.
y_pred = loaded_model.predict(X_test)

Loss: 0.45800071954727173, Accuracy: 0.7760983109474182
