# Test Notebook for Profiles (1 FPS)
This notebook loads the saved models (1D CNN, MLP, Random Forest) trained on 1 FPS profile data, runs inference on a single user-selected CSV file, and reports one aggregated prediction per model.

In [1]:
# --- User Input ---
# Path to the CSV file to run inference on. The file is read without a header
# row and every value is parsed as float32.
# The path is relative; presumably it is resolved against the notebook's
# working directory -- TODO confirm.
TEST_FILE_PATH = "../NNATT dataset/Albit/1fps/PROFILES_Albit113_planar1_1FPSnormalised.csv"

In [2]:
# --- Imports ---
import os
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import joblib
from sklearn.ensemble import RandomForestClassifier

# Device Configuration
# Preference order: CUDA first, then MPS, and the CPU as the fallback.
if torch.cuda.is_available():
    device = torch.device("cuda")
elif torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

# Define Class Names (Must match training order)
# Based on the folders in 'NNATT dataset'; kept in sorted order because the
# position of a name in this list is used as its class index.
CLASS_NAMES = sorted([
    'Albit',
    'Calcite',
    'Dolomit',
    'Feldspat',
    'Quarz',
    'Rhodocrosite',
    'Tile',
])
NUM_CLASSES = len(CLASS_NAMES)
print(f"Classes: {CLASS_NAMES}")

Using device: mps
Classes: ['Albit', 'Calcite', 'Dolomit', 'Feldspat', 'Quarz', 'Rhodocrosite', 'Tile']


In [3]:
# --- Model Definitions ---

# 1. 1D CNN
class Model1DCNN(nn.Module):
    """Small 1D convolutional classifier.

    The network takes a batch of 1D signals shaped (batch, length), adds a
    single channel axis, and returns unnormalised class scores shaped
    (batch, num_classes).

    Attribute names and their registration order are part of the saved
    state_dict layout, so they must stay as they are for existing weight files
    to load.
    """

    def __init__(self, num_classes):
        """Build the layers; `num_classes` is the width of the output layer."""
        super().__init__()
        # Layers that own parameters or buffers.
        self.batch_norm_1 = nn.BatchNorm1d(1)
        self.conv1 = nn.Conv1d(in_channels=1, out_channels=16, kernel_size=9, stride=3)
        self.conv2 = nn.Conv1d(in_channels=16, out_channels=32, kernel_size=9, stride=3)
        self.layer_norm_1 = nn.LayerNorm(32)

        self.fnn1 = nn.Linear(in_features=32, out_features=num_classes)

        # Stateless helpers shared by forward().
        self.relu = nn.ReLU()
        self.max_pool = nn.MaxPool1d(2)
        self.global_pool = nn.AdaptiveMaxPool1d(1)
        self.flatten = nn.Flatten()
        self.dropout = nn.Dropout(0.2)

    def forward(self, x):
        """Map a (batch, length) tensor to (batch, num_classes) class scores."""
        # (batch, length) -> (batch, 1, length): Conv1d needs a channel axis.
        signal = self.batch_norm_1(x.unsqueeze(1))

        stage_one = self.max_pool(self.relu(self.conv1(signal)))
        stage_two = self.relu(self.conv2(stage_one))

        # The global max pool collapses the length axis, leaving 32 features per row.
        pooled = self.flatten(self.global_pool(stage_two))

        # Dropout is applied before the layer norm, then the linear head.
        normalised = self.layer_norm_1(self.dropout(pooled))
        return self.fnn1(normalised)

# 2. MLP
class MLP(nn.Module):
    """Fully connected classifier with two hidden layers (512 and 256 units).

    Each hidden layer is Linear -> BatchNorm1d -> ReLU -> Dropout(0.2), followed
    by a final Linear layer that emits `num_classes` unnormalised scores. All
    layers live in `self.layers`, so state_dict keys have the form
    `layers.<index>.<name>`.
    """

    def __init__(self, input_size, num_classes):
        """Build the stack for `input_size` input features and `num_classes` outputs."""
        super().__init__()
        stack = []
        fan_in = input_size
        for width in (512, 256):
            stack.extend([
                nn.Linear(fan_in, width),
                nn.BatchNorm1d(width),
                nn.ReLU(),
                nn.Dropout(0.2),
            ])
            fan_in = width
        stack.append(nn.Linear(fan_in, num_classes))
        self.layers = nn.Sequential(*stack)

    def forward(self, x):
        """Return class scores for a (batch, input_size) tensor."""
        return self.layers(x)

In [4]:
# --- Load Models ---
#
# Each of model_cnn, model_mlp and model_rf ends up either as a usable model or
# as None when its file is missing or cannot be loaded. Code that runs
# inference should therefore check for None first, as is already done for
# model_mlp and model_rf.
#
# NOTE(security): torch.load and joblib.load both unpickle the file, and
# unpickling can execute arbitrary code. Only point these paths at model files
# from a trusted source.

# Paths to saved models
MODEL_DIR = "../models"
CNN_PATH = os.path.join(MODEL_DIR, "model_cnn_profiles_1fps.pth")
MLP_PATH = os.path.join(MODEL_DIR, "model_mlp_profiles_1fps.pth")
RF_PATH = os.path.join(MODEL_DIR, "model_rf_profiles_1fps.joblib")

# Load CNN
# model_cnn stays None unless the trained weights were really loaded: a freshly
# constructed Model1DCNN has random weights and would otherwise yield
# plausible-looking but meaningless predictions.
model_cnn = None
try:
    if os.path.exists(CNN_PATH):
        cnn_candidate = Model1DCNN(num_classes=NUM_CLASSES)
        cnn_candidate.load_state_dict(torch.load(CNN_PATH, map_location=device))
        print("CNN Model loaded successfully.")
        cnn_candidate.to(device)
        cnn_candidate.eval()  # inference mode: dropout off, batch norm uses running stats
        model_cnn = cnn_candidate
    else:
        print(f"CNN Model not found at {CNN_PATH}")
except Exception as e:
    print(f"Error loading CNN model: {e}")

# Load MLP
# The MLP constructor needs the input width, which is not stored separately;
# it is recovered from the first Linear layer's weight, shaped
# (out_features, in_features).
model_mlp = None
input_size = None
try:
    if os.path.exists(MLP_PATH):
        mlp_state_dict = torch.load(MLP_PATH, map_location=device)
        input_size = mlp_state_dict['layers.0.weight'].shape[1]
        print(f"Detected MLP input size: {input_size}")

        mlp_candidate = MLP(input_size=input_size, num_classes=NUM_CLASSES)
        mlp_candidate.load_state_dict(mlp_state_dict)
        print("MLP Model loaded successfully.")
        mlp_candidate.to(device)
        mlp_candidate.eval()
        model_mlp = mlp_candidate
    else:
        print(f"MLP Model not found at {MLP_PATH}")
except Exception as e:
    print(f"Error loading MLP model: {e}")

# Load Random Forest
model_rf = None
try:
    if os.path.exists(RF_PATH):
        model_rf = joblib.load(RF_PATH)
        print("Random Forest Model loaded successfully.")
    else:
        print(f"Random Forest Model not found at {RF_PATH}")
except Exception as e:
    print(f"Error loading Random Forest model: {e}")
    model_rf = None

CNN Model loaded successfully.
Detected MLP input size: 1060
MLP Model loaded successfully.
Random Forest Model loaded successfully.


In [5]:
# --- Data Processing and Inference ---

def load_and_preprocess_single_file(filepath):
    """Read a header-less CSV file into a float32 NumPy array.

    Args:
        filepath: Path of the CSV file to read.

    Returns:
        The file contents as a 2-D array (one row per CSV line), or None when
        the file cannot be read or parsed. In the failure case the error is
        printed rather than raised.
    """
    try:
        return pd.read_csv(filepath, header=None, dtype=np.float32).values
    except Exception as err:
        print(f"Error reading file {filepath}: {err}")
        return None

def _mean_probability_vote(model, inputs):
    """Score every row of `inputs` with `model` and pool the rows into one prediction.

    Returns a tuple (logits, probs, mean_probs, best_idx): the raw model
    outputs, their softmax over dim 1, the mean of those probabilities over
    dim 0, and the index of the largest mean probability as a Python int.
    """
    with torch.no_grad():
        logits = model(inputs)
        probs = torch.softmax(logits, dim=1)
        mean_probs = torch.mean(probs, dim=0)
    return logits, probs, mean_probs, torch.argmax(mean_probs).item()


if os.path.exists(TEST_FILE_PATH):
    print(f"Processing file: {TEST_FILE_PATH}")
    X_input = load_and_preprocess_single_file(TEST_FILE_PATH)

    if X_input is not None:
        print(f"Input shape: {X_input.shape}")
        n_features = X_input.shape[1]

        # Prepare for PyTorch models
        X_tensor = torch.tensor(X_input, dtype=torch.float32).to(device)

        # --- CNN Inference ---
        # Skipped when no CNN model is available, mirroring the MLP and
        # Random Forest sections below.
        if model_cnn is not None:
            print("\n--- CNN Inference ---")
            outputs_cnn, probs_cnn, avg_probs_cnn, final_pred_idx_cnn = _mean_probability_vote(model_cnn, X_tensor)
            # Per-row predicted class; not used by the report below but kept
            # available in the notebook namespace.
            preds_cnn = torch.argmax(outputs_cnn, dim=1)

            print(f"Aggregated Prediction: {CLASS_NAMES[final_pred_idx_cnn]}")
            print(f"Confidence: {avg_probs_cnn[final_pred_idx_cnn]:.4f}")

        # --- MLP Inference ---
        # Explicit None check: the intent is "was a model loaded", not truthiness.
        if model_mlp is not None:
            print("\n--- MLP Inference ---")

            if n_features == input_size:
                X_tensor_mlp = X_tensor
            else:
                print(f"Warning: Model expects input size {input_size}, but file has {n_features}. Resizing or trimming might be needed.")
                if n_features > input_size:
                    # Too many columns: keep only the leading `input_size` ones.
                    X_tensor_mlp = X_tensor[:, :input_size]
                else:
                    print("Padding not implemented for inference demo. Process aborted for MLP.")
                    X_tensor_mlp = None

            if X_tensor_mlp is not None:
                outputs_mlp, probs_mlp, avg_probs_mlp, final_pred_idx_mlp = _mean_probability_vote(model_mlp, X_tensor_mlp)

                print(f"Aggregated Prediction: {CLASS_NAMES[final_pred_idx_mlp]}")
                print(f"Confidence: {avg_probs_mlp[final_pred_idx_mlp]:.4f}")

        # --- Random Forest Inference ---
        # Explicit None check: scikit-learn ensembles define __len__, so a plain
        # truthiness test would ask "how many trees" rather than "is it loaded".
        if model_rf is not None:
            print("\n--- Random Forest Inference ---")
            if n_features == model_rf.n_features_in_:
                preds_rf = model_rf.predict(X_input)
                # Majority vote over the per-row predictions.
                vals, counts = np.unique(preds_rf, return_counts=True)
                winner = np.argmax(counts)
                majority_idx = vals[winner]

                # Assumes the forest was trained on integer labels that index
                # CLASS_NAMES -- TODO confirm against the training notebook.
                print(f"Aggregated Prediction: {CLASS_NAMES[int(majority_idx)]}")
                print(f"Votes: {counts[winner]}/{len(preds_rf)}")
            else:
                print(f"Feature mismatch for RF: Expected {model_rf.n_features_in_}, got {n_features}")

else:
    print(f"File not found: {TEST_FILE_PATH}")

Processing file: ../NNATT dataset/Albit/1fps/PROFILES_Albit113_planar1_1FPSnormalised.csv
Input shape: (10000, 1060)

--- CNN Inference ---
Aggregated Prediction: Albit
Confidence: 0.9956

--- MLP Inference ---
Aggregated Prediction: Albit
Confidence: 1.0000

--- Random Forest Inference ---
Aggregated Prediction: Albit
Votes: 10000/10000
