<a href="https://colab.research.google.com/github/AnasWagih25/Particle_Classification/blob/main/Particle_Classification_MODEL.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [24]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
import pickle
import os
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score

# Define the model
class HiggsClassifier(nn.Module):
    def __init__(self, input_dim):
        super(HiggsClassifier, self).__init__()
        self.fc1 = nn.Linear(input_dim, 128)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 2)  # Binary classification (signal/background)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.softmax(self.fc3(x))
        return x

input_dim = 9

 # Initialize the model with the correct input dimension
model = HiggsClassifier(input_dim)

# Load the Higgs dataset
def load_data(file_path):
    column_names = [
        "label", "lepton_pT", "lepton_eta", "lepton_phi", "missing_energy_magnitude",
        "missing_energy_phi", "jet_1_pt", "jet_1_eta", "jet_1_phi", "jet_1_b-tag",
        "jet_2_pt", "jet_2_eta", "jet_2_phi", "jet_2_b-tag", "jet_3_pt",
        "jet_3_eta", "jet_3_phi", "jet_3_b-tag", "jet_4_pt", "jet_4_eta",
        "jet_4_phi", "jet_4_b-tag", "m_jj", "m_jjj", "m_lv", "m_jlv",
        "m_bb", "m_wbb", "m_wwbb"
    ]
    df = pd.read_csv(file_path, names=column_names)
    return df

# Preprocess the data
def preprocess_data(df):
    X = df.drop(columns=["label"])
    y = df["label"]

    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    return X_scaled, y.values, scaler

# Train the model
def train_model(model, train_loader, criterion, optimizer, epochs=10):
    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        for inputs, labels in train_loader:
            inputs, labels = inputs.float(), labels.long()
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        print(f"Epoch {epoch+1}, Loss: {running_loss/len(train_loader):.4f}")

# Evaluate the model
def evaluate_model(model, test_loader):
    model.eval()
    y_true = []
    y_pred = []
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs = inputs.float()
            outputs = model(inputs)
            _, predicted = torch.max(outputs, 1)
            y_true.extend(labels.numpy())
            y_pred.extend(predicted.numpy())

    print("Classification Report:")
    print(classification_report(y_true, y_pred))
    print("Accuracy:", accuracy_score(y_true, y_pred))

    return y_true, y_pred

# Save model and scaler
def save_model_and_scaler(model, scaler):
    model_directory = '/content/drive/MyDrive/HIGGS DATASET'
    os.makedirs(model_directory, exist_ok=True)

    model_save_path = os.path.join(model_directory, 'Particle_Classification.pth')
    torch.save(model.state_dict(), model_save_path)

    scaler_save_path = os.path.join(model_directory, 'scaler.pkl')
    with open(scaler_save_path, 'wb') as f:
        pickle.dump(scaler, f)

    print("Model and scaler saved to Google Drive.")

# Prediction function
def predict_particle(model, input_features, scaler):
    input_features_scaled = scaler.transform([input_features])
    input_tensor = torch.tensor(input_features_scaled).float()

    model.eval()
    with torch.no_grad():
        outputs = model(input_tensor)
        _, predicted_class = torch.max(outputs, 1)

    return predicted_class.item()

# Function to take input and make prediction
def get_input_and_predict(model, scaler):
    print("Enter the following particle features:")

    lepton_pT = float(input("Lepton pT: "))
    lepton_eta = float(input("Lepton eta: "))
    lepton_phi = float(input("Lepton phi: "))
    missing_energy_magnitude = float(input("Missing Energy Magnitude: "))
    missing_energy_phi = float(input("Missing Energy Phi: "))

    jet_1_pt = float(input("Jet 1 pt: "))
    jet_1_eta = float(input("Jet 1 eta: "))
    jet_1_phi = float(input("Jet 1 phi: "))
    jet_1_b_tag = float(input("Jet 1 b-tag: "))

    input_features = [
        lepton_pT, lepton_eta, lepton_phi, missing_energy_magnitude,
        missing_energy_phi, jet_1_pt, jet_1_eta, jet_1_phi, jet_1_b_tag
    ]

    prediction = predict_particle(model, input_features, scaler)

    if prediction == 0:
        print("The model predicts the particle as Background.")
    else:
        print("The model predicts the particle as Signal.")

# Main pipeline to train the model
def main():
    # Load the data and preprocess
    file_path = '/content/drive/MyDrive/HIGGS DATASET/HIGGS.csv.gz'
    df = load_data(file_path)
    X_scaled, y, scaler = preprocess_data(df)

    # Split the data
    X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)
    train_tensor = torch.utils.data.TensorDataset(torch.tensor(X_train), torch.tensor(y_train))
    test_tensor = torch.utils.data.TensorDataset(torch.tensor(X_test), torch.tensor(y_test))
    train_loader = torch.utils.data.DataLoader(train_tensor, batch_size=512, shuffle=True)
    test_loader = torch.utils.data.DataLoader(test_tensor, batch_size=512, shuffle=False)

    # Define the model, criterion, and optimizer
    input_dim = X_train.shape[1]
    model = HiggsClassifier(input_dim)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    # Train the model
    train_model(model, train_loader, criterion, optimizer, epochs=10)

    # Evaluate the model
    evaluate_model(model, test_loader)

    # Save model and scaler
    save_model_and_scaler(model, scaler)

if __name__ == "__main__":
    main()


Epoch 1, Loss: 0.5604
Epoch 2, Loss: 0.5452
Epoch 3, Loss: 0.5413
Epoch 4, Loss: 0.5392
Epoch 5, Loss: 0.5377
Epoch 6, Loss: 0.5367
Epoch 7, Loss: 0.5358
Epoch 8, Loss: 0.5352
Epoch 9, Loss: 0.5346
Epoch 10, Loss: 0.5341
Classification Report:
              precision    recall  f1-score   support

         0.0       0.76      0.72      0.74   1034720
         1.0       0.76      0.80      0.78   1165280

    accuracy                           0.76   2200000
   macro avg       0.76      0.76      0.76   2200000
weighted avg       0.76      0.76      0.76   2200000

Accuracy: 0.7631790909090909
Model and scaler saved to Google Drive.
