# Particle Physics Analysis with AI Techniques

This notebook demonstrates how AI techniques can be used to analyze large datasets from particle physics experiments in order to identify patterns and candidate new particles. Working with a synthetic dataset, we apply machine learning techniques for dimensionality reduction (PCA), clustering (KMeans), and classification (Random Forest).

In [1]:
# Import necessary libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix

In [2]:
# Generate a synthetic dataset representing particle physics data
def generate_synthetic_data(n_samples=10000, seed=42):
    """Generate a synthetic, labelled dataset that mimics particle-physics events.

    Features and labels are sampled independently of each other, so the
    features carry no real signal about the label; the data is only meant to
    exercise the analysis pipeline.

    Args:
        n_samples: Number of events (rows) to generate.
        seed: Value passed to ``np.random.seed`` before sampling. The default
            of 42 reproduces the same feature values on every call.

    Returns:
        pandas.DataFrame with the feature columns ``energy``, ``momentum_x``,
        ``momentum_y``, ``momentum_z``, ``mass``, ``charge`` and an integer
        ``label`` column where 0 and 1 are the two known particle types and
        2 marks the unknown particle.

    Note:
        This reseeds NumPy's global random generator as a side effect.
    """
    np.random.seed(seed)

    # Features: energy, momentum, mass, charge, etc.
    df = pd.DataFrame({
        'energy': np.random.normal(loc=50, scale=10, size=n_samples),
        'momentum_x': np.random.normal(loc=0, scale=1, size=n_samples),
        'momentum_y': np.random.normal(loc=0, scale=1, size=n_samples),
        'momentum_z': np.random.normal(loc=0, scale=1, size=n_samples),
        'mass': np.random.exponential(scale=1, size=n_samples),
        'charge': np.random.choice([-1, 0, 1], size=n_samples),
    })

    # Simulate labels for known particles (0 / 1) and an unknown-particle flag.
    known_particles = np.random.choice([0, 1], size=n_samples, p=[0.9, 0.1])
    unknown_particle = np.random.choice([0, 1], size=n_samples, p=[0.99, 0.01])

    # The unknown flag overrides the known label. Summing the two draws would
    # mark an event as unknown only when both are 1 and would silently turn an
    # unknown-only event into known type 1.
    labels = known_particles.copy()
    labels[unknown_particle == 1] = 2  # 2 represents the unknown particle
    df['label'] = labels

    return df

In [3]:
# Load and preprocess the dataset
df = generate_synthetic_data()
X = df.drop(columns='label')
y = df['label']

# Split the dataset into training and testing sets.
# The classes are heavily imbalanced (the unknown particle is very rare), so
# stratify on the label to keep the class proportions the same in both splits
# instead of leaving the rare class's presence in the test set to chance.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)

# Standardize the features. The scaler is fitted on the training split only
# and then applied unchanged to the test split.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [4]:
# Dimensionality reduction using PCA.
# random_state is fixed like the other estimators in this notebook: it is used
# whenever scikit-learn picks a randomized/arpack SVD solver, and without it
# the projection would depend on the state of NumPy's global RNG.
pca = PCA(n_components=2, random_state=42)
X_train_pca = pca.fit_transform(X_train_scaled)  # fit the projection on training data only
X_test_pca = pca.transform(X_test_scaled)

# Visualization of the training data in the plane of the two PCA components,
# coloured by the true particle label
fig, ax = plt.subplots(figsize=(10, 6))
sns.scatterplot(
    x=X_train_pca[:, 0],
    y=X_train_pca[:, 1],
    hue=y_train,
    palette='viridis',
    s=50,
    ax=ax,
)
ax.set_title('PCA of Particle Physics Data')
ax.set_xlabel('PCA Component 1')
ax.set_ylabel('PCA Component 2')
ax.legend(title='Particle Type')
plt.show()

In [5]:
# Group the 2-D PCA projection into three KMeans clusters to look for
# potential new particles. The model is fitted on the training projection and
# then used to assign the test projection to the same clusters.
kmeans = KMeans(n_clusters=3, random_state=42)
y_train_clusters = kmeans.fit_predict(X_train_pca)
y_test_clusters = kmeans.predict(X_test_pca)

# Scatter the training projection, coloured by assigned cluster
fig, ax = plt.subplots(figsize=(10, 6))
sns.scatterplot(
    x=X_train_pca[:, 0],
    y=X_train_pca[:, 1],
    hue=y_train_clusters,
    palette='viridis',
    s=50,
    ax=ax,
)
ax.set_title('KMeans Clustering of Particle Physics Data')
ax.set_xlabel('PCA Component 1')
ax.set_ylabel('PCA Component 2')
ax.legend(title='Cluster')
plt.show()

In [6]:
# Classification using Random Forest to identify known and unknown particles
clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train_scaled, y_train)

# Predictions and evaluation
y_pred = clf.predict(X_test_scaled)

# Pass the label set explicitly. Without `labels`, classification_report
# derives the classes from y_test and y_pred and raises ValueError when that
# set does not have exactly as many entries as `target_names` (e.g. the rare
# class is absent from both). It also keeps the confusion matrix at 3x3.
class_labels = [0, 1, 2]
class_names = ['Known Particle 1', 'Known Particle 2', 'Unknown Particle']

# zero_division=0 reports 0 (without a warning) for a class that is never predicted.
print(classification_report(
    y_test,
    y_pred,
    labels=class_labels,
    target_names=class_names,
    zero_division=0,
))
print(confusion_matrix(y_test, y_pred, labels=class_labels))

In [7]:
# Rank the features by the fitted forest's importance scores, highest first
feature_importances = pd.Series(
    clf.feature_importances_, index=X.columns
).sort_values(ascending=False)

# Horizontal bar chart: one bar per feature, in ranked order
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(x=feature_importances, y=feature_importances.index, ax=ax)
ax.set_title('Feature Importances in Particle Classification')
ax.set_xlabel('Importance Score')
ax.set_ylabel('Feature')
plt.show()