<a href="https://colab.research.google.com/github/Sugaminni/Quantum-Machine-Learning-Submission/blob/main/M4_RF_Final.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Ensure you have all the required libraries installed in Colab

!pip install pennylane # Install the PennyLane library for quantum computing and machine learning
!pip install matplotlib scikit-learn # Install matplotlib for data visualization and scikit-learn for machine learning algorithms

In [None]:
# Importing PennyLane for quantum machine learning
import pennylane as qml

# Importing PennyLane's version of numpy for quantum-compatible operations
from pennylane import numpy as np

# Importing pandas for data manipulation and analysis
import pandas as pd

# Importing train_test_split to split the dataset into training and testing sets
from sklearn.model_selection import train_test_split

# Importing StandardScaler for feature scaling and LabelEncoder for encoding target labels
from sklearn.preprocessing import StandardScaler, LabelEncoder

# Importing RandomForestClassifier for classification tasks
from sklearn.ensemble import RandomForestClassifier

# Importing SelectFromModel to select important features based on the model
from sklearn.feature_selection import SelectFromModel

# Importing accuracy_score to calculate the accuracy of the model
from sklearn.metrics import accuracy_score

# Importing matplotlib for data visualization
import matplotlib.pyplot as plt


In [None]:
import pandas as pd
from google.colab import drive
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder

# Make the Google Drive contents available under /content/drive
drive.mount('/content/drive')

# Read the raw back-order table from Drive
data = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/BackOrders.csv', low_memory=False)

# Column names: model inputs and the label to predict
features = ['national_inv', 'lead_time', 'in_transit_qty',
            'forecast_3_month', 'forecast_6_month', 'sales_1_month']
target = 'went_on_backorder'

# Coerce every feature column to a numeric dtype (unparseable entries become NaN),
# then discard any row that lacks a feature value or a label
numeric_columns = {name: pd.to_numeric(data[name], errors='coerce') for name in features}
data = data.assign(**numeric_columns).dropna(subset=[*features, target])

# Feature matrix and raw label column
X = data[features]
y = data[target]

# Map the label values to integer class ids
le = LabelEncoder()
y_encoded = le.fit_transform(y)

# Hold out 20% for testing BEFORE any scaling so no test statistics leak into training;
# stratifying on the label keeps the class ratio the same in both parts
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_encoded,
    test_size=0.2,
    random_state=42,
    stratify=y_encoded,
)

# Learn the scaling statistics on the training part only, then apply them to both parts
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
# Classical baseline: a random forest of 100 trees with a fixed seed (reproducible),
# fitted on the scaled training data in one chained call
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

# Predicted class ids for the held-out test set
rf_predictions = rf_classifier.predict(X_test)

# Fraction of test samples whose predicted class matches the true class
rf_accuracy = accuracy_score(y_test, rf_predictions)
print(f"Random Forest Accuracy: {rf_accuracy:.4f}")

In [None]:
# Rank the features with a 100-tree random forest and keep the 3 most important ones.
#
# The forest is NOT fitted separately here: SelectFromModel (prefit=False, the default)
# fits its own clone of the estimator inside `selector.fit`, so fitting `rf_selector`
# beforehand would only train an identical forest a second time. The fitted forest is
# available afterwards as `selector.estimator_`.
rf_selector = RandomForestClassifier(n_estimators=100, random_state=42)

# threshold=-np.inf switches off the importance cut-off, so max_features alone decides
# how many features are kept
selector = SelectFromModel(rf_selector, max_features=3, threshold=-np.inf)
selector.fit(X_train, y_train)

# Reduce the training and test sets to the selected feature columns
X_train_selected = selector.transform(X_train)
X_test_selected = selector.transform(X_test)

In [None]:
# One qubit per selected feature, simulated on PennyLane's "default.qubit" device
n_qubits = 3
dev = qml.device("default.qubit", wires=n_qubits)


@qml.qnode(dev)
def quantum_circuit(inputs, weights):
    """Variational circuit: angle-embed the inputs, entangle, measure PauliZ on every wire.

    Args:
        inputs: Classical feature values, embedded as rotation angles on the
            ``n_qubits`` wires via ``qml.AngleEmbedding``.
        weights: Trainable parameters handed to ``qml.StronglyEntanglingLayers``.

    Returns:
        A list with one PauliZ expectation value per wire.
    """
    wires = range(n_qubits)

    # Data encoding followed by the trainable entangling block
    qml.AngleEmbedding(inputs, wires=wires)
    qml.StronglyEntanglingLayers(weights, wires=wires)

    return [qml.expval(qml.PauliZ(wire)) for wire in wires]

def hybrid_model(inputs, weights):
    """Reduce the circuit's per-qubit outputs to a single scalar score.

    Args:
        inputs: One sample, passed unchanged to ``quantum_circuit``.
        weights: Circuit parameters, passed unchanged to ``quantum_circuit``.

    Returns:
        The sum of all values returned by ``quantum_circuit``.
    """
    expectation_values = quantum_circuit(inputs, weights)
    return np.sum(expectation_values)

def cost(weights, features, labels):
    """Mean squared error between the hybrid model's outputs and the labels.

    Args:
        weights: Circuit parameters forwarded to ``hybrid_model``.
        features: Iterable of samples; each one is evaluated separately.
        labels: Target values, one per sample, subtracted element-wise from
            the model outputs.

    Returns:
        The mean of the squared differences.
    """
    outputs = [hybrid_model(sample, weights) for sample in features]
    residuals = np.array(outputs) - labels
    return np.mean(residuals ** 2)

In [None]:
# Seed the global RNG so the initial weights and the sampled batches are reproducible
np.random.seed(42)

num_layers = 4  # Number of entangling layers in the quantum circuit
# Trainable parameters, drawn uniformly between -π/2 and π/2,
# with shape (num_layers, n_qubits, 3)
weights_init = np.random.uniform(low=-np.pi/2, high=np.pi/2, size=(num_layers, n_qubits, 3), requires_grad=True)

# Adam optimizer with a stepsize of 0.001
opt = qml.AdamOptimizer(stepsize=0.001)

# Training configuration
batch_size = 64    # samples per optimization step
num_train = 5000   # only the first num_train training samples are used
num_steps = 100    # number of optimization steps; the plot below adapts to this value
train_features = X_train_selected[:num_train]
train_labels = y_train[:num_train]

# Start the optimization from the initial weights
weights = weights_init

costs_val = []  # Cost recorded at every optimization step

for i in range(num_steps):
    # Draw a random mini-batch (indices are sampled with replacement)
    batch_index = np.random.randint(0, len(train_features), (batch_size,))
    X_batch = train_features[batch_index]
    y_batch = train_labels[batch_index]

    # One optimizer step on this batch; also yields the batch cost
    weights, cost_val = opt.step_and_cost(lambda w: cost(w, X_batch, y_batch), weights)
    costs_val.append(cost_val)

    # Report progress every 10 steps
    if (i + 1) % 10 == 0:
        print(f"Step {i + 1}, Cost: {cost_val:.4f}")

# Plot the training cost over time; the x-axis is derived from the recorded costs
# so it always matches the number of steps actually run
plt.plot(range(1, len(costs_val) + 1), costs_val)
plt.xlabel('Step')
plt.ylabel('Cost')
plt.title('Training cost Over Time')
plt.show()

In [None]:
def predict(X, threshold=0.5):
    """Classify samples with the trained hybrid quantum model.

    Every sample is evaluated with ``hybrid_model`` using the module-level
    ``weights``, and the raw score is turned into a binary label.

    Args:
        X: Iterable of input samples, each in the form ``hybrid_model`` accepts.
        threshold: Scores strictly greater than this value become 1, all
            others become 0. Defaults to 0.5, the cut-off this notebook
            has always used.

    Returns:
        Integer array of 0/1 predictions, one per sample in ``X``.
    """
    scores = np.array([hybrid_model(x, weights) for x in X])
    return (scores > threshold).astype(int)

# Evaluate the hybrid model on the first num_eval test samples only
# (adjust num_eval to the available resources). The size is bound once so the
# feature slice and the label slice cannot drift apart.
num_eval = 1000
y_eval = y_test[:num_eval]
hybrid_predictions = predict(X_test_selected[:num_eval])

# Accuracy of the hybrid quantum model on that slice
hybrid_accuracy = accuracy_score(y_eval, hybrid_predictions)
print(f"Hybrid QML Accuracy: {hybrid_accuracy:.4f}")

# NOTE: rf_accuracy was computed on the full test set, whereas hybrid_accuracy covers
# only the first num_eval samples. Report the Random Forest on the same slice as well
# so the two models can be compared on identical data.
rf_accuracy_subset = accuracy_score(y_eval, rf_predictions[:num_eval])
print(f"Random Forest Accuracy (same {num_eval} samples): {rf_accuracy_subset:.4f}")

In [None]:
# Side-by-side bar chart of the two models' accuracies
model_names = ['Random Forest', 'Hybrid QML']
accuracies = [rf_accuracy, hybrid_accuracy]

fig, ax = plt.subplots(figsize=(10, 6))

# Orange bar for the Random Forest, green bar for the Hybrid QML model
ax.bar(model_names, accuracies, color=['orange', 'green'])

# Title, axis label and a fixed 0-1 accuracy scale
ax.set_title('Model Accuracies')
ax.set_ylabel('Accuracy')
ax.set_ylim(0, 1)

# Write each accuracy just above its bar
for position, accuracy in enumerate(accuracies):
    ax.text(position, accuracy + 0.01, f'{accuracy:.4f}', ha='center')

# Render the figure
plt.show()