<a href="https://colab.research.google.com/github/CH1NMAY117/Space-Debris-Risk-Challenge-QML/blob/main/QML_Chinmay_Qiskit.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Qiskit**

In [None]:
# STEP 1: Mount Google Drive
# Drive is mounted at /content/drive; the dataset folder used later in this
# notebook (drive_path) lives underneath that mount point.
from google.colab import drive
drive.mount('/content/drive')
print("✅ Google Drive mounted successfully!")

# STEP 2: Install Qiskit with working versions
# The three packages are pinned together as one known-compatible set.
# NOTE: the leading `!` is IPython shell syntax, so this line only runs inside
# a notebook (Colab/Jupyter), not under a plain `python` interpreter.
!pip install qiskit==0.44.0 qiskit-aer==0.12.0 qiskit-machine-learning==0.6.1 --quiet
print("✅ Qiskit installed with compatible versions!")

# STEP 3: Imports
import os
import glob
import time
import numpy as np
import pandas as pd
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import balanced_accuracy_score, f1_score

from qiskit_machine_learning.kernels import QuantumKernel
from qiskit_machine_learning.algorithms import QSVC
from qiskit.circuit.library import ZZFeatureMap
from qiskit.utils import QuantumInstance
from qiskit import Aer

print("✅ All libraries imported.")

# STEP 4: Define paths
# Inputs, per-chunk prediction files and the final CSV all live under one
# Drive folder; the chunk sub-folder is created on first run.
drive_path = '/content/drive/MyDrive/QMLChallenge/'
chunk_output_dir = os.path.join(drive_path, 'chunks/')
os.makedirs(chunk_output_dir, exist_ok=True)

# STEP 5: Load data
# Both CSVs are read from the same folder, train first, then test.
train_df, test_df = (
    pd.read_csv(os.path.join(drive_path, csv_name))
    for csv_name in ('train.csv', 'test.csv')
)
print(f"✅ Train shape: {train_df.shape}, Test shape: {test_df.shape}")

# Preprocessing
# Impute missing numeric values with medians learned from the TRAINING data
# only, and reuse those same medians for the test set. This mirrors how the
# scaler below is fit on train and merely applied to test, so both sets go
# through one consistent transformation and no test-set statistics are used.
# (Median entries for columns absent from test_df, e.g. the label, are ignored
# by fillna.)
train_medians = train_df.median(numeric_only=True)
train_df.fillna(train_medians, inplace=True)
test_df.fillna(train_medians, inplace=True)

X = train_df.drop(columns=['risk_level'])
y = train_df['risk_level']
X_test = test_df.copy()

# === Apply Feature Selection (Top 12 only) ===
# NOTE: "top" here means the first 12 columns in file order, not a ranking.
selected_features = list(X.columns[:12])
X = X[selected_features]
X_test = X_test[selected_features]

# Scaling
# Fit on the training features, then apply the same transform to the test set.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
X_test_scaled = scaler.transform(X_test)

# QSVM Subset (1.2k samples)
# The quantum kernel is expensive, so the QSVM only sees a stratified subset.
qsvm_subset_size = 1200
if len(X_scaled) > qsvm_subset_size:
    X_qsub, _, y_qsub, _ = train_test_split(
        X_scaled, y, train_size=qsvm_subset_size, stratify=y, random_state=42
    )
else:
    # train_test_split rejects a train_size that is not smaller than the
    # number of samples; with a small dataset simply use all of it.
    X_qsub, y_qsub = X_scaled, y

# Train/Val split
# Classical model: 80/20 split of the full data. QSVM: 80/20 split of its subset.
X_train, X_val, y_train, y_val = train_test_split(X_scaled, y, test_size=0.2, stratify=y, random_state=42)
Xq_train, Xq_val, yq_train, yq_val = train_test_split(X_qsub, y_qsub, test_size=0.2, stratify=y_qsub, random_state=42)

print("✅ Features selected, scaled, and split.")

# STEP 6: Classical SVM
# RBF-kernel baseline trained on the full 80% training split.
svm_params = dict(kernel='rbf', class_weight='balanced', random_state=42)
clf = SVC(**svm_params).fit(X_train, y_train)
print("✅ Classical SVM trained.")

# STEP 7: Quantum SVM with Simplified Encoding
# The feature-map width is taken from the training matrix instead of being
# hard-coded, so it always matches the number of columns passed to fit()
# (e.g. when the dataset has fewer feature columns than expected).
n_quantum_features = Xq_train.shape[1]
feature_map = ZZFeatureMap(feature_dimension=n_quantum_features, reps=1)  # Simple encoding
# NOTE(review): this QuantumInstance line is echoed in the captured cell
# output, which looks like a deprecation warning from the pinned Qiskit
# release — confirm before upgrading the pinned versions.
quantum_instance = QuantumInstance(backend=Aer.get_backend('aer_simulator'), shots=512, seed_simulator=42)
qkernel = QuantumKernel(feature_map=feature_map, quantum_instance=quantum_instance)

qsvc = QSVC(quantum_kernel=qkernel)
print(f"⚛️ Training QSVM on {Xq_train.shape[0]} samples with {Xq_train.shape[1]} features...")
start_qtrain = time.time()

qsvc.fit(Xq_train, yq_train)

# Wall-clock training time, reported in seconds.
elapsed_qtrain = time.time() - start_qtrain
print(f"✅ QSVM training completed in {elapsed_qtrain:.2f} seconds")

# STEP 8: Evaluate on Validation Set
# Each model is scored on its own hold-out split (X_val for the classical SVM,
# Xq_val for the QSVM), using the same two metrics for both.
def _validation_scores(y_true, y_pred):
    """Return (balanced accuracy, macro-averaged F1) for one set of predictions."""
    return (
        balanced_accuracy_score(y_true, y_pred),
        f1_score(y_true, y_pred, average='macro'),
    )


y_val_pred_clf = clf.predict(X_val)
clf_bal_acc, clf_f1 = _validation_scores(y_val, y_val_pred_clf)

y_val_pred_qsvc = qsvc.predict(Xq_val)
qsvc_bal_acc, qsvc_f1 = _validation_scores(yq_val, y_val_pred_qsvc)

# Report classical first, then quantum, in an identical layout.
for heading, bal_acc, macro_f1 in (
    ("\n📊 Classical SVM Validation Scores", clf_bal_acc, clf_f1),
    ("\n⚛️ Quantum SVM Validation Scores", qsvc_bal_acc, qsvc_f1),
):
    print(heading)
    print(f"✅ Balanced Accuracy: {bal_acc:.4f}")
    print(f"✅ Macro F1-Score:    {macro_f1:.4f}")

# STEP 9: Predict in Chunks with Resume
# The test set is scored in fixed-size slices and each slice is written to its
# own CSV, so an interrupted session can continue after the highest-numbered
# chunk already on disk instead of starting over.
chunk_size = 100
total_samples = X_test_scaled.shape[0]


def _saved_chunk_index(filename):
    """Return N for a file named exactly ``pred_chunk_N.csv``, otherwise None.

    Any other file in the chunk folder (for example a duplicate such as
    ``pred_chunk_3 (1).csv`` or a backup copy) is ignored rather than crashing
    the resume scan on an ``int()`` parse error.
    """
    stem, ext = os.path.splitext(filename)
    digits = stem[len("pred_chunk_"):]
    if stem.startswith("pred_chunk_") and ext == ".csv" and digits.isascii() and digits.isdigit():
        return int(digits)
    return None


existing_chunks = sorted(
    idx for idx in map(_saved_chunk_index, os.listdir(chunk_output_dir)) if idx is not None
)
start_chunk = existing_chunks[-1] + 1 if existing_chunks else 0
num_chunks = (total_samples + chunk_size - 1) // chunk_size  # ceiling division
times_per_chunk = []

print(f"\n🧠 Starting prediction in {num_chunks} chunks of {chunk_size} samples (resuming at chunk {start_chunk})...")

for chunk_idx in tqdm(range(start_chunk, num_chunks), desc="🔄 Processing Chunks"):
    chunk_start_time = time.time()

    start_idx = chunk_idx * chunk_size
    end_idx = min(start_idx + chunk_size, total_samples)

    print(f"\n🚀 Chunk {chunk_idx}: Samples {start_idx} to {end_idx - 1}")

    # X_test_scaled already holds the selected, scaled features, so both
    # models read the same slice; re-running the scaler per chunk would only
    # recompute these exact values.
    X_test_chunk = X_test_scaled[start_idx:end_idx]

    pred_classical = clf.predict(X_test_chunk)
    pred_quantum = qsvc.predict(X_test_chunk)

    # sample_id is the row position in the test set, which lets the chunks be
    # reassembled later.
    df_chunk = pd.DataFrame({
        'sample_id': range(start_idx, end_idx),
        'label_classical': pred_classical,
        'label_quantum': pred_quantum
    })

    save_path = os.path.join(chunk_output_dir, f'pred_chunk_{chunk_idx}.csv')
    df_chunk.to_csv(save_path, index=False)

    # ETA is the running mean chunk time multiplied by the chunks still to do.
    chunk_elapsed = time.time() - chunk_start_time
    times_per_chunk.append(chunk_elapsed)
    avg_time = np.mean(times_per_chunk)
    est_remaining = avg_time * (num_chunks - chunk_idx - 1)

    print(f"✅ Saved chunk {chunk_idx} to: {save_path}")
    print(f"⏱️ Time: {chunk_elapsed:.2f}s | Avg: {avg_time:.2f}s | ETA: {est_remaining/60:.2f} min")
    print(f"🔢 Predicted {len(pred_classical)} classical & {len(pred_quantum)} quantum labels.")

# STEP 10: Merge Predictions
# Concatenate every per-chunk CSV into one predictions file, in chunk order.
print("\n🧩 Merging all chunks...")


def _chunk_number(path):
    """Return N for a path whose file name is exactly ``pred_chunk_N.csv``, else None."""
    stem, ext = os.path.splitext(os.path.basename(path))
    digits = stem[len("pred_chunk_"):]
    if stem.startswith("pred_chunk_") and ext == ".csv" and digits.isascii() and digits.isdigit():
        return int(digits)
    return None


# Sort by the numeric chunk index. A plain string sort would place
# pred_chunk_10.csv before pred_chunk_2.csv and scramble the row order of the
# merged file. Files that match the glob but carry a non-numeric suffix are
# skipped.
all_chunks = sorted(
    (
        path
        for path in glob.glob(os.path.join(chunk_output_dir, 'pred_chunk_*.csv'))
        if _chunk_number(path) is not None
    ),
    key=_chunk_number,
)
final_df = pd.concat([pd.read_csv(f) for f in all_chunks], ignore_index=True)
final_path = os.path.join(drive_path, "predictions.csv")
final_df.to_csv(final_path, index=False)

print(f"✅ Final predictions.csv saved to: {final_path}")
print(f"📦 Total samples: {final_df.shape[0]}")

Mounted at /content/drive
✅ Google Drive mounted successfully!
  Installing build dependencies ... done
  Getting requirements to build wheel ... done
  Preparing metadata (pyproject.toml) ... done
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 133.4/133.4 kB 4.1 MB/s eta 0:00:00
  Preparing metadata (setup.py) ... done
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 12.8/12.8 MB 87.6 MB/s eta 0:00:00
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 148.7/148.7 kB 10.4 MB/s eta 0:00:00
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 6.1/6.1 MB 88.7 MB/s eta 0:00:00
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 2.1/2.1 MB 72.2 MB/s eta 0:00:00
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 49.5/49.5 kB 3.1 MB/s eta 0:00:00

  quantum_instance = QuantumInstance(backend=Aer.get_backend('aer_simulator'), shots=512, seed_simulator=42)


✅ QSVM training completed in 11938.12 seconds
