In [None]:
# pip install matplotlib qiskit qiskit_aer qiskit_algorithms qiskit_machine_learning seaborn scikit-learn

In [26]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder, MultiLabelBinarizer, OrdinalEncoder
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
from qiskit import QuantumCircuit
from qiskit.circuit.library import RealAmplitudes, ZZFeatureMap, EfficientSU2
from qiskit_aer import AerSimulator
from qiskit.primitives import StatevectorEstimator
from qiskit_algorithms.optimizers import SPSA, COBYLA
from qiskit_machine_learning.neural_networks import EstimatorQNN
from qiskit_machine_learning.algorithms.regressors import NeuralNetworkRegressor
from qiskit.transpiler.preset_passmanagers import generate_preset_pass_manager
from qiskit.quantum_info import SparsePauliOp
import seaborn as sns

In [None]:
print("=== Quantum Machine Learning for Egyptian Archaeological Dataset ===\n")


print("Loading dataset...")
file_path = '../dataset.csv'
df = pd.read_csv(file_path)

print(f"Dataset shape: {df.shape}")
print(f"Columns: {list(df.columns)}")

# 1. Time Period Encoding (Ordinal)
ordinal_order = [
    "Antiguo Reino",
    "Primer Período Intermedio",
    "Imperio Medio",
    "Segundo Período Intermedio",
    "Imperio Nuevo",
    "Tercer Período Intermedio",
    "Periodo Tardío",
    "Periodo Ptolemaico",
    "Periodo Romano"
]
encoder = OrdinalEncoder(categories=[ordinal_order])
df['Period_Encoded'] = encoder.fit_transform(df[['Time Period']])

# 2. Script Detected Encoding (Value-based mapping)
scripts = {
    'Demótico': 0.9,
    'Cuneiforme':0.6,
    'Hierático': 0.85,
    'Griego': 0.7,
    'Copto': 0.8,
    'Jeroglífico': 1.0,
}
df['Script_encoded'] = df["Script Detected"].map(scripts)

# 3. Material Composition Multi-hot Encoding
df["Material Composition"] = df["Material Composition"].fillna("")
material_lists = df["Material Composition"].str.lower().str.strip().str.split(r",\s*")

mlb = MultiLabelBinarizer()
multi_hot = mlb.fit_transform(material_lists)
multi_hot_df = pd.DataFrame(multi_hot, columns=mlb.classes_, index=df.index)
df = pd.concat([df, multi_hot_df], axis=1)

# 4. Location Clustering
coords = df[['Longitude', 'Latitude']].dropna()
kmeans = KMeans(n_clusters=4, random_state=42)
df['LocationCluster'] = kmeans.fit_predict(coords)

print("Data preprocessing completed!")
print(f"Final dataset shape: {df.shape}")

=== Quantum Machine Learning for Egyptian Archaeological Dataset ===

Loading dataset...
Dataset shape: (500, 11)
Columns: ['Site ID', 'Latitude', 'Longitude', 'Time Period', 'Material Composition', 'Script Detected', 'AI Prediction Score', 'Human Activity Index', 'Climate Change Impact', 'Sonar Radar Detection', 'Looting Risk (%)']
Features selected: ['Human Activity Index', 'Climate Change Impact', 'Sonar Radar Detection', 'Looting Risk (%)', 'Period_Encoded', 'Script_encoded']


In [23]:
# Split the data
print("\nSplitting data into train/test sets...")
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=10
)

# Separate scalers for features and target
print("Scaling features...")
feature_scaler = StandardScaler()
target_scaler = StandardScaler()

# Scale features
X_train_scaled = feature_scaler.fit_transform(X_train)
X_test_scaled = feature_scaler.transform(X_test)

# Scale target (reshape if 1D)
y_train_scaled = target_scaler.fit_transform(y_train.reshape(-1, 1))
y_test_scaled = target_scaler.transform(y_test.reshape(-1, 1))

# Flatten target again
Y_train_scaled = y_train_scaled.flatten()
Y_test_scaled = y_test_scaled.flatten()


# For quantum ML, we need to limit the number of features due to computational constraints
# Select the most important features (first 6)
n_quantum_features = min(6, X_train_scaled.shape[1])
X_train_quantum = X_train_scaled[:, :n_quantum_features]
X_test_quantum = X_test_scaled[:, :n_quantum_features]



Splitting data into train/test sets...
Scaling features...


In [28]:
# Train quantum model
print("Creating Quantum Neural Network...")

print(f"Creating QNN with {6} qubits for {n_quantum_features} features")

# Create quantum circuits
feature_map = ZZFeatureMap(feature_dimension=n_quantum_features, reps=2, entanglement='linear')
ansatz = EfficientSU2(6, reps=2, entanglement='linear')

# Create the complete circuit
qc = QuantumCircuit(6)
qc.compose(feature_map, inplace=True)
qc.compose(ansatz, inplace=True)

# Set up the estimator
estimator = StatevectorEstimator()


pass_manager = generate_preset_pass_manager(optimization_level=2)

# Create observable (Pauli-Z measurement on first qubit)
observable = SparsePauliOp.from_list([("Z" + "I" * (6-1), 1.0)])

# Create QNN
qnn = EstimatorQNN(
    circuit=qc,
    observables=observable,
    input_params=feature_map.parameters,
    weight_params=ansatz.parameters,
    estimator=estimator,
    gradient=None,
    pass_manager=pass_manager
)

# Set up optimizer
optimizer = SPSA(maxiter=150)  # Reduced iterations for faster training

# Create quantum regressor
qnn_regressor = NeuralNetworkRegressor(
    neural_network=qnn,
    optimizer=optimizer,
    loss='absolute_error'
)

print("Training quantum model...")
print("This may take several minutes...")

# Fit the model
qnn_regressor.fit(X_train_quantum, Y_train_scaled)

print("Training completed!")

Creating Quantum Neural Network...
Creating QNN with 6 qubits for 6 features
Training quantum model...
This may take several minutes...
Training completed!


In [29]:
# Make predictions
y_pred = qnn_regressor.predict(X_test_quantum)

# Calculate metrics
mse = mean_squared_error(Y_test_scaled, y_pred)
rmse = np.sqrt(mse)
mae = mean_absolute_error(Y_test_scaled, y_pred)
r2 = r2_score(Y_test_scaled, y_pred)

print(f"Root Mean Square Error (RMSE): {rmse:.4f}")
print(f"Mean Absolute Error (MAE): {mae:.4f}")
print(f"R² Score: {r2:.4f}")

print("\n=== Analysis Complete ===")

Root Mean Square Error (RMSE): 0.9675
Mean Absolute Error (MAE): 0.8505
R² Score: -0.0256

=== Analysis Complete ===
