In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv1D, MaxPooling1D, Flatten, Dense, Lambda, Dropout, BatchNormalization
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras import backend as K

# Load and Preprocess the Dataset
# The first CSV column is used as the index; the last column is taken as the
# class label and every other column as a feature.
df = pd.read_csv('/kaggle/input/spectra/Spectra_Data.csv', index_col=0)
labels = df.iloc[:, -1].values
features = df.iloc[:, :-1].values

# Convert labels to integers
labels = labels.astype(int)

X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, random_state=42)
# The scaler is fitted on the training split only and then applied to the
# test split, so test statistics never influence the scaling parameters.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Balance the Dataset if Necessary
# Class counts and sample indices are computed on the training split: the
# indices are used to index X_train / y_train, so they must refer to that
# split and not to the full `labels` array.
min_samples_per_class = 50  # Threshold for minimum samples per class
class_counts = np.bincount(y_train)
min_class_count = np.min(class_counts)
rng = np.random.default_rng(42)  # fixed seed keeps the oversampling reproducible

# For every class that is present but under the threshold, draw (with
# replacement) as many duplicate indices as are needed to reach the threshold.
# Label values that never occur (count == 0) have nothing to duplicate.
extra_samples = []
for cls, count in enumerate(class_counts):
    if 0 < count < min_samples_per_class:
        members = np.flatnonzero(y_train == cls)
        extra_samples.append(
            rng.choice(members, size=min_samples_per_class - count, replace=True)
        )

if extra_samples:
    extra_samples = np.concatenate(extra_samples)
    X_train_balanced = np.vstack((X_train, X_train[extra_samples]))
    y_train_balanced = np.concatenate((y_train, y_train[extra_samples]))
else:
    X_train_balanced = X_train
    y_train_balanced = y_train

# Reshape input data
# Conv1D expects (samples, steps, channels); add a trailing channel axis of 1.
X_train_balanced = X_train_balanced.reshape(-1, X_train_balanced.shape[1], 1)
X_test = X_test.reshape(-1, X_test.shape[1], 1)

# Feature Extraction with 1D CNN
def create_cnn_model(input_shape):
    """Build the 1D-CNN feature extractor.

    Args:
        input_shape: Shape of one input sample, passed straight to
            ``Input(shape=...)``.

    Returns:
        A Keras ``Model`` mapping the input to the batch-normalized output of
        a 256-unit dense layer.
    """
    inputs = Input(shape=input_shape)

    # Two convolution + max-pooling stages (64 then 128 filters, kernel 3).
    features = inputs
    for n_filters in (64, 128):
        features = Conv1D(n_filters, 3, activation='relu')(features)
        features = MaxPooling1D()(features)

    # Dense head: flatten, project to 256 units, then dropout followed by
    # batch normalization (in that order).
    embedding = Flatten()(features)
    embedding = Dense(256, activation='relu')(embedding)
    embedding = Dropout(0.5)(embedding)
    embedding = BatchNormalization()(embedding)
    return Model(inputs, embedding)

# One training sample has shape (steps, 1); `input_shape` is also read by
# create_siamese_network when it builds its two inputs.
n_steps = X_train_balanced.shape[1]
input_shape = (n_steps, 1)
feature_extractor = create_cnn_model(input_shape)
feature_extractor.compile(loss='mse', optimizer='adam')

# Siamese Neural Network for Classification
def euclidean_distance(vectors):
    """Return the row-wise Euclidean distance between two tensors.

    Args:
        vectors: A pair ``(x, y)`` of tensors that can be subtracted
            element-wise.

    Returns:
        The square root of the squared differences summed over axis 1, with
        that axis kept as size 1.
    """
    left, right = vectors
    squared_norm = tf.reduce_sum(tf.square(left - right), axis=1, keepdims=True)
    # Clamp to epsilon before the square root so the result never comes from
    # sqrt(0).
    clamped = tf.maximum(squared_norm, K.epsilon())
    return tf.sqrt(clamped)

def create_siamese_network(feature_extractor):
    """Wire two inputs through one shared feature extractor.

    Both inputs are created with the module-level ``input_shape`` and passed
    through the same ``feature_extractor`` object, so the two branches share
    weights.

    Args:
        feature_extractor: Callable Keras model applied to each input.

    Returns:
        A Keras ``Model`` taking ``[input_a, input_b]`` and producing the
        output of ``euclidean_distance`` on the two extracted feature tensors.
    """
    branch_inputs = [Input(shape=input_shape), Input(shape=input_shape)]
    embeddings = [feature_extractor(branch) for branch in branch_inputs]
    distance = Lambda(euclidean_distance)(embeddings)
    return Model(branch_inputs, distance)

# Build the two-input distance model on top of the shared feature extractor.
siamese_network = create_siamese_network(feature_extractor)

# Custom loss function with adjusted margin
def contrastive_loss(y_true, y_pred, margin=1.5):
    """Contrastive loss over predicted pair distances.

    Args:
        y_true: Pair labels; entries equal to 1 weight the squared distance,
            entries equal to 0 weight the squared margin shortfall.
        y_pred: Predicted distances.
        margin: Distance beyond which a 0-labelled pair contributes no loss.

    Returns:
        The mean of ``y_true * d^2 + (1 - y_true) * max(margin - d, 0)^2``.
    """
    similar_term = y_true * tf.square(y_pred)
    shortfall = tf.maximum(margin - y_pred, 0)
    dissimilar_term = (1 - y_true) * tf.square(shortfall)
    return tf.reduce_mean(similar_term + dissimilar_term)

# Train the distance model with RMSprop against the contrastive loss.
siamese_network.compile(
    loss=contrastive_loss,
    optimizer=RMSprop(learning_rate=0.001),
)

# Generate Pairs and Train the Siamese Network
def generate_pairs(X, y, num_positives=100, num_negatives=100):
    """Sample index pairs for Siamese training.

    A positive pair holds two distinct samples with the same label; a negative
    pair holds one sample from each of two different labels. Sampling uses the
    global ``np.random`` state, so seed it beforehand for reproducible pairs.

    Args:
        X: Unused; kept so existing callers do not break.
        y: 1-D array-like of class labels.
        num_positives: Number of positive pairs to draw.
        num_negatives: Number of negative pairs to draw.

    Returns:
        A tuple ``(pos_pairs, neg_pairs)`` of integer arrays of shape
        ``(n, 2)`` holding positions into ``y``. ``pos_pairs`` is empty
        (shape ``(0, 2)``) when no label has at least two samples, and
        ``neg_pairs`` is empty when fewer than two distinct labels exist.
    """
    y = np.asarray(y)

    # Group sample positions by label once, rather than on every iteration.
    class_indices = [np.flatnonzero(y == label) for label in np.unique(y)]
    # Only labels with at least two samples can yield a pair of distinct items.
    pairable = [members for members in class_indices if len(members) > 1]

    pos_pairs = []
    if pairable:
        for _ in range(num_positives):
            members = pairable[np.random.randint(len(pairable))]
            first, second = np.random.choice(members, size=2, replace=False)
            pos_pairs.append((first, second))

    neg_pairs = []
    if len(class_indices) > 1:
        for _ in range(num_negatives):
            # Pick two different labels, then one sample from each.
            cls_a, cls_b = np.random.choice(len(class_indices), size=2, replace=False)
            neg_pairs.append((
                np.random.choice(class_indices[cls_a]),
                np.random.choice(class_indices[cls_b]),
            ))

    # reshape(-1, 2) keeps a consistent (n, 2) shape even when a list is empty.
    return (
        np.array(pos_pairs, dtype=int).reshape(-1, 2),
        np.array(neg_pairs, dtype=int).reshape(-1, 2),
    )

# generate_pairs returns positions into y_train_balanced, which are used
# below to index X_train_balanced.
pos_pairs, neg_pairs = generate_pairs(
    np.arange(len(y_train_balanced)), y_train_balanced
)

# Check if any pairs were generated
if min(len(pos_pairs), len(neg_pairs)) == 0:
    raise ValueError("Insufficient samples for training.")

# Prepare the pairs and labels
# Each list holds [first members, second members] of the sampled pairs.
X_pos_pairs = [X_train_balanced[pos_pairs[:, col]] for col in (0, 1)]
X_neg_pairs = [X_train_balanced[neg_pairs[:, col]] for col in (0, 1)]

# Stack positive pairs first and negative pairs second on both branches;
# y_pairs follows the same order (1 for positive pairs, 0 for negative pairs).
X_a, X_b = (
    np.concatenate([pos_side, neg_side], axis=0)
    for pos_side, neg_side in zip(X_pos_pairs, X_neg_pairs)
)
y_pairs = np.hstack((np.ones(len(pos_pairs)), np.zeros(len(neg_pairs))))

siamese_network.fit([X_a, X_b], y_pairs, epochs=50, batch_size=64)  # Increased number of epochs

# Evaluate the Model
# Consecutive test samples (0,1), (2,3), ... are treated as pairs. A pair is
# predicted "same class" when its distance is below `threshold`, and that
# prediction is scored against whether the two samples share a label in
# y_test. A trailing unpaired sample is ignored.
threshold = 1.0  # Adjust based on your dataset
num_test_pairs = len(X_test) // 2
first_members = X_test[0:2 * num_test_pairs:2]
second_members = X_test[1:2 * num_test_pairs:2]
same_class = y_test[0:2 * num_test_pairs:2] == y_test[1:2 * num_test_pairs:2]

if num_test_pairs:
    # One batched predict call instead of one call per pair.
    distance = siamese_network.predict([first_members, second_members]).ravel()
else:
    distance = np.empty(0)
predictions = distance < threshold

# With no test pairs there is nothing to score; report NaN rather than fail.
accuracy = np.mean(predictions == same_class) if num_test_pairs else float("nan")
print(f"Accuracy: {accuracy * 100:.2f}%")