<a href="https://colab.research.google.com/github/DIPANJAN001/Andrew-Ng-Machine-Learning-Notes/blob/master/siamese.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.layers import Input, Concatenate, Dense, Lambda
from tensorflow.keras.models import Model
# NOTE: `tensorflow.keras.losses` does not export a `contrastive_loss` symbol,
# so importing it raised ImportError. The contrastive loss used by this notebook
# is the one built by `contrastive_loss_with_margin` further down.
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split

# Load your one-hot encoded dataset (categorical features)
# Ensure you have pairs of examples (severe, non-severe) for training

# Create pairs for the Siamese network.
# Each entry is expected to be a (claim_a, claim_b) pair of equal-length feature
# vectors, because training later indexes the split as X[:, 0] and X[:, 1].
# Both lists must be populated before the split below can succeed.
positive_pairs = []  # Pairs of similar claims (severe vs. severe)
negative_pairs = []  # Pairs of dissimilar claims (severe vs. non-severe)

# Stack the pairs and build the matching targets: 1 = similar, 0 = dissimilar.
pair_features = np.concatenate((positive_pairs, negative_pairs))
pair_targets = np.concatenate(
    (np.ones(len(positive_pairs)), np.zeros(len(negative_pairs)))
)

# Split the dataset into training and testing sets.
# (The original one-liner never closed the second np.concatenate call, which
# made the whole cell a SyntaxError.)
X_train, X_test, y_train, y_test = train_test_split(
    pair_features, pair_targets, test_size=0.2, random_state=42
)

# Define the Siamese network architecture
def siamese_network(input_shape):
    """Build the encoder that maps one claim vector to a 32-dimensional embedding.

    Args:
        input_shape: Shape tuple of a single example, forwarded to ``Input``.

    Returns:
        A Keras ``Model`` made of two ReLU ``Dense`` layers (128 then 64 units)
        followed by a linear ``Dense(32)`` embedding layer.
    """
    features = Input(shape=input_shape)

    # Hidden stack; adjust the widths or add layers as needed.
    hidden = features
    for units in (128, 64):
        hidden = Dense(units, activation='relu')(hidden)

    embedding = Dense(32)(hidden)  # Embedding dimension
    return Model(features, embedding)

# Define the contrastive loss function
def contrastive_loss_with_margin(margin=1.0):
    """Create a contrastive loss for distance-valued predictions.

    Label convention: ``y_true == 1`` marks a similar pair (its distance is
    pulled towards 0) and ``y_true == 0`` marks a dissimilar pair (its distance
    is pushed out to at least ``margin``).

    Args:
        margin: Distance beyond which dissimilar pairs stop contributing.

    Returns:
        A ``loss(y_true, y_pred)`` callable returning the scalar mean loss.
    """
    def contrastive_loss(y_true, y_pred):
        # Align labels with the predicted distances. Without the cast, integer
        # labels cannot be multiplied with float predictions. Without the
        # reshape, a (batch,) label vector against (batch, 1) distances would
        # silently broadcast to a (batch, batch) matrix.
        y_true = tf.cast(y_true, y_pred.dtype)
        y_true = tf.reshape(y_true, tf.shape(y_pred))

        square_pred = tf.square(y_pred)
        margin_square = tf.square(tf.maximum(margin - y_pred, 0))
        return tf.reduce_mean(y_true * square_pred + (1 - y_true) * margin_square)

    return contrastive_loss

# Build and compile the Siamese network
# NOTE: `input_dimension` is not defined anywhere in the visible notebook; set it
# to the number of one-hot feature columns before running this cell.
input_shape = (input_dimension,)  # Adjust input dimension

# Shared encoder. It keeps its own name so it is still reachable after the
# two-input pair model is created (the original rebound `siamese_model` and
# lost the only handle to the encoder).
embedding_model = siamese_network(input_shape)

input_a = Input(shape=input_shape)
input_b = Input(shape=input_shape)

# Both branches call the same model object, so they share weights.
output_a = embedding_model(input_a)
output_b = embedding_model(input_b)


def euclidean_distance(embeddings):
    """Return the per-example Euclidean distance between two embedding batches."""
    first, second = embeddings
    squared = tf.reduce_sum(tf.square(first - second), axis=-1, keepdims=True)
    # Clamp before the square root: the gradient of sqrt at exactly 0 is
    # infinite, which turns into NaN updates for identical embeddings.
    return tf.sqrt(tf.maximum(squared, 1e-7))


# Pass the two embeddings as a list. Concatenating them first (as before) gives
# a single tensor, and indexing it with [0] / [1] selects batch rows 0 and 1
# instead of the two branches.
distance = Lambda(euclidean_distance, output_shape=(1,))([output_a, output_b])

siamese_model = Model(inputs=[input_a, input_b], outputs=distance)

optimizer = Adam(learning_rate=0.001)
siamese_model.compile(loss=contrastive_loss_with_margin(margin=1.0), optimizer=optimizer)

# Train the Siamese network
siamese_model.fit([X_train[:, 0], X_train[:, 1]], y_train, batch_size=64, epochs=10, validation_split=0.2)

# Define a single-claim classifier.
# It consumes embeddings, so its input width is the encoder's output width,
# not the raw feature width.
embedding_dim = output_a.shape[-1]
single_claim_input = Input(shape=(embedding_dim,))

# Add more Dense layers as needed
x = Dense(128, activation='relu')(single_claim_input)
x = Dense(64, activation='relu')(x)
output = Dense(1, activation='sigmoid')(x)

single_claim_classifier = Model(single_claim_input, output)

# Compile the single-claim classifier
single_claim_classifier.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
# NOTE(review): nothing in this notebook fits `single_claim_classifier`. Train it
# on (embedding, severe/non-severe) examples before trusting its predictions.

# Extract embeddings for a single test claim using the shared encoder.
# The pair model returns a distance, which for two copies of the same claim is
# ~0 regardless of the claim, so it cannot be used as an embedding.
test_claim_input = ...  # Your one-hot encoded test claim (preprocessed and shaped as needed)
test_claim_embedding = embedding_model.predict(test_claim_input)

# Pass the test claim embedding to the single-claim classifier
severe_probability = single_claim_classifier.predict(test_claim_embedding)

# 'severe_probability' will contain the probability of the test claim being "severe."


In [None]:
# Extract embeddings for a single test claim using the Siamese network
test_claim_input = ...  # Your one-hot encoded test claim (preprocessed and shaped as needed)

# `siamese_model` is the two-input pair model and its output is a distance.
# Feeding the same claim to both inputs therefore yields ~0 for every claim,
# not an embedding. Recover the shared encoder, which is the nested Model among
# the pair model's layers, and run the claim through it once.
shared_encoder = next(
    layer for layer in siamese_model.layers if isinstance(layer, tf.keras.Model)
)
test_claim_embedding = shared_encoder.predict(test_claim_input)

# Pass the test claim embedding to the single-claim classifier.
# NOTE(review): the classifier's input width must equal the embedding width,
# and the classifier must have been trained. Confirm both where it is defined.
severe_probability = single_claim_classifier.predict(test_claim_embedding)

# 'severe_probability' will contain the probability of the test claim being "severe."


In [None]:
from itertools import combinations

import numpy as np
from sklearn.model_selection import train_test_split

# Assume one_hot_data is your one-hot encoded feature matrix and labels is your 0/1 labels.
one_hot_data = ...
labels = ...

# Create pairs and labels for Siamese network training
positive_pairs = []
negative_pairs = []

# Visit every unordered pair of distinct samples exactly once.
# The previous double loop also produced (i, i) self-pairs and both (i, j) and
# (j, i). Self-pairs carry no training signal, and mirrored duplicates could
# land on opposite sides of the train/test split.
# The number of pairs still grows quadratically with the number of samples;
# subsample for large datasets.
for i, j in combinations(range(len(one_hot_data)), 2):
    if labels[i] == labels[j]:
        positive_pairs.append((one_hot_data[i], one_hot_data[j], 1))
    else:
        negative_pairs.append((one_hot_data[i], one_hot_data[j], 0))

# Combine positive and negative pairs
labelled_pairs = positive_pairs + negative_pairs

# Keep the target out of the features and store the pairs as an array so they
# can be sliced as pairs[:, 0] / pairs[:, 1].
# Assumes every row of one_hot_data is a 1-D vector of the same length, giving
# shape (n_pairs, 2, n_features). TODO confirm against the real data.
pairs = np.array([(first, second) for first, second, _ in labelled_pairs])

# As before, `labels` is rebound here from per-sample classes to per-pair
# targets (1 = same class, 0 = different class).
labels = np.array([target for _, _, target in labelled_pairs])

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(pairs, labels, test_size=0.2, random_state=42)
