In [1]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv1D, MaxPooling1D, Flatten, Dense, Dropout, Concatenate
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, roc_curve
import pandas as pd

# Seed Python, NumPy and TensorFlow RNGs so weight initialisation, dropout
# and batch shuffling are repeatable on Restart & Run All.
tf.keras.utils.set_random_seed(42)

# Load both embedding tables
csv_path_1 = "/kaggle/input/embeddings-95-330/mert-v0-initial.csv"
csv_path_2 = "/kaggle/input/embeddings-95-330/mert-v1-330.csv"

# The first column is dropped unconditionally.
# NOTE(review): presumably a saved index column — confirm for both files.
df1 = pd.read_csv(csv_path_1).iloc[:, 1:]
df2 = pd.read_csv(csv_path_2).iloc[:, 1:]

# Both tables must carry the target column
if "label" not in df1.columns or "label" not in df2.columns:
    raise ValueError("One or both datasets do not contain a 'label' column.")

# Features come from each table; labels are taken from the first one
X1, y = df1.drop(columns=["label"]).values, df1["label"].values
X2 = df2.drop(columns=["label"]).values

# Row i of X1 is paired with row i of X2, so the label vectors must agree
# element-wise (np.array_equal is also False when the lengths differ).
# NOTE(review): equal labels do not prove the rows are the same samples.
if not np.array_equal(y, df2["label"].values):
    raise ValueError("Mismatch in labels between datasets.")

# Stratified 80%/20% train/test split (test_size=0.2), applied jointly to
# both feature matrices so paired rows stay together
X1_train, X1_test, X2_train, X2_test, y_train, y_test = train_test_split(
    X1, X2, y, test_size=0.2, random_state=42, stratify=y
)


def _conv_branch(n_features):
    """Build one 1-D CNN branch over a flat vector of ``n_features`` values.

    Returns:
        ``(input_tensor, flattened_output_tensor)`` for use in a functional model.
    """
    branch_input = Input(shape=(n_features,))
    # Add a trailing channel axis: (n_features,) -> (n_features, 1)
    hidden = tf.keras.layers.Reshape((n_features, 1))(branch_input)
    hidden = Conv1D(32, kernel_size=3, activation='relu')(hidden)
    hidden = MaxPooling1D(pool_size=2)(hidden)
    hidden = Conv1D(64, kernel_size=3, activation='relu')(hidden)
    hidden = MaxPooling1D(pool_size=2)(hidden)
    return branch_input, Flatten()(hidden)


# One identical CNN branch per dataset
input1, x1 = _conv_branch(X1.shape[1])
input2, x2 = _conv_branch(X2.shape[1])

# Fusion: Concatenate CNN outputs
fused = Concatenate()([x1, x2])

# Fully Connected Network (FCN) ending in a single sigmoid unit
fcn = Dense(128, activation='relu')(fused)
fcn = Dropout(0.5)(fcn)
fcn = Dense(64, activation='relu')(fcn)
fcn = Dropout(0.3)(fcn)
output = Dense(1, activation='sigmoid')(fcn)

# Build and Compile the Model
model = Model(inputs=[input1, input2], outputs=output)
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the Model.
# The test split is also passed as validation data, so the val_* metrics
# logged per epoch are test-set metrics.
# NOTE(review): the recorded run reports val_accuracy 1.0 after epoch 1 —
# worth verifying that no feature column leaks the label.
model.fit([X1_train, X2_train], y_train, epochs=20, batch_size=32, validation_data=([X1_test, X2_test], y_test))

# Evaluate the Model
loss, accuracy = model.evaluate([X1_test, X2_test], y_test)
print(f"Test Accuracy: {accuracy:.4f}")

# Predictions and performance metrics.
# predict() yields one column per output unit; flatten to one score per sample
# so the sklearn metrics below receive 1-D arrays.
y_scores = model.predict([X1_test, X2_test]).ravel()
y_pred = (y_scores > 0.5).astype(int)

# Classification Report
print("Classification Report:\n", classification_report(y_test, y_pred, digits=2))

# Confusion Matrix
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))

# Calculate EER
def calculate_eer(y_test, y_scores):
    """Estimate the equal error rate (EER) from binary labels and scores.

    The ROC curve is scanned for the operating point where the false-positive
    rate and the false-negative rate (``1 - TPR``) are closest; the FPR at
    that point is reported as the EER.

    Args:
        y_test: True binary labels, one per sample.
        y_scores: Predicted scores. Any shape that flattens to one score per
            sample is accepted (e.g. the ``(n, 1)`` output of ``model.predict``).

    Returns:
        Tuple ``(eer, eer_threshold)``: the FPR at the crossover point and the
        ROC threshold that produces it.
    """
    fpr, tpr, thresholds = roc_curve(y_test, np.ravel(y_scores))
    fnr = 1 - tpr
    # Locate the crossover once and reuse it for both return values
    crossover = np.nanargmin(np.abs(fpr - fnr))
    return fpr[crossover], thresholds[crossover]

eer, eer_threshold = calculate_eer(y_test, y_scores)
print(f"EER: {eer:.4f} (Threshold: {eer_threshold:.4f})")


Epoch 1/20
[1m826/826[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 13ms/step - accuracy: 0.9862 - loss: 0.0374 - val_accuracy: 1.0000 - val_loss: 5.1501e-13
Epoch 2/20
[1m826/826[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 11ms/step - accuracy: 1.0000 - loss: 3.4049e-05 - val_accuracy: 1.0000 - val_loss: 4.6852e-23
Epoch 3/20
[1m826/826[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 11ms/step - accuracy: 1.0000 - loss: 4.6971e-06 - val_accuracy: 1.0000 - val_loss: 3.6990e-24
Epoch 4/20
[1m826/826[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 11ms/step - accuracy: 1.0000 - loss: 1.5276e-05 - val_accuracy: 1.0000 - val_loss: 2.1212e-25
Epoch 5/20
[1m826/826[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 11ms/step - accuracy: 1.0000 - loss: 2.0511e-06 - val_accuracy: 1.0000 - val_loss: 4.3409e-24
Epoch 6/20
[1m826/826[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 11ms/step - accuracy: 1.0000 - loss: 2.9156e-08 - val_accuracy: 1.0000 

### Concatenate the two models (Mert-v0 and Music2vec)

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv1D, MaxPooling1D, Flatten, Dense, Dropout, Concatenate
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, roc_curve
import pandas as pd


# Seed Python, NumPy and TensorFlow RNGs so weight initialisation, dropout
# and batch shuffling are repeatable on Restart & Run All.
tf.keras.utils.set_random_seed(42)

# Load both embedding tables
csv_path_1 = "/kaggle/input/embeddings-95-330/mert-v0-initial.csv"
csv_path_2 = "/kaggle/input/embeddings-95-330/m2v_merged_dataset.csv"

# The first column is dropped unconditionally.
# NOTE(review): presumably a saved index column — confirm for both files.
df1 = pd.read_csv(csv_path_1).iloc[:, 1:]
df2 = pd.read_csv(csv_path_2).iloc[:, 1:]

# Both tables must carry the target column
if "label" not in df1.columns or "label" not in df2.columns:
    raise ValueError("One or both datasets do not contain a 'label' column.")

# Features come from each table; labels are taken from the first one
X1, y = df1.drop(columns=["label"]).values, df1["label"].values
X2 = df2.drop(columns=["label"]).values

# Row i of X1 is paired with row i of X2, so the label vectors must agree
# element-wise (np.array_equal is also False when the lengths differ).
# NOTE(review): equal labels do not prove the rows are the same samples.
if not np.array_equal(y, df2["label"].values):
    raise ValueError("Mismatch in labels between datasets.")

# Stratified 80%/20% train/test split (test_size=0.2), applied jointly to
# both feature matrices so paired rows stay together
X1_train, X1_test, X2_train, X2_test, y_train, y_test = train_test_split(
    X1, X2, y, test_size=0.2, random_state=42, stratify=y
)


def _conv_branch(n_features):
    """Build one 1-D CNN branch over a flat vector of ``n_features`` values.

    Returns:
        ``(input_tensor, flattened_output_tensor)`` for use in a functional model.
    """
    branch_input = Input(shape=(n_features,))
    # Add a trailing channel axis: (n_features,) -> (n_features, 1)
    hidden = tf.keras.layers.Reshape((n_features, 1))(branch_input)
    hidden = Conv1D(32, kernel_size=3, activation='relu')(hidden)
    hidden = MaxPooling1D(pool_size=2)(hidden)
    hidden = Conv1D(64, kernel_size=3, activation='relu')(hidden)
    hidden = MaxPooling1D(pool_size=2)(hidden)
    return branch_input, Flatten()(hidden)


# One identical CNN branch per dataset
input1, x1 = _conv_branch(X1.shape[1])
input2, x2 = _conv_branch(X2.shape[1])

# Fusion: Concatenate CNN outputs
fused = Concatenate()([x1, x2])

# Fully Connected Network (FCN) ending in a single sigmoid unit
fcn = Dense(128, activation='relu')(fused)
fcn = Dropout(0.5)(fcn)
fcn = Dense(64, activation='relu')(fcn)
fcn = Dropout(0.3)(fcn)
output = Dense(1, activation='sigmoid')(fcn)

# Build and Compile the Model
model = Model(inputs=[input1, input2], outputs=output)
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the Model.
# The test split is also passed as validation data, so the val_* metrics
# logged per epoch are test-set metrics.
# NOTE(review): the recorded run reports val_accuracy 1.0 after epoch 1 —
# worth verifying that no feature column leaks the label.
model.fit([X1_train, X2_train], y_train, epochs=20, batch_size=32, validation_data=([X1_test, X2_test], y_test))

# Evaluate the Model
loss, accuracy = model.evaluate([X1_test, X2_test], y_test)
print(f"Test Accuracy: {accuracy:.4f}")

# Predictions and performance metrics.
# predict() yields one column per output unit; flatten to one score per sample
# so the sklearn metrics below receive 1-D arrays.
y_scores = model.predict([X1_test, X2_test]).ravel()
y_pred = (y_scores > 0.5).astype(int)

# Classification Report
print("Classification Report:\n", classification_report(y_test, y_pred, digits=2))

# Confusion Matrix
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))

# Calculate EER
def calculate_eer(y_test, y_scores):
    """Estimate the equal error rate (EER) from binary labels and scores.

    The ROC curve is scanned for the operating point where the false-positive
    rate and the false-negative rate (``1 - TPR``) are closest; the FPR at
    that point is reported as the EER.

    Args:
        y_test: True binary labels, one per sample.
        y_scores: Predicted scores. Any shape that flattens to one score per
            sample is accepted (e.g. the ``(n, 1)`` output of ``model.predict``).

    Returns:
        Tuple ``(eer, eer_threshold)``: the FPR at the crossover point and the
        ROC threshold that produces it.
    """
    fpr, tpr, thresholds = roc_curve(y_test, np.ravel(y_scores))
    fnr = 1 - tpr
    # Locate the crossover once and reuse it for both return values
    crossover = np.nanargmin(np.abs(fpr - fnr))
    return fpr[crossover], thresholds[crossover]

eer, eer_threshold = calculate_eer(y_test, y_scores)
print(f"EER: {eer:.4f} (Threshold: {eer_threshold:.4f})")


Epoch 1/20
[1m826/826[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 11ms/step - accuracy: 0.9919 - loss: 0.0184 - val_accuracy: 1.0000 - val_loss: 4.4407e-15
Epoch 2/20
[1m826/826[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 10ms/step - accuracy: 1.0000 - loss: 2.7076e-06 - val_accuracy: 1.0000 - val_loss: 1.8351e-19
Epoch 3/20
[1m826/826[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 10ms/step - accuracy: 1.0000 - loss: 3.9109e-06 - val_accuracy: 1.0000 - val_loss: 2.6789e-19
Epoch 4/20
[1m826/826[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 10ms/step - accuracy: 1.0000 - loss: 4.7672e-07 - val_accuracy: 1.0000 - val_loss: 1.1004e-21
Epoch 5/20
[1m826/826[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 10ms/step - accuracy: 1.0000 - loss: 4.3679e-08 - val_accuracy: 1.0000 - val_loss: 1.6489e-22
Epoch 6/20
[1m826/826[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 10ms/step - accuracy: 1.0000 - loss: 8.4673e-09 - val_accuracy: 1.0000 

### Concatenate the two models (Mert-v0 and Mert-95)

In [None]:
# Seed Python, NumPy and TensorFlow RNGs so weight initialisation, dropout
# and batch shuffling are repeatable on Restart & Run All.
tf.keras.utils.set_random_seed(42)

# Load both embedding tables
csv_path_1 = "/kaggle/input/embeddings-95-330/mert-v0-initial.csv"
csv_path_2 = "/kaggle/input/embeddings-95-330/95_merged_dataset.csv"

# The first column is dropped unconditionally.
# NOTE(review): presumably a saved index column — confirm for both files.
df1 = pd.read_csv(csv_path_1).iloc[:, 1:]
df2 = pd.read_csv(csv_path_2).iloc[:, 1:]

# Both tables must carry the target column
if "label" not in df1.columns or "label" not in df2.columns:
    raise ValueError("One or both datasets do not contain a 'label' column.")

# Features come from each table; labels are taken from the first one
X1, y = df1.drop(columns=["label"]).values, df1["label"].values
X2 = df2.drop(columns=["label"]).values

# Row i of X1 is paired with row i of X2, so the label vectors must agree
# element-wise (np.array_equal is also False when the lengths differ).
# NOTE(review): equal labels do not prove the rows are the same samples.
if not np.array_equal(y, df2["label"].values):
    raise ValueError("Mismatch in labels between datasets.")

# Stratified 80%/20% train/test split (test_size=0.2), applied jointly to
# both feature matrices so paired rows stay together
X1_train, X1_test, X2_train, X2_test, y_train, y_test = train_test_split(
    X1, X2, y, test_size=0.2, random_state=42, stratify=y
)


def _conv_branch(n_features):
    """Build one 1-D CNN branch over a flat vector of ``n_features`` values.

    Returns:
        ``(input_tensor, flattened_output_tensor)`` for use in a functional model.
    """
    branch_input = Input(shape=(n_features,))
    # Add a trailing channel axis: (n_features,) -> (n_features, 1)
    hidden = tf.keras.layers.Reshape((n_features, 1))(branch_input)
    hidden = Conv1D(32, kernel_size=3, activation='relu')(hidden)
    hidden = MaxPooling1D(pool_size=2)(hidden)
    hidden = Conv1D(64, kernel_size=3, activation='relu')(hidden)
    hidden = MaxPooling1D(pool_size=2)(hidden)
    return branch_input, Flatten()(hidden)


# One identical CNN branch per dataset
input1, x1 = _conv_branch(X1.shape[1])
input2, x2 = _conv_branch(X2.shape[1])

# Fusion: Concatenate CNN outputs
fused = Concatenate()([x1, x2])

# Fully Connected Network (FCN) ending in a single sigmoid unit
fcn = Dense(128, activation='relu')(fused)
fcn = Dropout(0.5)(fcn)
fcn = Dense(64, activation='relu')(fcn)
fcn = Dropout(0.3)(fcn)
output = Dense(1, activation='sigmoid')(fcn)

# Build and Compile the Model
model = Model(inputs=[input1, input2], outputs=output)
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the Model.
# The test split is also passed as validation data, so the val_* metrics
# logged per epoch are test-set metrics.
# NOTE(review): the recorded run reports val_accuracy 1.0 after epoch 1 —
# worth verifying that no feature column leaks the label.
model.fit([X1_train, X2_train], y_train, epochs=20, batch_size=32, validation_data=([X1_test, X2_test], y_test))

# Evaluate the Model
loss, accuracy = model.evaluate([X1_test, X2_test], y_test)
print(f"Test Accuracy: {accuracy:.4f}")

# Predictions and performance metrics.
# predict() yields one column per output unit; flatten to one score per sample
# so the sklearn metrics below receive 1-D arrays.
y_scores = model.predict([X1_test, X2_test]).ravel()
y_pred = (y_scores > 0.5).astype(int)

# Classification Report
print("Classification Report:\n", classification_report(y_test, y_pred, digits=2))

# Confusion Matrix
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))

# Calculate EER
def calculate_eer(y_test, y_scores):
    """Estimate the equal error rate (EER) from binary labels and scores.

    The ROC curve is scanned for the operating point where the false-positive
    rate and the false-negative rate (``1 - TPR``) are closest; the FPR at
    that point is reported as the EER.

    Args:
        y_test: True binary labels, one per sample.
        y_scores: Predicted scores. Any shape that flattens to one score per
            sample is accepted (e.g. the ``(n, 1)`` output of ``model.predict``).

    Returns:
        Tuple ``(eer, eer_threshold)``: the FPR at the crossover point and the
        ROC threshold that produces it.
    """
    fpr, tpr, thresholds = roc_curve(y_test, np.ravel(y_scores))
    fnr = 1 - tpr
    # Locate the crossover once and reuse it for both return values
    crossover = np.nanargmin(np.abs(fpr - fnr))
    return fpr[crossover], thresholds[crossover]

eer, eer_threshold = calculate_eer(y_test, y_scores)
print(f"EER: {eer:.4f} (Threshold: {eer_threshold:.4f})")


Epoch 1/20
[1m826/826[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 11ms/step - accuracy: 0.9917 - loss: 0.0175 - val_accuracy: 1.0000 - val_loss: 1.4095e-12
Epoch 2/20
[1m826/826[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 10ms/step - accuracy: 1.0000 - loss: 2.1330e-06 - val_accuracy: 1.0000 - val_loss: 3.3489e-14
Epoch 3/20
[1m826/826[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 10ms/step - accuracy: 1.0000 - loss: 7.3261e-07 - val_accuracy: 1.0000 - val_loss: 1.3977e-15
Epoch 4/20
[1m826/826[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 10ms/step - accuracy: 1.0000 - loss: 8.5116e-07 - val_accuracy: 1.0000 - val_loss: 1.5197e-17
Epoch 5/20
[1m826/826[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 10ms/step - accuracy: 1.0000 - loss: 1.4408e-07 - val_accuracy: 1.0000 - val_loss: 2.4926e-18
Epoch 6/20
[1m826/826[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 10ms/step - accuracy: 1.0000 - loss: 9.8873e-09 - val_accuracy: 1.0000 

In [None]:
# Seed Python, NumPy and TensorFlow RNGs so weight initialisation, dropout
# and batch shuffling are repeatable on Restart & Run All.
tf.keras.utils.set_random_seed(42)

# Load both embedding tables
csv_path_1 = "/kaggle/input/embeddings-95-330/95_merged_dataset.csv"
csv_path_2 = "/kaggle/input/embeddings-95-330/m2v_merged_dataset.csv"

# The first column is dropped unconditionally.
# NOTE(review): presumably a saved index column — confirm for both files.
df1 = pd.read_csv(csv_path_1).iloc[:, 1:]
df2 = pd.read_csv(csv_path_2).iloc[:, 1:]

# Both tables must carry the target column
if "label" not in df1.columns or "label" not in df2.columns:
    raise ValueError("One or both datasets do not contain a 'label' column.")

# Features come from each table; labels are taken from the first one
X1, y = df1.drop(columns=["label"]).values, df1["label"].values
X2 = df2.drop(columns=["label"]).values

# Row i of X1 is paired with row i of X2, so the label vectors must agree
# element-wise (np.array_equal is also False when the lengths differ).
# NOTE(review): equal labels do not prove the rows are the same samples.
if not np.array_equal(y, df2["label"].values):
    raise ValueError("Mismatch in labels between datasets.")

# Stratified 80%/20% train/test split (test_size=0.2), applied jointly to
# both feature matrices so paired rows stay together
X1_train, X1_test, X2_train, X2_test, y_train, y_test = train_test_split(
    X1, X2, y, test_size=0.2, random_state=42, stratify=y
)


def _conv_branch(n_features):
    """Build one 1-D CNN branch over a flat vector of ``n_features`` values.

    Returns:
        ``(input_tensor, flattened_output_tensor)`` for use in a functional model.
    """
    branch_input = Input(shape=(n_features,))
    # Add a trailing channel axis: (n_features,) -> (n_features, 1)
    hidden = tf.keras.layers.Reshape((n_features, 1))(branch_input)
    hidden = Conv1D(32, kernel_size=3, activation='relu')(hidden)
    hidden = MaxPooling1D(pool_size=2)(hidden)
    hidden = Conv1D(64, kernel_size=3, activation='relu')(hidden)
    hidden = MaxPooling1D(pool_size=2)(hidden)
    return branch_input, Flatten()(hidden)


# One identical CNN branch per dataset
input1, x1 = _conv_branch(X1.shape[1])
input2, x2 = _conv_branch(X2.shape[1])

# Fusion: Concatenate CNN outputs
fused = Concatenate()([x1, x2])

# Fully Connected Network (FCN) ending in a single sigmoid unit
fcn = Dense(128, activation='relu')(fused)
fcn = Dropout(0.5)(fcn)
fcn = Dense(64, activation='relu')(fcn)
fcn = Dropout(0.3)(fcn)
output = Dense(1, activation='sigmoid')(fcn)

# Build and Compile the Model
model = Model(inputs=[input1, input2], outputs=output)
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the Model.
# The test split is also passed as validation data, so the val_* metrics
# logged per epoch are test-set metrics.
# NOTE(review): the recorded run reports val_accuracy 1.0 after epoch 1 —
# worth verifying that no feature column leaks the label.
model.fit([X1_train, X2_train], y_train, epochs=20, batch_size=32, validation_data=([X1_test, X2_test], y_test))

# Evaluate the Model
loss, accuracy = model.evaluate([X1_test, X2_test], y_test)
print(f"Test Accuracy: {accuracy:.4f}")

# Predictions and performance metrics.
# predict() yields one column per output unit; flatten to one score per sample
# so the sklearn metrics below receive 1-D arrays.
y_scores = model.predict([X1_test, X2_test]).ravel()
y_pred = (y_scores > 0.5).astype(int)

# Classification Report
print("Classification Report:\n", classification_report(y_test, y_pred, digits=2))

# Confusion Matrix
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))

# Calculate EER
def calculate_eer(y_test, y_scores):
    """Estimate the equal error rate (EER) from binary labels and scores.

    The ROC curve is scanned for the operating point where the false-positive
    rate and the false-negative rate (``1 - TPR``) are closest; the FPR at
    that point is reported as the EER.

    Args:
        y_test: True binary labels, one per sample.
        y_scores: Predicted scores. Any shape that flattens to one score per
            sample is accepted (e.g. the ``(n, 1)`` output of ``model.predict``).

    Returns:
        Tuple ``(eer, eer_threshold)``: the FPR at the crossover point and the
        ROC threshold that produces it.
    """
    fpr, tpr, thresholds = roc_curve(y_test, np.ravel(y_scores))
    fnr = 1 - tpr
    # Locate the crossover once and reuse it for both return values
    crossover = np.nanargmin(np.abs(fpr - fnr))
    return fpr[crossover], thresholds[crossover]

eer, eer_threshold = calculate_eer(y_test, y_scores)
print(f"EER: {eer:.4f} (Threshold: {eer_threshold:.4f})")


Epoch 1/20
[1m826/826[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 11ms/step - accuracy: 0.9769 - loss: 0.0542 - val_accuracy: 1.0000 - val_loss: 6.8771e-08
Epoch 2/20
[1m826/826[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 9ms/step - accuracy: 1.0000 - loss: 2.2568e-04 - val_accuracy: 1.0000 - val_loss: 7.7833e-12
Epoch 3/20
[1m826/826[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 9ms/step - accuracy: 1.0000 - loss: 2.3767e-04 - val_accuracy: 1.0000 - val_loss: 7.3404e-14
Epoch 4/20
[1m826/826[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 9ms/step - accuracy: 1.0000 - loss: 1.3594e-04 - val_accuracy: 1.0000 - val_loss: 1.4611e-11
Epoch 5/20
[1m826/826[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 9ms/step - accuracy: 0.9999 - loss: 1.3829e-04 - val_accuracy: 1.0000 - val_loss: 1.4775e-14
Epoch 6/20
[1m826/826[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 9ms/step - accuracy: 1.0000 - loss: 2.4546e-06 - val_accuracy: 1.0000 - val

### Concatenate the two models (Mert-95 and Mert-v1-330)

In [None]:
# Seed Python, NumPy and TensorFlow RNGs so weight initialisation, dropout
# and batch shuffling are repeatable on Restart & Run All.
tf.keras.utils.set_random_seed(42)

# Load both embedding tables
csv_path_1 = "/kaggle/input/embeddings-95-330/95_merged_dataset.csv"
csv_path_2 = "/kaggle/input/embeddings-95-330/mert-v1-330.csv"

# The first column is dropped unconditionally.
# NOTE(review): presumably a saved index column — confirm for both files.
df1 = pd.read_csv(csv_path_1).iloc[:, 1:]
df2 = pd.read_csv(csv_path_2).iloc[:, 1:]

# Both tables must carry the target column
if "label" not in df1.columns or "label" not in df2.columns:
    raise ValueError("One or both datasets do not contain a 'label' column.")

# Features come from each table; labels are taken from the first one
X1, y = df1.drop(columns=["label"]).values, df1["label"].values
X2 = df2.drop(columns=["label"]).values

# Row i of X1 is paired with row i of X2, so the label vectors must agree
# element-wise (np.array_equal is also False when the lengths differ).
# NOTE(review): equal labels do not prove the rows are the same samples.
if not np.array_equal(y, df2["label"].values):
    raise ValueError("Mismatch in labels between datasets.")

# Stratified 80%/20% train/test split (test_size=0.2), applied jointly to
# both feature matrices so paired rows stay together
X1_train, X1_test, X2_train, X2_test, y_train, y_test = train_test_split(
    X1, X2, y, test_size=0.2, random_state=42, stratify=y
)


def _conv_branch(n_features):
    """Build one 1-D CNN branch over a flat vector of ``n_features`` values.

    Returns:
        ``(input_tensor, flattened_output_tensor)`` for use in a functional model.
    """
    branch_input = Input(shape=(n_features,))
    # Add a trailing channel axis: (n_features,) -> (n_features, 1)
    hidden = tf.keras.layers.Reshape((n_features, 1))(branch_input)
    hidden = Conv1D(32, kernel_size=3, activation='relu')(hidden)
    hidden = MaxPooling1D(pool_size=2)(hidden)
    hidden = Conv1D(64, kernel_size=3, activation='relu')(hidden)
    hidden = MaxPooling1D(pool_size=2)(hidden)
    return branch_input, Flatten()(hidden)


# One identical CNN branch per dataset
input1, x1 = _conv_branch(X1.shape[1])
input2, x2 = _conv_branch(X2.shape[1])

# Fusion: Concatenate CNN outputs
fused = Concatenate()([x1, x2])

# Fully Connected Network (FCN) ending in a single sigmoid unit
fcn = Dense(128, activation='relu')(fused)
fcn = Dropout(0.5)(fcn)
fcn = Dense(64, activation='relu')(fcn)
fcn = Dropout(0.3)(fcn)
output = Dense(1, activation='sigmoid')(fcn)

# Build and Compile the Model
model = Model(inputs=[input1, input2], outputs=output)
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the Model.
# The test split is also passed as validation data, so the val_* metrics
# logged per epoch are test-set metrics.
# NOTE(review): the recorded run reports val_accuracy 1.0 after epoch 1 —
# worth verifying that no feature column leaks the label.
model.fit([X1_train, X2_train], y_train, epochs=20, batch_size=32, validation_data=([X1_test, X2_test], y_test))

# Evaluate the Model
loss, accuracy = model.evaluate([X1_test, X2_test], y_test)
print(f"Test Accuracy: {accuracy:.4f}")

# Predictions and performance metrics.
# predict() yields one column per output unit; flatten to one score per sample
# so the sklearn metrics below receive 1-D arrays.
y_scores = model.predict([X1_test, X2_test]).ravel()
y_pred = (y_scores > 0.5).astype(int)

# Classification Report
print("Classification Report:\n", classification_report(y_test, y_pred, digits=2))

# Confusion Matrix
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))

# Calculate EER
def calculate_eer(y_test, y_scores):
    """Estimate the equal error rate (EER) from binary labels and scores.

    The ROC curve is scanned for the operating point where the false-positive
    rate and the false-negative rate (``1 - TPR``) are closest; the FPR at
    that point is reported as the EER.

    Args:
        y_test: True binary labels, one per sample.
        y_scores: Predicted scores. Any shape that flattens to one score per
            sample is accepted (e.g. the ``(n, 1)`` output of ``model.predict``).

    Returns:
        Tuple ``(eer, eer_threshold)``: the FPR at the crossover point and the
        ROC threshold that produces it.
    """
    fpr, tpr, thresholds = roc_curve(y_test, np.ravel(y_scores))
    fnr = 1 - tpr
    # Locate the crossover once and reuse it for both return values
    crossover = np.nanargmin(np.abs(fpr - fnr))
    return fpr[crossover], thresholds[crossover]

eer, eer_threshold = calculate_eer(y_test, y_scores)
print(f"EER: {eer:.4f} (Threshold: {eer_threshold:.4f})")


Epoch 1/20
[1m826/826[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 12ms/step - accuracy: 0.9830 - loss: 0.0514 - val_accuracy: 1.0000 - val_loss: 1.4310e-09
Epoch 2/20
[1m826/826[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 11ms/step - accuracy: 1.0000 - loss: 8.0856e-05 - val_accuracy: 1.0000 - val_loss: 2.1469e-12
Epoch 3/20
[1m826/826[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 11ms/step - accuracy: 1.0000 - loss: 2.5366e-05 - val_accuracy: 1.0000 - val_loss: 4.6870e-15
Epoch 4/20
[1m826/826[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 11ms/step - accuracy: 0.9999 - loss: 3.4181e-04 - val_accuracy: 1.0000 - val_loss: 1.7934e-14
Epoch 5/20
[1m826/826[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 11ms/step - accuracy: 1.0000 - loss: 6.2574e-05 - val_accuracy: 1.0000 - val_loss: 5.8724e-16
Epoch 6/20
[1m826/826[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 11ms/step - accuracy: 1.0000 - loss: 1.1243e-05 - val_accuracy: 1.0000 

### Concatenate the two models (Music2vec and Mert-v1-330)

In [None]:
# Seed Python, NumPy and TensorFlow RNGs so weight initialisation, dropout
# and batch shuffling are repeatable on Restart & Run All.
tf.keras.utils.set_random_seed(42)

# Load both embedding tables
csv_path_1 = "/kaggle/input/embeddings-95-330/m2v_merged_dataset.csv"
csv_path_2 = "/kaggle/input/embeddings-95-330/mert-v1-330.csv"

# The first column is dropped unconditionally.
# NOTE(review): presumably a saved index column — confirm for both files.
df1 = pd.read_csv(csv_path_1).iloc[:, 1:]
df2 = pd.read_csv(csv_path_2).iloc[:, 1:]

# Both tables must carry the target column
if "label" not in df1.columns or "label" not in df2.columns:
    raise ValueError("One or both datasets do not contain a 'label' column.")

# Features come from each table; labels are taken from the first one
X1, y = df1.drop(columns=["label"]).values, df1["label"].values
X2 = df2.drop(columns=["label"]).values

# Row i of X1 is paired with row i of X2, so the label vectors must agree
# element-wise (np.array_equal is also False when the lengths differ).
# NOTE(review): equal labels do not prove the rows are the same samples.
if not np.array_equal(y, df2["label"].values):
    raise ValueError("Mismatch in labels between datasets.")

# Stratified 80%/20% train/test split (test_size=0.2), applied jointly to
# both feature matrices so paired rows stay together
X1_train, X1_test, X2_train, X2_test, y_train, y_test = train_test_split(
    X1, X2, y, test_size=0.2, random_state=42, stratify=y
)


def _conv_branch(n_features):
    """Build one 1-D CNN branch over a flat vector of ``n_features`` values.

    Returns:
        ``(input_tensor, flattened_output_tensor)`` for use in a functional model.
    """
    branch_input = Input(shape=(n_features,))
    # Add a trailing channel axis: (n_features,) -> (n_features, 1)
    hidden = tf.keras.layers.Reshape((n_features, 1))(branch_input)
    hidden = Conv1D(32, kernel_size=3, activation='relu')(hidden)
    hidden = MaxPooling1D(pool_size=2)(hidden)
    hidden = Conv1D(64, kernel_size=3, activation='relu')(hidden)
    hidden = MaxPooling1D(pool_size=2)(hidden)
    return branch_input, Flatten()(hidden)


# One identical CNN branch per dataset
input1, x1 = _conv_branch(X1.shape[1])
input2, x2 = _conv_branch(X2.shape[1])

# Fusion: Concatenate CNN outputs
fused = Concatenate()([x1, x2])

# Fully Connected Network (FCN) ending in a single sigmoid unit
fcn = Dense(128, activation='relu')(fused)
fcn = Dropout(0.5)(fcn)
fcn = Dense(64, activation='relu')(fcn)
fcn = Dropout(0.3)(fcn)
output = Dense(1, activation='sigmoid')(fcn)

# Build and Compile the Model
model = Model(inputs=[input1, input2], outputs=output)
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the Model.
# The test split is also passed as validation data, so the val_* metrics
# logged per epoch are test-set metrics.
# NOTE(review): the recorded run reports val_accuracy 1.0 after epoch 1 —
# worth verifying that no feature column leaks the label.
model.fit([X1_train, X2_train], y_train, epochs=20, batch_size=32, validation_data=([X1_test, X2_test], y_test))

# Evaluate the Model
loss, accuracy = model.evaluate([X1_test, X2_test], y_test)
print(f"Test Accuracy: {accuracy:.4f}")

# Predictions and performance metrics.
# predict() yields one column per output unit; flatten to one score per sample
# so the sklearn metrics below receive 1-D arrays.
y_scores = model.predict([X1_test, X2_test]).ravel()
y_pred = (y_scores > 0.5).astype(int)

# Classification Report
print("Classification Report:\n", classification_report(y_test, y_pred, digits=2))

# Confusion Matrix
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))

# Calculate EER
def calculate_eer(y_test, y_scores):
    """Estimate the equal error rate (EER) from binary labels and scores.

    The ROC curve is scanned for the operating point where the false-positive
    rate and the false-negative rate (``1 - TPR``) are closest; the FPR at
    that point is reported as the EER.

    Args:
        y_test: True binary labels, one per sample.
        y_scores: Predicted scores. Any shape that flattens to one score per
            sample is accepted (e.g. the ``(n, 1)`` output of ``model.predict``).

    Returns:
        Tuple ``(eer, eer_threshold)``: the FPR at the crossover point and the
        ROC threshold that produces it.
    """
    fpr, tpr, thresholds = roc_curve(y_test, np.ravel(y_scores))
    fnr = 1 - tpr
    # Locate the crossover once and reuse it for both return values
    crossover = np.nanargmin(np.abs(fpr - fnr))
    return fpr[crossover], thresholds[crossover]

eer, eer_threshold = calculate_eer(y_test, y_scores)
print(f"EER: {eer:.4f} (Threshold: {eer_threshold:.4f})")


Epoch 1/20
[1m826/826[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 12ms/step - accuracy: 0.9764 - loss: 0.0563 - val_accuracy: 1.0000 - val_loss: 6.5830e-10
Epoch 2/20
[1m826/826[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 11ms/step - accuracy: 0.9999 - loss: 3.0159e-04 - val_accuracy: 1.0000 - val_loss: 1.2089e-12
Epoch 3/20
[1m826/826[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 11ms/step - accuracy: 1.0000 - loss: 7.6629e-05 - val_accuracy: 1.0000 - val_loss: 2.9831e-16
Epoch 4/20
[1m826/826[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 11ms/step - accuracy: 0.9999 - loss: 1.9528e-04 - val_accuracy: 1.0000 - val_loss: 1.1165e-12
Epoch 5/20
[1m826/826[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 11ms/step - accuracy: 1.0000 - loss: 2.9421e-05 - val_accuracy: 1.0000 - val_loss: 2.5135e-13
Epoch 6/20
[1m826/826[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 11ms/step - accuracy: 1.0000 - loss: 4.2164e-05 - val_accuracy: 1.0000 