In [1]:
import pandas as pd
import time
import numpy as np
import matplotlib.pyplot as plt

# tensorflow libraries
import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras.models import Model
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Input, Dense, Dropout, Lambda
import tensorflow.keras.backend as K

In [2]:
train_df = pd.read_pickle('Speaker_Classification_data/train.df')
test_df = pd.read_pickle('Speaker_Classification_data/test.df')

In [3]:
train_df.head()

Unnamed: 0,inputA,inputB,label
0,"[0.24985932948240364, 0.6350656914100974, 0.82...","[0.7820350888855649, 0.3198341320895355, 0.303...",1
1,"[0.3447592728071933, 0.7605236831924497, 0.789...","[0.6068565133586963, 0.4892604057585005, 0.647...",1
2,"[0.5835598702428237, 0.2923623783944991, 0.494...","[0.7092419823457793, 0.3953920733270686, 0.584...",0
3,"[0.6494945188525626, 0.33861761010055624, 0.29...","[0.412796658774194, 0.6307090412962476, 0.7552...",0
4,"[0.5530646953235432, 0.46914860434629924, 0.48...","[0.605411651622807, 0.5611188390198073, 0.5267...",1


In [4]:
test_df.head()

Unnamed: 0,inputA,inputB,label
0,"[0.7599851760351478, 0.27829568977021635, 0.39...","[0.3881078382392493, 0.6320058758104472, 0.777...",1
1,"[0.23891840228138905, 0.19716560358624316, 0.7...","[0.1339817574334914, 0.3268776091606487, 0.788...",1
2,"[0.75814829097616, 0.38292398110746095, 0.6330...","[0.3696297750956259, 0.5905143892117646, 0.885...",0
3,"[0.2607263177495158, 0.6446019065053701, 0.742...","[0.729151566561377, 0.1314536503611208, 0.2343...",0
4,"[0.7674167172311745, 0.26044605158506295, 0.08...","[0.8528030713373913, 0.2308533587518186, 0.265...",1


In [5]:
def build_siamese_model(inputsize, nlayers=2, nsize=64, emb_dim=100):
    inputs = Input(inputsize)
    x = Dense(nsize, activation='relu')(inputs)
    x = Dropout(0.1)(x)
    for _ in range(nlayers-1):
        x = Dense(nsize, activation='relu')(x)
        x = Dropout(0.1)(x)
    outputs = Dense(emb_dim)(x)
    model = Model(inputs, outputs)
    
    return model

In [6]:
def cosine_similarity(vests):
    x, y = vests
    x = K.l2_normalize(x, axis=-1)
    y = K.l2_normalize(y, axis=-1)
    return K.sum(x * y, axis=-1, keepdims=True)

def cos_dist_output_shape(shapes):
    shape1, shape2 = shapes
    return (shape1[0],1)

In [15]:
# building model
input_shape = len(train_df['inputA'][0])
inputA = Input(input_shape)
inputB = Input(input_shape)
featureExtractor = build_siamese_model(input_shape)
featsA = featureExtractor(inputA)
featsB = featureExtractor(inputB)

output_cos = Lambda(cosine_similarity, output_shape=cos_dist_output_shape)([featsA, featsB])
model = Model(inputs=[inputA, inputB], outputs=output_cos)

model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [16]:
model.summary()

Model: "model_3"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_4 (InputLayer)            [(None, 47)]         0                                            
__________________________________________________________________________________________________
input_5 (InputLayer)            [(None, 47)]         0                                            
__________________________________________________________________________________________________
model_2 (Functional)            (None, 100)          13732       input_4[0][0]                    
                                                                 input_5[0][0]                    
__________________________________________________________________________________________________
lambda_1 (Lambda)               (None, 1)            0           model_2[0][0]              

In [12]:
train_df['inputA'] = train_df.apply(lambda x: np.array(x['inputA']), axis=1)
train_df['inputB'] = train_df.apply(lambda x: np.array(x['inputB']), axis=1)

test_df['inputA'] = test_df.apply(lambda x: np.array(x['inputA']), axis=1)
test_df['inputB'] = test_df.apply(lambda x: np.array(x['inputB']), axis=1)

In [14]:
train_df['inputA'][0]

array([2.49859329e-01, 6.35065691e-01, 8.28611267e-01, 7.65333672e-01,
       7.31631595e-01, 6.36227665e-01, 8.06864672e-01, 7.54001216e-01,
       5.86104537e-01, 6.11649904e-01, 4.36385925e-01, 4.83832310e-01,
       6.41672253e-01, 4.74620198e-01, 5.09460410e-01, 5.94459832e-01,
       6.45854726e-01, 7.00258344e-01, 6.93676032e-01, 7.09374271e-01,
       8.12282945e-01, 8.87349147e-01, 8.26914519e-01, 7.44313780e-01,
       6.83754190e-01, 5.30709550e-01, 2.48598184e-01, 4.58517299e-01,
       3.87128050e-01, 4.05851301e-01, 4.05928998e-01, 1.70740128e-01,
       5.08491768e-01, 4.24961292e-01, 5.88717167e-01, 7.25058453e-01,
       4.44843307e-01, 3.82548822e-01, 2.34412011e-02, 1.89612561e-01,
       1.37411920e-01, 2.25465108e-02, 1.70106484e-01, 7.66622483e-01,
       7.62607398e-04, 6.98730731e-01, 8.17818912e-01])

In [23]:
history = model.fit([train_df['inputA'], train_df['inputB']], train_df['label'],
          validation_data=([test_df['inputA'], test_df['inputB']], test_df['label']),
          batch_size=16, epochs=50, verbose=2)

ValueError: Failed to convert a NumPy array to a Tensor (Unsupported object type numpy.ndarray).