In [13]:
import pandas as pd
import numpy as np

import src.replay_detection as rd_features
import src.make_plot as plt_samples
import src.evaluation_plot as ev_plot

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


### Load data into pandas DataFrames

In [None]:
# Feature tables for the "original" recordings, read from ./computedFeatures:
# one CSV per split (train / dev / eva).
data_train_original, data_dev_original, data_eva_original = map(pd.read_csv, (
    './computedFeatures/data_train_original.csv',
    './computedFeatures/data_dev_original.csv',
    './computedFeatures/data_eva_original.csv',
))

# Matching feature tables for the "spoof" recordings.
data_train_spoof, data_dev_spoof, data_eva_spoof = map(pd.read_csv, (
    './computedFeatures/data_train_spoof.csv',
    './computedFeatures/data_dev_spoof.csv',
    './computedFeatures/data_eva_spoof.csv',
))

# Labels for the test set. Alternatively, they can be loaded from the files
# under /protocol/* and read as a pandas DataFrame.
y_test = np.loadtxt('./protocol/y_test.txt')

### Training scripts

##### GMM-model

In [19]:
from sklearn.mixture import GMM

def GMMLearn(X_train_original, X_train_spoof, X_test, y_test, n_g):
    """
    Fit one GMM on the original data and one on the spoofed data, score the
    test set with the difference of the two models' scores, plot the ROC
    curve and print the equal error rate.

    Parameters
    ----------
    X_train_original : training features of the original recordings
        (passed to ``GMM.fit``).
    X_train_spoof : training features of the spoofed recordings
        (passed to ``GMM.fit``).
    X_test : features to score with both models.
    y_test : reference labels, forwarded to ``plotROC`` and
        ``ev_plot.EER_THR``.
    n_g : number of Gaussian components in each mixture.

    Returns
    -------
    scores : ``g1m.score(X_test) - g2m.score(X_test)``; larger values mean
        the "original" model explains the sample better than the "spoof" one.
    """
    # Double-check the calls below: scikit-learn reworked its GMM API
    # (the note left here mentions version 0.19). The class is now called
    # GaussianMixture and its arguments may have changed as well.
    # With the legacy GMM class, `score` yields one log-likelihood per sample;
    # GaussianMixture.score returns a single mean value instead, so
    # `score_samples` is the equivalent call when migrating.

    g1m = GMM(n_components = n_g, covariance_type='diag',init_params='wmc', n_iter=30)
    g1m.fit(X_train_original)

    g2m = GMM(n_components = n_g, covariance_type='diag',init_params='wmc', n_iter=30)
    g2m.fit(X_train_spoof)

    scores = g1m.score(X_test) - g2m.score(X_test)

    # NOTE(review): plotROC is not defined in the visible cells; presumably it
    # comes from an earlier cell or one of the plotting helper modules - verify.
    plotROC(y_test, scores, "scores")

    eer, thr = ev_plot.EER_THR(y_test, scores)
    print ("n_g = %d, EER = %f, thr = %lf" % (n_g, eer, thr))

    # Hand the scores back so callers (`sc = GMMLearn(...)`) receive them
    # instead of None.
    return scores



In [None]:
# Run the GMM experiment with 512 Gaussian components per model.
# NOTE(review): fetureVecTrainOriginal, fetureVecTrainSpoof and featureVecTest
# are not defined in the visible cells - confirm they exist before running.
sc = GMMLearn(
    fetureVecTrainOriginal,
    fetureVecTrainSpoof,
    featureVecTest,
    y_test,
    n_g=512,
)

##### GMM-UBM model

In [None]:
def computeUBM(ubm_model, data, relevance_factor=16):
    """
    MAP-adapt a diagonal-covariance GMM (the UBM) to ``data`` and return the
    adapted model.

    For samples X = {x_1, ..., x_T} and M mixture components:

        P(i|x_t) = w_i * p_i(x_t) / sum_j(w_j * p_j(x_t))     (posterior)
        n_i      = sum_t P(i|x_t)                             (soft count)
        E_i(x)   = sum_t P(i|x_t) * x_t    / n_i
        E_i(x^2) = sum_t P(i|x_t) * x_t^2  / n_i
        alpha_i  = n_i / (n_i + relevance_factor)

        w_i'     = alpha_i * n_i / T + (1 - alpha_i) * w_i, rescaled to sum to 1
        mu_i'    = alpha_i * E_i(x)   + (1 - alpha_i) * mu_i
        var_i'   = alpha_i * E_i(x^2) + (1 - alpha_i) * (var_i + mu_i^2) - mu_i'^2

    The same alpha_i is used for weights, means and variances.

    Parameters
    ----------
    ubm_model : fitted mixture model exposing ``weights_``, ``means_``,
        ``covars_`` and ``predict_proba``. ``covars_`` must have the
        per-component diagonal layout (n_components, n_features), i.e.
        covariance_type 'diag'; 'tied' and 'full' layouts are not supported.
    data : array-like of shape (T, n_features); the samples the model is
        adapted to.
    relevance_factor : controls how strongly the UBM parameters are kept;
        larger values need more data to move a component. Defaults to 16.

    Returns
    -------
    A deep copy of ``ubm_model`` carrying the adapted ``weights_``,
    ``means_`` and ``covars_``. The model passed in is left untouched, so one
    UBM can be adapted several times (e.g. once per class).

    Raises
    ------
    ValueError
        If ``data`` contains no samples.
    """
    import copy  # local import so the function does not depend on another cell

    data = np.asarray(data, dtype=float)
    T = data.shape[0]  # number of samples
    if T == 0:
        raise ValueError("data must contain at least one sample")

    # Current parameters of the UBM.
    ubm_weights = np.asarray(ubm_model.weights_, dtype=float)  # [M]
    ubm_means = np.asarray(ubm_model.means_, dtype=float)      # [M x xdim]
    ubm_covars = np.asarray(ubm_model.covars_, dtype=float)    # [M x xdim]

    # predict_proba already returns the normalized posteriors P(i|x_t); the
    # mixture weights are part of them, so they must not be applied again.
    pr_i_xt = np.asarray(ubm_model.predict_proba(data), dtype=float)  # [T x M]

    # Soft counts per component.
    n_i = pr_i_xt.sum(axis=0)  # [M]

    # A component that received no probability mass has n_i == 0; dividing by
    # a tiny positive number instead yields 0 rather than NaN, and since its
    # alpha_i is 0 the component simply keeps its UBM parameters.
    safe_n_i = np.maximum(n_i, np.finfo(float).eps)[:, np.newaxis]

    # First and second moments of the data under each component.
    E_x = np.dot(pr_i_xt.T, data) / safe_n_i        # [M x xdim]
    E_x2 = np.dot(pr_i_xt.T, data ** 2) / safe_n_i  # [M x xdim]

    # Data-dependent adaptation coefficient (shared by weights/means/variances).
    alpha_i = n_i / (n_i + relevance_factor)  # [M]

    # Adapted weights, rescaled so that they form a valid distribution.
    new_weights = alpha_i * n_i / T + (1.0 - alpha_i) * ubm_weights
    new_weights = new_weights / new_weights.sum()

    # Column form of alpha_i so it broadcasts over the feature axis.
    alpha_col = alpha_i[:, np.newaxis]  # [M x 1]
    new_means = alpha_col * E_x + (1.0 - alpha_col) * ubm_means
    new_covars = (alpha_col * E_x2
                  + (1.0 - alpha_col) * (ubm_covars + ubm_means ** 2)
                  - new_means ** 2)

    # Write the result into a copy: mutating the argument would make every
    # model adapted from the same UBM alias one object.
    adapted_model = copy.deepcopy(ubm_model)
    adapted_model.means_ = new_means
    adapted_model.weights_ = new_weights
    adapted_model.covars_ = new_covars

    return adapted_model

In [None]:
### USAGE:
# Build one training set that contains all data - spoofed and original,
# for example: X_train = pd.concat([data_train_original, data_train_spoof])
# NOTE(review): X_train, X_test, mfcc_original_develop and mfcc_spoof_develop
# are not defined in the visible cells - they must exist before this cell runs.
import copy

# Universal background model trained on the pooled data.
model = GMM(n_components = 200, covariance_type='diag',init_params='wmc', n_iter=20)
model.fit(X_train)

# Adapt the UBM on the development set, once towards the original data and
# once towards the spoofed data. Each adaptation gets its own copy of the UBM
# so the two adapted models can never alias each other (or the UBM itself),
# even if computeUBM updates the model it is given in place.
adapt_original = computeUBM(copy.deepcopy(model), mfcc_original_develop)
adapt_spoof = computeUBM(copy.deepcopy(model), mfcc_spoof_develop)

# 1 where the spoof-adapted model scores higher than the original-adapted one.
# NOTE(review): assumes y_test uses 1 for spoof and 0 for original - confirm.
prediction  = np.array(adapt_original.score(X_test)  < adapt_spoof.score(X_test)).astype('int')

accuracy = np.mean(prediction == y_test) * 100
print ('accuracy - prediction on evaluation set', accuracy)


### SVC

In [None]:
from sklearn.preprocessing import StandardScaler  # was used below without being imported
from sklearn.svm import SVC

# NOTE(review): X_train, y_train and X_dev are not defined in the visible
# cells - they must exist before this cell runs.
clf = SVC()

# Fit the scaler on the training data only and reuse the same transform for
# the development data.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_dev_scaled = scaler.transform(X_dev)

clf.fit(X_train_scaled, y_train)
# The result name is misspelled ("prediciton"); it is kept unchanged in case
# later cells read it.
prediciton = clf.predict(X_dev_scaled)
