## Classification tasks 

In [None]:
###################### TASK 1 #########################

def train_test_split(X, y):
    """Split samples in their given order into a training and a validation part.

    No shuffling is done. The boundary row is ``2 * (X.shape[0] // 3)``, so the
    first two thirds (rounded down to a multiple of two thirds) go to training
    and every remaining row goes to validation.

    Args:
        X: 2-D array indexed as ``X[rows, :]``.
        y: Labels, sliced at the same row boundary as ``X``.

    Returns:
        Tuple ``(X_train, X_val, y_train, y_val)``.
    """
    boundary = 2 * (X.shape[0] // 3)
    head, tail = slice(None, boundary), slice(boundary, None)
    return X[head, :], X[tail, :], y[head], y[tail]

import numpy as np
def classifier(reps):
    """Train a logistic-regression classifier on one representation and score it.

    Loads ``<reps>_features.npy`` and ``<reps>_phonemes.npy``, splits them with
    ``train_test_split``, standardises the features with a scaler fitted on the
    training part only, fits the model, and evaluates it on the validation part
    and on ``test_<reps>_features.npy`` / ``test_<reps>_phonemes.npy``.

    The result is also recorded in the module-level ``global_error_rates``
    dictionary under the key ``reps``. The dictionary is created on first use
    when it does not exist yet, so a missing global no longer raises NameError
    after the (potentially long) training run has finished.

    Args:
        reps: File-name prefix of the representation to load.

    Returns:
        Tuple ``(validation_error_rate, test_error_rate)``, each computed as
        ``1 - model.score(...)``.
    """
    from sklearn.preprocessing import StandardScaler
    from sklearn.linear_model import LogisticRegression

    model = LogisticRegression(solver='sag', random_state=None, multi_class='multinomial',
                               warm_start=False, max_iter=2000)
    X = np.load(reps + "_features.npy")
    y = np.load(reps + "_phonemes.npy")

    X_train, X_val, y_train, y_val = train_test_split(X, y)

    # Fit the scaler on the training rows only; validation and test data are
    # transformed with those same statistics.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)

    model.fit(X_train, y_train)
    X_val = scaler.transform(X_val)
    val_error_rate = 1 - model.score(X_val, y_val)

    W = scaler.transform(np.load("test_" + reps + "_features.npy"))
    z = np.load("test_" + reps + "_phonemes.npy")
    test_error_rate = 1 - model.score(W, z)

    # Reuse the existing module-level dict if there is one, otherwise create it.
    globals().setdefault("global_error_rates", {})[reps] = (val_error_rate, test_error_rate)
    return (val_error_rate, test_error_rate)

# classifier() requires the representation name; calling it without one raises
# TypeError. Evaluate each representation that the plot below reports on.
for reps in ['mfcc', 'conv', 'rec0', 'rec1', 'rec2', 'rec3']:
    print(reps, classifier(reps))

###VISUALIZATION####
# Plots hard-coded validation and test error rates, one value per
# representation (presumably copied from earlier classifier runs - confirm).
import pandas as pd
representations = ['mfcc', 'conv', 'rec0', 'rec1', 'rec2', 'rec3']
validation_errors = [0.5852624655131461,  0.6331695017448977, 0.42787451352980943, 0.4196159397781879,
                     0.43357512196013226, 0.44196159397781876]
test_errors = [0.6081288343558282, 0.6614263803680982, 0.5663343558282208, 0.5617331288343559,
               0.5648006134969326, 0.5751533742331288]

dataset = {'representations': representations, 'validation_errors': validation_errors, 'test_errors': test_errors}

df = pd.DataFrame(data=dataset)

import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches

sns.set_style("white", {'legend.frameon': False})
# Both series are drawn onto the same axes: validation in blue, test in red.
g1 = sns.pointplot(x="representations", y="validation_errors", color='blue', scale=0.7, linestyles='--', data=df)
g2 = sns.pointplot(x="representations", y="test_errors", color='red', scale=0.7, linestyles='--', data=df)

# Hand-built legend handles; each variable name now matches the colour it
# carries (they were swapped before). Legend order is unchanged: blue, then red.
blue_patch = mpatches.Patch(color='blue', label='train data validation error')
red_patch = mpatches.Patch(color='red', label='test data error')
plt.legend(handles=[blue_patch, red_patch], frameon=False, loc='best', bbox_to_anchor=(1, 0., 0.5, 0.5))
sns.despine()
plt.xlabel("representations")
plt.ylabel("error rates")
plt.title("errors per layer")
# plt.show() returns None, so wrapping it in print() only emitted a stray "None".
plt.show()

In [None]:
################### TASK 2 #######################

def train_test_split(X, y):
    """Split samples in their given order into a training and a validation part.

    No shuffling is done. The boundary row is ``2 * (X.shape[0] // 3)``; rows
    before it form the training part and all remaining rows the validation part.

    Args:
        X: 2-D array indexed as ``X[rows, :]``.
        y: Labels, sliced at the same row boundary as ``X``.

    Returns:
        Tuple ``(X_train, X_val, y_train, y_val)``.
    """
    boundary = 2 * (X.shape[0] // 3)
    head, tail = slice(None, boundary), slice(boundary, None)
    return X[head, :], X[tail, :], y[head], y[tail]

import numpy as np
def classifier(reps):
    """Train on one representation and score original vs. manipulated phonemes.

    Loads ``<reps>_features.npy`` and ``<reps>_phonemes.npy``, keeps the
    training part returned by ``train_test_split``, standardises it, and fits a
    logistic-regression model. The model is then scored on the ``'original'``
    and ``'manipulated'`` entries returned by ``list_separated(reps)`` (a helper
    defined outside this block). For each entry, index ``1`` is used as the
    feature matrix and index ``0`` as the labels.

    Args:
        reps: File-name prefix of the representation to load; also passed to
            ``list_separated``.

    Returns:
        Tuple ``(ori_err, mani_err)``, each computed as ``1 - model.score(...)``.
    """
    from sklearn.preprocessing import StandardScaler
    from sklearn.linear_model import LogisticRegression

    model = LogisticRegression(solver='sag', random_state=None, multi_class='multinomial',
                               warm_start=False, max_iter=2000)
    X = np.load(reps + "_features.npy")
    y = np.load(reps + "_phonemes.npy")

    # Only the training part is needed here; the validation part is not scored.
    X_train, _, y_train, _ = train_test_split(X, y)

    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    model.fit(X_train, y_train)

    # Evaluate the helper once instead of once per lookup.
    # NOTE(review): assumes list_separated(reps) returns the same data on every
    # call - confirm against its definition.
    separated = list_separated(reps)

    def _error_rate(kind):
        # Index 0 holds the labels, index 1 the features for that group.
        labels = separated[kind][0]
        features = scaler.transform(separated[kind][1])
        return 1 - model.score(features, labels)

    ori_err = _error_rate('original')
    mani_err = _error_rate('manipulated')

    return (ori_err, mani_err)

###VISUALIZATION####
# Plots hard-coded error rates for original and manipulated phonemes, one value
# per representation (presumably copied from earlier classifier runs - confirm).

import pandas as pd
representations = ['mfcc', 'conv', 'rec0', 'rec1', 'rec2', 'rec3']
original_errors = [0.653061224489796, 0.6734693877551021, 0.5510204081632653, 0.4897959183673469, 0.40816326530612246, 0.4897959183673469]
manipulated_errors = [0.9387755102040817, 0.9591836734693877, 1.0, 0.9591836734693877, 0.9591836734693877, 0.9387755102040817]

dataset = {'representations': representations, 'original_errors': original_errors, 'manipulated_errors': manipulated_errors}

df = pd.DataFrame(data=dataset)

import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches

sns.set_style("white", {'legend.frameon': False})
# Both series are drawn onto the same axes: original in blue, manipulated in red.
g1 = sns.pointplot(x="representations", y="original_errors", color='blue', scale=0.7, linestyles='--', data=df)
g2 = sns.pointplot(x="representations", y="manipulated_errors", color='red', scale=0.7, linestyles='--', data=df)

# Hand-built legend handles; each variable name now matches the colour it
# carries (they were swapped before). Legend order is unchanged: blue, then red.
blue_patch = mpatches.Patch(color='blue', label='original pho. errors')
red_patch = mpatches.Patch(color='red', label='manipulated pho. errors')
plt.legend(handles=[blue_patch, red_patch], frameon=False, loc='best', bbox_to_anchor=(1, 0., 0.5, 0.5))
sns.despine()
plt.xlabel("representations")
plt.ylabel("error rates")
plt.title("original & manipulated phonemes errors per layer")
# plt.show() returns None, so wrapping it in print() only emitted a stray "None".
plt.show()


In [None]:
################### TASK 3 #######################

def train_test_split(X, y):
    """Split samples in their given order into a training and a validation part.

    No shuffling is done. The boundary row is ``2 * (X.shape[0] // 3)``; rows
    before it form the training part and all remaining rows the validation part.

    Args:
        X: 2-D array indexed as ``X[rows, :]``.
        y: Labels, sliced at the same row boundary as ``X``.

    Returns:
        Tuple ``(X_train, X_val, y_train, y_val)``.
    """
    boundary = 2 * (X.shape[0] // 3)
    head, tail = slice(None, boundary), slice(boundary, None)
    return X[head, :], X[tail, :], y[head], y[tail]

import numpy as np
def classifier(reps):
    """Train on one representation and collect per-letter prediction details.

    Loads ``<reps>_features.npy`` and ``<reps>_phonemes.npy``, keeps the
    training part returned by ``train_test_split``, standardises it, and fits a
    logistic-regression model. For every key yielded by iterating
    ``letters_separate(reps)`` (a helper defined outside this block), the model
    is evaluated on that key's ``'original'`` and ``'manipulated'`` entries,
    where index ``1`` is used as the feature matrix and index ``0`` as labels.

    Args:
        reps: File-name prefix of the representation to load; also passed to
            ``letters_separate``.

    Returns:
        ``{reps: {letter: [original, manipulated]}}`` where each of the two
        items is a tuple ``(error_rate, predictions, class_probabilities)``
        produced by ``1 - model.score``, ``model.predict`` and
        ``model.predict_proba`` respectively.
    """
    from sklearn.preprocessing import StandardScaler
    from sklearn.linear_model import LogisticRegression

    model = LogisticRegression(solver='sag', random_state=None, multi_class='multinomial',
                               warm_start=False, max_iter=2000)
    X = np.load(reps + "_features.npy")
    y = np.load(reps + "_phonemes.npy")

    # Only the training part is needed here; the validation part is not scored.
    X_train, _, y_train, _ = train_test_split(X, y)

    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    model.fit(X_train, y_train)

    # Evaluate the helper once instead of five times per loop iteration.
    # NOTE(review): assumes letters_separate(reps) returns the same data on
    # every call - confirm against its definition.
    by_letter = letters_separate(reps)

    def _evaluate(group):
        # Index 0 holds the labels, index 1 the features for that group.
        labels = group[0]
        features = scaler.transform(group[1])
        error_rate = 1 - model.score(features, labels)
        return (error_rate, model.predict(features), model.predict_proba(features))

    per_letter = {}
    for letter in by_letter:
        entry = by_letter[letter]
        per_letter[letter] = [_evaluate(entry['original']), _evaluate(entry['manipulated'])]

    return {reps: per_letter}
    
    
###VISUALIZATION####
  
import seaborn as sns
import matplotlib.pyplot as plt
# DF is not defined in this cell: per the original note it is the dataset of
# compiled results shared in drive, and must be loaded before running this.
g = sns.FacetGrid(DF, col='phoneme', row='prediction', hue='prediction', height=4)
lbls = ['mfcc', 'conv', 'rec0', 'rec1', 'rec2', 'rec3']
g.map(plt.scatter, "representations", "predic_label_rate", alpha=0.5, linewidth=0.5)
# Label the six x positions with the representation names.
plt.xticks(np.arange(6), lbls)
fig = g.fig
fig.set_size_inches(18, 8)
# Leave head-room above the facets for the figure title.
fig.subplots_adjust(top=0.85, wspace=0.3)
fig.suptitle('phoneme - representations - prediction label rate- prediction', fontsize=14)
# Wrapping this call in print() only dumped the returned object's repr.
g.add_legend(title='manipulated phoneme prediction label')