In [None]:
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
from numpy import loadtxt
from collections import Counter

In [None]:
from sklearn.model_selection import cross_val_score, StratifiedKFold, train_test_split, KFold
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, ConfusionMatrixDisplay , mean_absolute_error
from tensorflow.keras.losses import categorical_crossentropy, BinaryCrossentropy


In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Conv2D, MaxPooling2D, InputLayer
from tensorflow.keras.losses import sparse_categorical_crossentropy
from tensorflow.keras.optimizers import Nadam


In [None]:
# Read feature_df.csv into a DataFrame and show it as the cell output.
feature_df = pd.read_csv(filepath_or_buffer='feature_df.csv')
feature_df

In [None]:
# Read combined_df.csv into a DataFrame and show it as the cell output.
# Later cells read its 'real', 'user_id' and 'attribute' columns.
combined_df = pd.read_csv(filepath_or_buffer='combined_df.csv')
combined_df

In [None]:
# User-grouped k-fold cross-validation of a small MLP classifier.
#
# Users (not rows) are split into folds, so every row belonging to a given
# user lands entirely in either the training or the test partition of a fold.
# One model per fold is trained, scored on the held-out users and saved to
# 'All_attribute_<fold>.h5'.

# --- Configuration -----------------------------------------------------------
num_folds = 10
SEED = 42                   # makes fold assignment and weight init repeatable
LFW_SAMPLE_WEIGHT = 13      # weight applied to rows whose attribute is 'lfw'
batch_size = 88
no_epochs = 200
verbosity = 1
# NOTE(review): 1e-7 is far below the Keras Nadam default (1e-3); the value is
# kept unchanged here, but confirm it is intentional.
learning_rate = 0.0000001
loss_function = sparse_categorical_crossentropy

np.random.seed(SEED)
tf.random.set_seed(SEED)

# --- Data --------------------------------------------------------------------
inputs = feature_df
target = combined_df['real']
assert len(inputs) == len(combined_df), (
    f'feature_df has {len(inputs)} rows but combined_df has {len(combined_df)}'
)

# Plain arrays are used for training so that all row selection below is
# unambiguously positional, whatever index the DataFrames carry.
features = inputs.to_numpy()
labels = target.to_numpy()

# Split over the user ids that actually occur in the data. Using
# range(max_id) instead would silently drop the user with the largest id and
# would assume ids are contiguous and start at 0.
user_column = combined_df['user_id']
user_ids = np.sort(user_column.unique())

# Per-sample weights: 1 everywhere, LFW_SAMPLE_WEIGHT for 'lfw' rows.
print(f'inputs size = {len(inputs)}')
print(f'target size = {len(target)}')
sample_weight = np.ones(shape=(len(target),))
sample_weight[(combined_df['attribute'] == 'lfw').to_numpy()] = LFW_SAMPLE_WEIGHT
print(f'sample size = {len(sample_weight)}')
print(f'weight size per sample  = {Counter(sample_weight)}')


def build_mlp(n_features):
    """Return a compiled MLP: n_features -> 64 -> 32 -> 2 (softmax).

    The model is compiled with sparse categorical cross-entropy, a Nadam
    optimizer using `learning_rate`, and accuracy as the tracked metric.
    """
    model = Sequential()
    model.add(InputLayer(input_shape=(n_features, )))
    model.add(Dense(64, activation='relu'))    # hidden layer 1
    model.add(Dense(32, activation='relu'))    # hidden layer 2
    model.add(Dense(2, activation='softmax'))  # output layer
    model.compile(
        loss=loss_function,
        optimizer=tf.keras.optimizers.Nadam(learning_rate=learning_rate, name="Nadam"),
        metrics=['accuracy'],
    )
    return model


# --- Cross-validation --------------------------------------------------------
kfold = KFold(n_splits=num_folds, shuffle=True, random_state=SEED)

acc_scores = []
prec_scores = []
rec_scores = []
f1_scores = []
for fold_no, (train, test) in enumerate(kfold.split(user_ids), start=1):
    # `train` / `test` index into `user_ids`; turn them into row positions.
    index_train = np.flatnonzero(user_column.isin(user_ids[train]).to_numpy())
    index_test = np.flatnonzero(user_column.isin(user_ids[test]).to_numpy())

    # Drop the previous fold's graph/state so memory does not grow per fold.
    tf.keras.backend.clear_session()
    MLP_ = build_mlp(features.shape[1])

    print('------------------------------------------------------------------------')
    print(f'Training for fold {fold_no} ...')
    print(features[index_train].shape, labels[index_train].shape)
    print(len(index_train), len(index_test))

    history = MLP_.fit(
        features[index_train], labels[index_train],
        batch_size=batch_size,
        epochs=no_epochs,
        sample_weight=sample_weight[index_train],
        verbose=verbosity,
    )

    y_pred = MLP_.predict(features[index_test])
    y_test = labels[index_test].tolist()
    # Class 0 only when its probability is strictly larger; ties go to class 1.
    result = np.where(y_pred[:, 0] > y_pred[:, 1], 0, 1).tolist()

    acc_scores.append(accuracy_score(y_test, result))
    prec_scores.append(precision_score(y_test, result))
    rec_scores.append(recall_score(y_test, result))
    f1_scores.append(f1_score(y_test, result))

    # Metrics of this fold only (the averages are reported after the loop).
    print(f'> Fold {fold_no} - Accuracy: {acc_scores[-1]} - Precision: {prec_scores[-1]}'
          f' - Recall: {rec_scores[-1]} - F1 score: {f1_scores[-1]}')

    MLP_.save(f'All_attribute_{fold_no}.h5')

# --- Average scores over all folds -------------------------------------------
print('------------------------------------------------------------------------')
print('Average scores for all folds:')
print(f'> Accuracy: {np.mean(acc_scores)} (+- {np.std(acc_scores)})')
print(f'> Precision: {np.mean(prec_scores)} (+- {np.std(prec_scores)})')
print(f'> Recall: {np.mean(rec_scores)} (+- {np.std(rec_scores)})')
print(f'> F1 score: {np.mean(f1_scores)} (+- {np.std(f1_scores)})')
print('------------------------------------------------------------------------')

In [None]:
# Plain Python list of the labels in `target`, used by the evaluation cells.
target_list = target.to_list()

In [None]:
# Score the model saved for fold 1 against every row of `inputs`.
# NOTE(review): `inputs` includes the rows fold 1 was trained on, so these
# numbers are not a held-out estimate - confirm this is what is wanted.
MLP_ = keras.models.load_model('All_attribute_1.h5')
test_predictions = MLP_.predict(inputs)

# Class 0 only when its probability is strictly larger; ties go to class 1.
result = np.where(test_predictions[:, 0] > test_predictions[:, 1], 0, 1).tolist()

print('epochs 200 + layer')
# scikit-learn metrics take (y_true, y_pred) in that order. The result is kept
# in `recall_macro` so the imported `recall_score` function is not overwritten
# (overwriting it makes any later call to recall_score fail).
recall_macro = recall_score(target_list, result, average='macro')
print('recall', recall_macro)
f1_macro = f1_score(target_list, result, average='macro')
print("F1 score (macro):", f1_macro)
print('mean_absolute_error', mean_absolute_error(target_list, result))

In [None]:
# confusion_matrix expects (y_true, y_pred): rows are the true labels and
# columns the predicted ones. Passing the predictions first transposes it.
confusion_matrix(target_list, result)

In [None]:
# Build the confusion matrix (true labels vs. predictions) and plot it.
cm = confusion_matrix(y_true=target_list, y_pred=result)
disp = ConfusionMatrixDisplay(confusion_matrix=cm)
disp.plot()
plt.show()