In [1]:
import numpy as np
import os
from preprocessor import *
import torch
from models.mstcn import *
from models.resnet_feature_extractor import *
from FeatureExtractor import *
import time
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.metrics import roc_auc_score

In [2]:
from LipMovementClassifier import *

In [3]:

# Build a LipMovementClassifier from the face2face feature file with a learning
# rate of 0.0005, then run its train() and test() steps. The recorded output
# below this cell shows the per-epoch loss/accuracy/AUC and the test-set report.
classifer = LipMovementClassifier('featuresExtracted/face2facefeatures.npz', learning_rate=0.0005)
classifer.train()
classifer.test()

Epoch: 1, loss: 1.657
accuracy: 0.770
AUC: 0.833
Epoch: 2, loss: 0.293
accuracy: 0.883
AUC: 0.954
Epoch: 3, loss: 0.233
accuracy: 0.915
AUC: 0.973
Epoch: 4, loss: 0.153
accuracy: 0.943
AUC: 0.987
Epoch: 5, loss: 0.141
accuracy: 0.947
AUC: 0.989
Epoch: 6, loss: 0.105
accuracy: 0.960
AUC: 0.994
Epoch: 7, loss: 0.105
accuracy: 0.962
AUC: 0.994
Epoch: 8, loss: 0.076
accuracy: 0.970
AUC: 0.997
Epoch: 9, loss: 0.077
accuracy: 0.972
AUC: 0.997
Epoch: 10, loss: 0.055
accuracy: 0.981
AUC: 0.998
Time of Training: 803.612
Test Set Evaluation:
Test Videos Accuracy: 0.979
Test Videos AUC: 0.997
Test Videos Confusion Matrix:
[[139   1]
 [  5 135]]
Test Videos Classification Report:
              precision    recall  f1-score   support

         0.0       0.97      0.99      0.98       140
         1.0       0.99      0.96      0.98       140

    accuracy                           0.98       280
   macro avg       0.98      0.98      0.98       280
weighted avg       0.98      0.98      0.98       280

In [None]:
# --- Run configuration -------------------------------------------------------
# Everything a reader might tune lives here; each value is also written to the
# run's report.txt so results can be traced back to their settings.
pretrainedModelPath = 'pretrainedModels/mstcn.pth'
configFile = 'models/configs/mstcn.json'

learning_rate = 2e-4
batch_size = 32

FakeFeaturesPath = 'featuresExtracted/face2facefeatures.npz'
RealFeaturesPath = 'featuresExtracted/realfeatures.npz'

# Load the model first: if loading fails we have not yet created an output
# directory or opened a report file that would be left dangling.
MSTCNModel = load_mstcn_model(pretrainedModelPath,configfile=configFile)

# --- Output directory --------------------------------------------------------
# Runs are stored as trainedclassifiers/model<N>. Create the root on a fresh
# checkout (os.listdir would raise otherwise) and skip over indices that are
# already taken, which happens when an earlier run directory was deleted.
classifiersRoot = "trainedclassifiers"
os.makedirs(classifiersRoot, exist_ok=True)

modelIndex = len(os.listdir(classifiersRoot)) + 1
OutputDir = classifiersRoot + "/model" + str(modelIndex)
while os.path.exists(OutputDir):
    modelIndex += 1
    OutputDir = classifiersRoot + "/model" + str(modelIndex)
os.makedirs(OutputDir)

# The report handle stays open on purpose: the training and evaluation cells
# append to it, and the evaluation cell closes it.
report = open(OutputDir+"/report.txt", "w")

report.write("Pretrained Model Path: "+pretrainedModelPath+"\n")
report.write("Config File: "+configFile+"\n")
report.write("Learning Rate: "+str(learning_rate)+"\n")
report.write("Batch Size: "+str(batch_size)+"\n")
report.write("Fake Features Path: "+FakeFeaturesPath+"\n")
report.write("Real Features Path: "+RealFeaturesPath+"\n")
# Flush so the settings are on disk even if a later cell crashes the kernel.
report.flush()

In [None]:
# Fine-tune the model on features extracted from real and fake videos.
fakeFeatures = np.load(FakeFeaturesPath)['features']
realFeatures = np.load(RealFeaturesPath)['features']

# Hold out the last 1680 feature rows of each class as the test set:
# 140 videos x 12 rows per video (the per-video metrics regroup rows in 12s).
TEST_VIDEOS_PER_CLASS = 140
SAMPLES_PER_VIDEO = 12
testSize = TEST_VIDEOS_PER_CLASS * SAMPLES_PER_VIDEO

# Without this guard a too-small feature file would silently yield an empty
# training set (or a test set that overlaps everything).
assert len(fakeFeatures) > testSize, "not enough fake features to hold out the test set"
assert len(realFeatures) > testSize, "not enough real features to hold out the test set"

fakeTestFeatures = fakeFeatures[-testSize:]
realTestFeatures = realFeatures[-testSize:]
fakeTrainFeatures = fakeFeatures[:-testSize]
realTrainFeatures = realFeatures[:-testSize]

# Labels: fake = 0, real = 1, one column per row so they match the model output
# shape expected by the loss.
fakeTestLabels = np.zeros((fakeTestFeatures.shape[0],1))
realTestLabels = np.ones((realTestFeatures.shape[0],1))

fakeTrainLabels = np.zeros((fakeTrainFeatures.shape[0],1))
realTrainLabels = np.ones((realTrainFeatures.shape[0],1))

# Concatenate datasets (fake rows first, then real rows). The test set is left
# unshuffled so consecutive rows of one video stay together.
trainFeatures = np.concatenate((fakeTrainFeatures, realTrainFeatures), axis=0)
trainLabels = np.concatenate((fakeTrainLabels, realTrainLabels), axis=0)
testFeatures = np.concatenate((fakeTestFeatures, realTestFeatures), axis=0)
testLabels = np.concatenate((fakeTestLabels, realTestLabels), axis=0)

# Training data is shuffled by the DataLoader; seeding torch right before
# building it makes the shuffle order reproducible.
torch.manual_seed(42)
trainloader = torch.utils.data.DataLoader(list(zip(trainFeatures, trainLabels)), batch_size=batch_size, shuffle=True)
testloader = torch.utils.data.DataLoader(list(zip(testFeatures, testLabels)), batch_size=batch_size)

In [None]:
# Sum of the training labels, displayed as the cell output.
np.sum(trainLabels)

In [None]:
def calculateAccuracy(preds,labels):
    """Return the fraction of samples whose rounded prediction equals its label.

    Both arguments are converted to flat NumPy arrays. Predictions are rounded
    to the nearest integer before the element-wise comparison.
    """
    predicted = np.round(np.ravel(preds))
    actual = np.ravel(labels)
    return np.mean(predicted == actual)

In [None]:
def calculateAUC(preds,labels):
    """Return the ROC AUC of the raw (unrounded) predictions against the labels.

    Both arguments are flattened to 1-D arrays and passed to
    sklearn's roc_auc_score as (labels, scores).
    """
    scores = np.ravel(preds)
    targets = np.ravel(labels)
    return roc_auc_score(targets, scores)

In [None]:
def calculateAccuracyByVideos(preds,labels,samples_per_video=12):
    """Return the accuracy after aggregating consecutive samples into videos.

    Each prediction is rounded to a hard 0/1 vote, consecutive groups of
    ``samples_per_video`` votes are averaged, and the average is rounded again
    to give one decision per video (a majority vote). Labels are aggregated the
    same way.

    Args:
        preds: per-sample prediction scores (any array-like; flattened).
        labels: per-sample labels (any array-like; flattened).
        samples_per_video: number of consecutive samples that form one video.
            Defaults to 12, the value previously hard-coded here.

    Returns:
        Fraction of videos whose aggregated prediction equals the aggregated label.

    Raises:
        ValueError: if the number of samples is not a multiple of
            ``samples_per_video``.
    """
    preds = np.array(preds).round().flatten()
    labels = np.array(labels).flatten()
    if preds.size % samples_per_video != 0 or labels.size % samples_per_video != 0:
        raise ValueError(
            "number of samples (%d preds, %d labels) is not a multiple of samples_per_video=%d"
            % (preds.size, labels.size, samples_per_video)
        )
    preds = preds.reshape(-1,samples_per_video)
    labels = labels.reshape(-1,samples_per_video)
    # np.round rounds halves to the nearest even value, so an exact 50/50 vote
    # resolves to class 0.
    preds = np.mean(preds,axis=1).round()
    labels = np.mean(labels,axis=1).round()
    return (preds == labels).mean()

In [None]:
def calculateAUCByVideos(preds,labels,samples_per_video=12):
    """Return the ROC AUC after averaging consecutive samples into videos.

    Raw prediction scores are averaged over consecutive groups of
    ``samples_per_video`` samples to give one score per video; labels are
    averaged the same way and passed to roc_auc_score as the targets.

    Args:
        preds: per-sample prediction scores (any array-like; flattened).
        labels: per-sample labels (any array-like; flattened).
        samples_per_video: number of consecutive samples that form one video.
            Defaults to 12, the value previously hard-coded here.

    Raises:
        ValueError: if the number of samples is not a multiple of
            ``samples_per_video``.
    """
    preds = np.array(preds).flatten()
    labels = np.array(labels).flatten()
    if preds.size % samples_per_video != 0 or labels.size % samples_per_video != 0:
        raise ValueError(
            "number of samples (%d preds, %d labels) is not a multiple of samples_per_video=%d"
            % (preds.size, labels.size, samples_per_video)
        )
    preds = preds.reshape(-1,samples_per_video)
    labels = labels.reshape(-1,samples_per_video)
    preds = np.mean(preds,axis=1)
    # Labels are not rounded here; this assumes every sample of a video carries
    # the same label, so the mean is already 0 or 1 - TODO confirm for new data.
    labels = np.mean(labels,axis=1)
    return roc_auc_score(labels,preds)

In [None]:
def caculateCMAndReportByVideo(preds,labels,samples_per_video=12):
    """Return the per-video confusion matrix and classification report.

    Each prediction is rounded to a hard 0/1 vote, consecutive groups of
    ``samples_per_video`` votes are averaged and rounded again to give one
    decision per video; labels are aggregated the same way.

    Args:
        preds: per-sample prediction scores (any array-like; flattened).
        labels: per-sample labels (any array-like; flattened).
        samples_per_video: number of consecutive samples that form one video.
            Defaults to 12, the value previously hard-coded here.

    Returns:
        Tuple ``(cm, report)`` from sklearn's confusion_matrix and
        classification_report, both called as (labels, preds).

    Raises:
        ValueError: if the number of samples is not a multiple of
            ``samples_per_video``.
    """
    preds = np.array(preds).round().flatten()
    labels = np.array(labels).flatten()
    if preds.size % samples_per_video != 0 or labels.size % samples_per_video != 0:
        raise ValueError(
            "number of samples (%d preds, %d labels) is not a multiple of samples_per_video=%d"
            % (preds.size, labels.size, samples_per_video)
        )
    preds = preds.reshape(-1,samples_per_video)
    labels = labels.reshape(-1,samples_per_video)
    # np.round rounds halves to the nearest even value, so an exact 50/50 vote
    # resolves to class 0.
    preds = np.mean(preds,axis=1).round()
    labels = np.mean(labels,axis=1).round()
    cm = confusion_matrix(labels,preds)
    report = classification_report(labels,preds)
    return cm,report

In [None]:
def calculateCMAndReport(preds,labels):
    """Return the per-sample confusion matrix and classification report.

    Predictions are rounded to the nearest integer and both inputs are
    flattened before being handed to sklearn as (labels, predictions).
    """
    predicted = np.round(np.ravel(preds))
    actual = np.ravel(labels)
    matrix = confusion_matrix(actual, predicted)
    summary = classification_report(actual, predicted)
    return matrix, summary

In [None]:
# Fine-tune the model with Adam + binary cross-entropy and log per-epoch
# loss / accuracy / AUC to both the console and the run report.
epochs = 10
optimizer = torch.optim.Adam(MSTCNModel.parameters(), lr=learning_rate)
# Use the fully qualified name: `nn` was only in scope via a star import.
criterion = torch.nn.BCELoss()

# Feed batches to whatever device the model lives on instead of assuming CUDA.
device = next(MSTCNModel.parameters()).device

# Make the mode explicit: if this cell is re-run after the evaluation cell the
# model would otherwise still be in eval mode.
MSTCNModel.train()

startTime = time.time()

for epoch in range(epochs):
    running_loss = 0.0
    all_labels = []
    all_preds = []
    for inputs, labels in trainloader:
        inputs = inputs.to(device).float()
        labels = labels.to(device).float()
        optimizer.zero_grad()
        # Every sequence in the batch is reported with the same length, taken
        # from dimension 1 of the batch tensor.
        lengths = [inputs.shape[1]] * inputs.shape[0]
        outputs = MSTCNModel(inputs, lengths=lengths)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

        all_preds.extend(outputs.detach().cpu().numpy())
        all_labels.extend(labels.detach().cpu().numpy())

    # Mean loss over the batches of this epoch.
    epochLoss = running_loss / len(trainloader)
    epochAccuracy = calculateAccuracy(all_preds, all_labels)
    epochAUC = calculateAUC(all_preds, all_labels)
    for line in (
        'Epoch: %d, loss: %.3f' % (epoch + 1, epochLoss),
        'accuracy: %.3f' % (epochAccuracy),
        'AUC: %.3f' % (epochAUC),
    ):
        print(line)
        report.write(line+"\n")
    # Keep the on-disk report current during long runs.
    report.flush()

endTime = time.time()

#calculate time taken
timeTaken = endTime - startTime
print('Time of Training: %.3f' % (timeTaken))
report.write('Time of Training: %.3f' % (timeTaken)+"\n")

In [None]:
# Evaluate the fine-tuned model on the held-out test set, log the metrics to the
# console and the run report, then save the weights and close the report.
MSTCNModel.eval()

# Feed batches to whatever device the model lives on instead of assuming CUDA.
device = next(MSTCNModel.parameters()).device

#Collect predictions on the test set
all_labels = []
all_preds = []
# Inference only: no_grad avoids building autograd graphs for every batch.
with torch.no_grad():
    for inputs, labels in testloader:
        inputs = inputs.to(device).float()
        labels = labels.to(device).float()
        # Every sequence in the batch is reported with the same length, taken
        # from dimension 1 of the batch tensor.
        lengths = [inputs.shape[1]] * inputs.shape[0]
        outputs = MSTCNModel(inputs, lengths=lengths)
        all_preds.extend(outputs.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

print('Test Set Evaluation:')

# Per-video metrics (consecutive samples aggregated into one decision per video).
testVideosAccuracy = calculateAccuracyByVideos(all_preds, all_labels)
testVideosAUC = calculateAUCByVideos(all_preds, all_labels)
testVideoCM, testVideoReport = caculateCMAndReportByVideo(all_preds, all_labels)

# Per-sample metrics.
testAccuracy = calculateAccuracy(all_preds, all_labels)
testAUC = calculateAUC(all_preds, all_labels)
testCM, testReport = calculateCMAndReport(all_preds, all_labels)

# Each entry is printed and appended to the report, in this order.
testLogLines = [
    'Test Videos Accuracy: %.3f' % (testVideosAccuracy),
    'Test Videos AUC: %.3f' % (testVideosAUC),
    'Test Videos Confusion Matrix:',
    str(testVideoCM),
    'Test Videos Classification Report:',
    str(testVideoReport),
    'Test Accuracy: %.3f' % (testAccuracy),
    'Test AUC: %.3f' % (testAUC),
    'Test Confusion Matrix:',
    str(testCM),
    'Test Classification Report:',
    str(testReport),
]
for line in testLogLines:
    print(line)
    report.write(line+"\n")


torch.save(MSTCNModel.state_dict(), OutputDir+'/lips_movements_classifer.pth')
report.close()