In [27]:
import os
import cv2
import torch

In [67]:
# Notebook-wide settings: dataset folders, feature-cache folders and
# extraction/training options read by the cells below.
# NOTE(review): every path is an absolute, machine-specific Windows path; 'RootPath'
# is declared but the other entries repeat it literally instead of deriving from it.
Configuration = {'RootPath': r"C:\Users\Utente\Projects\Thesis",
                 # Lists of root folders that are walked recursively for images
                 # (positives are labelled 1, negatives 0 by Classification.load_samples).
                 'PositiveSamples':[r"C:\Users\Utente\Projects\Thesis\dataset\Recaptured"],
                 'NegativeSamples':[r"C:\Users\Utente\Projects\Thesis\dataset\Original"],
                 # NOTE(review): key is spelled 'Postive' (sic); renaming it requires updating every reader.
                 'PostiveSamples_dlc':[r'C:\Users\Utente\Projects\Thesis\dataset\DLC2021\Recaptured'],
                 'NegativeSamples_dlc': [r'C:\Users\Utente\Projects\Thesis\dataset\DLC2021\Original'],
                 # Folders where Classification.save_data writes '<identifier>.pkl' feature caches.
                 'Destination_train':r"C:\Users\Utente\Projects\Thesis\Evaluations\Machine Learning Based\Extracted_features\train",
                 'Destination_test':r"C:\Users\Utente\Projects\Thesis\Evaluations\Machine Learning Based\Extracted_features\test",
                 # (P, R) pairs passed to skimage's local_binary_pattern, one per LBP scale.
                 'LBPF_parameters': [(8,1),(16,2),(24,3),(24,4)],
                 'Methods': ['LBPF','MSWF','MARKOV','CNN'],
                 # Number of images sent through the ResNet feature extractor per forward pass.
                 'Batchsize':16,
                 # NOTE(review): the three split sizes below are not read by any code visible in
                 # this notebook; Classification.split_data hard-codes its own ratios.
                 'Train_size': 0.7,
                 'val_size' : 0.15,
                 'test_size' : 0.15,
                 # Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
                 'device': 'cuda' if torch.cuda.is_available() else 'cpu'}

In [30]:
from torch.utils.tensorboard import SummaryWriter

# TensorBoard writer shared by every experiment cell below; it logs into the 'Experiments' directory.
writer = SummaryWriter('Experiments')

# Feature Extraction Classes

In this section, we will define the classes for the following feature extraction techniques:

1. **Multi-Scale Local Binary Pattern (LBP) Features**
2. **Multi-Scale Wavelet Features**
3. **Markov Discrete Cosine Transform (DCT) Features**
4. **ResNet18 Features**

All of these features, except the ResNet ones, were inspired by the paper https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8462205


## 1. Multi-Scale Local Binary Pattern (LBP) Features


The implementation was inspired by the paper https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=1017623

In [31]:
from skimage import feature
import numpy as np


class MultiScaleLocalBinaryPatternFeatures():
    """Multi-scale uniform Local Binary Pattern (LBP) histogram extractor.

    For every ``(P, R)`` pair in ``parameters`` a uniform LBP map is computed on
    the grayscale centre crop of the image and summarised as a normalised
    histogram with ``P + 2`` bins. The per-scale histograms are concatenated
    into one flat feature list.
    """

    # Side length (in pixels) of the square centre crop the histograms are computed on.
    CROP_SIZE = 224

    def __init__(self,parameters):
        """Store the LBP scales.

        Parameters:
            parameters: iterable of ``(P, R)`` pairs (number of neighbours, radius).
        """
        self.parameters = parameters

    def Calculate_MultiScale_LBPF(self,image):
        """Return the concatenated multi-scale LBP histogram of ``image``.

        Parameters:
            image (numpy.ndarray): 3-channel colour image. It is converted with
                ``cv2.COLOR_BGR2GRAY``, i.e. BGR channel order is assumed.
                NOTE(review): Classification.load_positive_negative_samples
                converts images to RGB before calling the transformation, so the
                gray weights are applied to swapped channels - confirm intent.

        Returns:
            list: histogram values of all scales, in the order of ``parameters``.
        """
        img_gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        center_crop = self.get_center_block(img_gray)
        hist = []
        for param in self.parameters:
            hist.extend(self.calc_LBP_hist(center_crop, param[0], param[1]))

        return hist

    def calc_LBP_hist(self,image, P, R):
        """Compute the normalised uniform-LBP histogram of ``image`` for one ``(P, R)`` scale."""
        lbp = feature.local_binary_pattern(image, P, R, method="uniform")
        return self.histogram(lbp, P, R)

    def histogram(self,lbp, P, R):
        """Histogram an LBP code map into ``P + 2`` bins and normalise it to sum to ~1.

        ``R`` is accepted for signature compatibility but is not used here.
        """
        (hist, _) = np.histogram(lbp.ravel(),
                                bins=np.arange(0, P + 3),
                                range=(0, P + 2))

        # Normalise; eps guards against division by zero on an empty map.
        hist = np.float32(hist)
        eps = 1e-7
        hist /= (hist.sum() + eps)
        return hist

    def get_center_block(self,image):
        """Return the ``CROP_SIZE`` x ``CROP_SIZE`` block centred in a 2-D ``image``.

        Both axes use the same crop size, so the window is horizontally and
        vertically centred. When the image is smaller than the crop along an
        axis, the whole extent of that axis is returned instead of a slice
        produced by negative start indices.
        """
        height, width = image.shape
        size = self.CROP_SIZE

        # Clamp at 0 so images smaller than the crop do not yield negative offsets.
        start_row = max((height - size) // 2, 0)
        start_col = max((width - size) // 2, 0)

        return image[start_row:start_row + size, start_col:start_col + size]

## 2. Class for Multi-Scale Wavelet Features

The code for this implementation was inspired by this paper, https://ieeexplore.ieee.org/document/5495419



In [32]:
import numpy as np
import matplotlib.pyplot as plt
import cv2
import pywt


class NlevelWaveletDecomposition():
    """Wavelet-statistics feature extractor based on an N-level 2-D decomposition."""

    def __init__(self,level=3,filter_type='haar'):
        """Remember the decomposition depth and the wavelet name handed to ``pywt``."""
        self.decomposition_level=level
        self.filter= filter_type

    def compute_wavelet_statistics(self,image):
        """
        Summarise the detail sub-bands of every colour channel.

        Each channel is cast to float32, centre-cropped to 224x224 and decomposed
        with ``pywt.wavedec2``. For every detail band of every level, the mean and
        the standard deviation of the absolute coefficients are appended.

        Parameters:
            image (numpy.ndarray): Multi-channel input image.

        Returns:
            features (numpy.ndarray): Flat array of (mean, std) pairs, ordered by
            channel, then level, then detail band.
        """
        stats = []

        for plane in cv2.split(image.astype(np.float32)):
            decomposition = pywt.wavedec2(self.get_center_block(plane),
                                          self.filter,
                                          level=self.decomposition_level)

            # Index 0 holds the approximation band; detail bands start at index 1.
            level_idx = 1
            while level_idx <= self.decomposition_level:
                for band in decomposition[level_idx]:
                    magnitude = np.abs(band)
                    stats.append(np.mean(magnitude))
                    stats.append(np.std(magnitude))
                level_idx += 1

        return np.array(stats)

    def get_center_block(self,image):
        """Slice the 224x224 window positioned around the middle of a 2-D array."""
        rows, cols = image.shape
        top = (rows - 224) // 2
        left = (cols - 224) // 2
        return image[top:top + 224, left:left + 224]


## 3. Markov Process Based on Discrete Cosine Transform Features


A reference implementation of the Markov DCT features was not easy to find, so I wrote the following implementation with the help of Google and YouTube. The approach itself is described in this paper:

https://dl.acm.org/doi/10.1145/2393347.2396396




In [42]:
import numpy as np
from scipy.fft import dctn
from scipy.stats import entropy


class MarkovFeatures:
    """Markov transition-probability features computed on DCT coefficient differences.

    The centre 8x8 block of the first channel is transformed with a 2-D DCT, four
    directional difference arrays are derived from the rounded coefficients, each
    array is clipped to ``[-threshold, threshold]`` and summarised by a
    ``(2*threshold + 1) x (2*threshold + 1)`` transition-probability matrix.
    """

    def __init__(self,threshold=3):
        """Store the clipping threshold ``T``; states range over ``-T..T``."""
        self.threshold = threshold

    def compute_markov_features(self,image):
        """Return the flattened transition matrices of the four difference arrays.

        Parameters:
            image (numpy.ndarray): 3-channel colour image. The channels are swapped
                with ``cv2.COLOR_BGR2RGB`` and the first resulting channel is used.
                NOTE(review): Classification.load_positive_negative_samples already
                converts to RGB, in which case the first channel after this second
                swap is blue, not red - confirm which channel is intended.

        Returns:
            list: ``4 * (2*threshold + 1)**2`` probabilities, ordered
            horizontal, vertical, diagonal, minor-diagonal.
        """
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        Red,_,_ = cv2.split(image)

        # Only the single 8x8 block at the image centre is analysed.
        block = self.get_center_block(Red)
        dct_block = np.round(dctn(block)).astype(np.int32)

        features = []
        for difference in self.compute_differences_dct_coefficientes(dct_block):
            features.extend(self.compute_transition_probability_matrix(difference))

        return features

    def get_center_block(self,image):
        """Return the 8x8 block centred in a 2-D ``image``."""
        height, width = image.shape

        start_row = (height - 8) // 2
        start_col = (width - 8) // 2

        return image[start_row:start_row + 8, start_col:start_col + 8]

    def compute_differences_dct_coefficientes(self,F):
        """Compute the four directional difference arrays of coefficient array ``F``.

        Returns:
            tuple: ``(Fh, Fv, Fd, Fmd)`` as float arrays where
            ``Fh[u, v]  = F[u, v]     - F[u + 1, v]``      (shape ``(Du-1, Dv)``),
            ``Fv[u, v]  = F[u, v]     - F[u, v + 1]``      (shape ``(Du, Dv-1)``),
            ``Fd[u, v]  = F[u, v]     - F[u + 1, v + 1]``  (shape ``(Du-1, Dv-1)``),
            ``Fmd[u, v] = F[u + 1, v] - F[u, v + 1]``      (shape ``(Du-1, Dv-1)``).
        """
        # Work in float so unsigned inputs cannot wrap around on subtraction.
        F = np.asarray(F, dtype=np.float64)

        Fh = F[:-1, :] - F[1:, :]
        Fv = F[:, :-1] - F[:, 1:]
        Fd = F[:-1, :-1] - F[1:, 1:]
        Fmd = F[1:, :-1] - F[:-1, 1:]

        return Fh, Fv, Fd, Fmd

    def compute_transition_probability_matrix(self,Fx):
        """Return the flattened transition-probability matrix of difference array ``Fx``.

        Values are clipped to ``[-T, T]``. Entry ``(i, j)`` of the matrix is the
        probability of moving from state ``i - T`` to state ``j - T``, so the
        ``2T + 1`` rows/columns cover exactly the states ``-T..T``.
        """
        limit = self.threshold
        Fx_thresholded = np.clip(Fx, -limit, limit)

        m = n = 2 * limit + 1
        ph = np.zeros((m, n))

        for i in range(m):
            for j in range(n):
                # Matrix index 0 corresponds to state -T, index 2T to state +T.
                ph[i, j] = self.probability_ij(i - limit, j - limit, Fx_thresholded)

        return ph.flatten()

    def probability_ij(self, i, j, Fx_thresholded):
        """Estimate ``P(next == j | current == i)`` on a clipped difference array.

        ``current`` is element ``[u, v]`` and ``next`` is element ``[u + 1, v]``;
        pairs are taken for every row but the last and every column but the last.
        NOTE(review): the last column is excluded and the same row-wise transition
        is used for all four difference directions - confirm this is intended.

        Returns:
            0 when state ``i`` never occurs, otherwise the ratio as a float.
        """
        Fx_thresholded = np.asarray(Fx_thresholded)
        current = Fx_thresholded[:-1, :-1]
        following = Fx_thresholded[1:, :-1]

        starts_in_i = current == i
        denominator = int(np.count_nonzero(starts_in_i))
        if denominator == 0:
            return 0

        numerator = int(np.count_nonzero(starts_in_i & (following == j)))
        return numerator / denominator


## 4. ResNet18 / ResNet50 Features

The class `ResnetFeatures` obtains ResNet features from the encoder of choice. The PyTorch (torchvision) implementation is used.


In [59]:
import torch
from torchvision import transforms
from torchvision import models
from PIL import Image

class ResnetFeatures:
    """Frozen torchvision ResNet backbone used as a fixed feature extractor.

    The final fully-connected layer is removed, so ``get_features`` returns the
    flattened output of the remaining layers for each image.
    """

    # Public model name -> name of the torchvision constructor to call.
    _BACKBONES = {'ResNet18': 'resnet18', 'ResNet50': 'resnet50'}

    def __init__(self,model_name,device):
        """Build the preprocessing pipeline and load the pretrained backbone.

        Parameters:
            model_name (str): ``'ResNet18'`` or ``'ResNet50'``.
            device: torch device (or device string) the model and inputs are moved to.

        Raises:
            ValueError: if ``model_name`` is not one of the supported names.
        """
        # Validate first so an unknown name fails fast with a clear message.
        if model_name not in self._BACKBONES:
            raise ValueError(
                f"Unsupported model_name {model_name!r}; expected one of {sorted(self._BACKBONES)}"
            )

        # Define preprocessing
        self.preprocess = transforms.Compose([
            transforms.ToPILImage(),
            transforms.CenterCrop((224,224)),
            transforms.ToTensor(),
            transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
            ])

        self.device = device

        # Download the architecture that matches the requested name.
        build_backbone = getattr(models, self._BACKBONES[model_name])
        resnet = build_backbone(weights='DEFAULT',progress=True)

        # Drop the last child (the classification layer) to expose the features.
        self.feature_extractor = torch.nn.Sequential(*(list(resnet.children())[:-1]))
        self.feature_extractor.to(self.device)

        for param in self.feature_extractor.parameters():
            param.requires_grad = False

        self.feature_extractor.eval()

    def get_features(self,images):
        """Extract one flat feature vector per image.

        Parameters:
            images: sequence of images accepted by ``transforms.ToPILImage``.

        Returns:
            list[list[float]]: one feature list per input image; ``[]`` for an
            empty input.
        """
        if len(images) == 0:
            # torch.stack cannot handle an empty list.
            return []

        transformed_images = torch.stack([self.preprocess(image) for image in images]).to(self.device)

        # Inference only: no autograd bookkeeping is needed.
        with torch.no_grad():
            output = self.feature_extractor(transformed_images)

        return [torch.flatten(feature, 0).cpu().numpy().tolist() for feature in output]
    



# Classifier Class

The classifier defined below is designed in a modular way so that it can use all of the previously defined features. It contains the pipeline for loading the samples, splitting the data, shuffling the data, logging, standardizing, etc.



In [68]:
import pandas as pd
import cv2
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, recall_score, precision_score,f1_score
import pickle
from sklearn import preprocessing
from sklearn.ensemble import RandomForestClassifier

class Classification:
    """Feature-agnostic pipeline: load images, extract features, train and evaluate.

    ``transformation`` is the feature extractor. When ``identifier`` contains
    ``'ResNet'`` it is called with a list of images (a batch) and must return one
    feature per image; otherwise it is called with a single image.
    """

    def __init__(self,transformation,identifier,positive_samples_path,negative_samples_path,writer):
        """Store the extractor, its name, the sample folders and the TensorBoard writer.

        Parameters:
            transformation: callable producing features (see class docstring).
            identifier (str): feature name; also used as pickle file stem and log tag.
            positive_samples_path: iterable of root folders with positive images.
            negative_samples_path: iterable of root folders with negative images.
            writer: object exposing ``add_text(tag, text, step)``.
        """
        self.transformation = transformation
        self.positive_samples_path = positive_samples_path
        self.negative_samples_path = negative_samples_path
        self.identifier = identifier
        self.writer = writer

    def load_samples(self,test_docs):
        """Extract features for all samples and return ``(train_df, test_df)``.

        Positives get label 1, negatives label 0. Only the training frame is
        shuffled. Both frames have the columns ``samples`` and ``labels``.

        Parameters:
            test_docs: iterable of substrings; folders whose path contains any of
                them are routed to the test frame.
        """
        positive_label = 1
        negative_label = 0

        pos_train, pos_train_labels, pos_test, pos_test_labels = self.load_positive_negative_samples(
            test_docs, self.positive_samples_path, positive_label)
        neg_train, neg_train_labels, neg_test, neg_test_labels = self.load_positive_negative_samples(
            test_docs, self.negative_samples_path, negative_label)

        train_frame = pd.DataFrame({'samples': pos_train + neg_train,
                                    'labels': pos_train_labels + neg_train_labels})
        test_frame = pd.DataFrame({'samples': pos_test + neg_test,
                                   'labels': pos_test_labels + neg_test_labels})

        return self.shuffle_data(train_frame), test_frame

    def load_positive_negative_samples(self,test_docs,sample_path , label ):
        """Walk ``sample_path`` and extract features for every ``.jpg``/``.png`` file.

        Every image is routed to train or test according to its own folder. In
        batched (ResNet) mode images are accumulated into batches of
        ``Configuration['Batchsize']``; no image is dropped and the final partial
        batch is processed as well. Files OpenCV cannot decode are reported and
        skipped.

        Returns:
            tuple: ``(train_samples, train_labels, test_samples, test_labels)``.
        """
        train_samples , train_labels , test_samples , test_labels = [] , [] , [] , []
        destinations = {'train': (train_samples, train_labels),
                        'test': (test_samples, test_labels)}

        batched = 'ResNet' in self.identifier
        batchsize = Configuration['Batchsize'] if batched else None
        pending_images, pending_flags = [], []

        def store(feature, flag):
            # Append one feature and its label to the split it belongs to.
            samples, labels = destinations[flag]
            samples.append(feature)
            labels.append(label)

        def flush():
            # Run the extractor on the pending batch and route each result individually.
            if not pending_images:
                return
            for feature, flag in zip(self.transformation(pending_images), pending_flags):
                store(feature, flag)
            pending_images.clear()
            pending_flags.clear()

        for base_path in sample_path:
            for root, _, files in os.walk(base_path):
                flag = 'test' if any(doc in root for doc in test_docs) else 'train'

                for file in files:
                    if not file.endswith((".jpg", ".png")):
                        continue

                    image_path = os.path.join(root, file)
                    image = cv2.imread(image_path)
                    if image is None:
                        # cv2.imread returns None instead of raising on unreadable files.
                        print(f'Skipping unreadable image: {image_path}')
                        continue
                    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

                    if batched:
                        pending_images.append(image)
                        pending_flags.append(flag)
                        if len(pending_images) >= batchsize:
                            flush()
                    else:
                        store(self.transformation(image), flag)

        # Process whatever is left in the last, possibly partial, batch.
        flush()

        return train_samples,train_labels,test_samples,test_labels

    def shuffle_data(self,data):
        """Return all rows of ``data`` in a fixed pseudo-random order (seed 42)."""
        shuffeled_data = data.sample(frac=1, random_state=42)
        return shuffeled_data

    def split_data(self,data):
        """Split ``data`` into train (75%), validation (12.5%) and test (12.5%) frames.

        The ratios are fixed here and are not read from ``Configuration``.
        """
        train_data, val_test_data = train_test_split(data, test_size=0.25, random_state=42)
        val_data, test_data = train_test_split(val_test_data, test_size=0.5, random_state=42)

        return train_data,val_data,test_data

    def train_model(self,best_params,train_samples,train_labels):
        """Fit a random forest or an SVM described by ``best_params``.

        ``best_params['model_type'] == 'randomforest'`` selects the forest (keys
        ``n_estimators``, ``max_depth``, ``min_sample_split``, ``min_samples_leaf``);
        any other value selects an SVC (keys ``kernel``, ``C``, ``gamma``).

        Returns:
            The fitted estimator.
        """
        if best_params['model_type'] == 'randomforest':
            model = RandomForestClassifier(n_estimators=best_params['n_estimators'],
                                           max_depth=best_params['max_depth'],
                                           min_samples_split=best_params['min_sample_split'],
                                           min_samples_leaf=best_params['min_samples_leaf'])
        else:
            model = SVC(kernel=best_params['kernel'], C=best_params['C'],gamma=best_params['gamma'])

        model.fit(train_samples,train_labels)
        return model

    def test_model(self,model,test_data,test_labels):
        """Evaluate ``model`` and return a text report.

        The report contains accuracy, precision, recall and macro-averaged F1.
        """
        y_pred = model.predict(test_data)
        accuracy = accuracy_score(y_pred, test_labels)
        precision = precision_score(test_labels,y_pred)
        recall = recall_score(test_labels,y_pred)
        f1_scoree = f1_score(test_labels,y_pred,average='macro')

        results_string = "Results for {}: \n".format(self.identifier)
        results_string += f'Results on Features: {self.identifier} are below:\n'
        results_string += "Accuracy: {}\n".format(accuracy)
        results_string += "Precision: {}\n".format(precision)
        results_string += "Recall: {}\n".format(recall)
        results_string += "F1 Macro Score: {}\n".format(f1_scoree)

        return results_string

    def load_data(self,pickle_file_path):
        """Load a pickled feature frame from ``pickle_file_path``.

        SECURITY: unpickling executes arbitrary code; only load files produced by
        ``save_data`` on a trusted machine.
        """
        with open(pickle_file_path,'rb') as handle:
            df=pd.read_pickle(handle)
        return df

    def save_data(self,destination_path,data):
        """Pickle ``data`` to ``<destination_path>/<identifier>.pkl``."""
        with open(os.path.join(destination_path,self.identifier+'.pkl'),'wb') as handle:
            pickle.dump(data,handle)

    def get_data_distributions(self,train_data,test_data):
        """Print and log the label counts of the train and test frames.

        Returns:
            tuple: ``(train_dict, test_dict)`` mapping label -> count.
        """
        train_dict = train_data['labels'].value_counts().to_dict()
        test_dict = test_data['labels'].value_counts().to_dict()

        train_text = f'Training Data Distribution: \n{train_dict}'
        # The test split gets its own caption and its own counts.
        test_text = f'Test Data Distribution: \n{test_dict}'

        print(train_text)
        print(test_text)

        self.writer.add_text(f'{self.identifier} Train Distribution',train_text,0)
        self.writer.add_text(f'{self.identifier} Test Distribution',test_text,0)

        return train_dict,test_dict

    def standardize_data(self, training_data,test_data):
        """Standardise the ``samples`` column of both frames.

        The scaler is fitted on the training samples only and then applied to both.

        Returns:
            tuple: ``(train_scaled, test_scaled)`` as nested Python lists.
        """
        scaler = preprocessing.StandardScaler().fit(training_data['samples'].tolist())
        training_data_scaled = scaler.transform(training_data['samples'].tolist())
        test_data_scaled = scaler.transform(test_data['samples'].tolist())

        return training_data_scaled.tolist(),test_data_scaled.tolist()
    


    

# Training Based on Different Features And Model Selection and Evaluation on Same Distribution dataset


## Training with Local Binary Pattern Features

Below we obtain the Local Binary Pattern features, then use the Optuna library to find the best-performing model along with its best hyperparameters, and finally evaluate the chosen model with the chosen parameters.

In [None]:
import optuna
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier


# Bound method used as the feature extractor: one image in, one multi-scale LBP histogram out.
LBPF = MultiScaleLocalBinaryPatternFeatures(Configuration['LBPF_parameters']).Calculate_MultiScale_LBPF

classifier = Classification(LBPF,'LBPF',Configuration['PositiveSamples'],Configuration['NegativeSamples'],writer=writer)


# If the features were already computed, use load_data to read the cached pickles;
# otherwise use load_samples, which computes the features first.
# NOTE(review): these two paths repeat Configuration['Destination_train'/'Destination_test'] literally.

train_data = classifier.load_data(r"C:\Users\Utente\Projects\Thesis\Evaluations\Machine Learning Based\Extracted_features\train\LBPF.pkl")
test_data = classifier.load_data(r"C:\Users\Utente\Projects\Thesis\Evaluations\Machine Learning Based\Extracted_features\test\LBPF.pkl")


# The triple-quoted string below is disabled code (a bare string expression), kept as a usage hint.
'''
#if you perform load samples you would probably wanna save the features for not computing the features again and again in order to not lose precious time

classifier.save_data(Configuration['Destination_train'],train_data)
classifier.save_data(Configuration['Destination_test'],test_data)
print('Data saved')
'''

classifier.get_data_distributions(train_data,test_data)
print('distribution achieved')

# The scaler is fitted on the training samples only and applied to both frames.
training_data_scaled, test_data_scaled = classifier.standardize_data(train_data,test_data)

# The raw feature columns are overwritten in place with their standardised versions.
train_data['samples'] = training_data_scaled
test_data['samples'] = test_data_scaled

print('data Standarized')

def model_performance(model):
    """
    Score a fitted model on the notebook-level ``test_data`` frame.

    Predicts on the ``samples`` column and returns the accuracy against the
    ``labels`` column, rounded to three decimals.
    """
    predicted = model.predict(test_data['samples'].tolist())
    expected = test_data['labels'].tolist()
    score = accuracy_score(predicted, expected)
    return round(score, 3)


def create_model(trial):
    """
    Build an untrained classifier from the hyper-parameters sampled for ``trial``.

    The trial first chooses between an SVM and a random forest and then samples
    the hyper-parameters of the chosen family. The parameter names registered
    on the trial (including ``min_sample_split``) are the keys later read by
    ``Classification.train_model``.

    Raises:
        ValueError: if the sampled ``model_type`` is not a supported family.
        optuna.TrialPruned: if the trial's pruner asks to stop the trial.
    """
    model_type = trial.suggest_categorical('model_type', ['randomforest', 'svm'])

    if model_type == 'svm':
        kernel = trial.suggest_categorical('kernel', ['poly', 'rbf', 'sigmoid'])
        C = trial.suggest_float('C', 10, 210,step=10)
        gamma = trial.suggest_categorical("gamma", ["auto", "scale"])
        model = SVC(kernel=kernel, C=C,gamma=gamma)
    elif model_type == 'randomforest':
        n_estimators = trial.suggest_int('n_estimators',100,1000)
        max_depth = trial.suggest_int('max_depth',10,100)
        min_samples_split = trial.suggest_int('min_sample_split',2,50)
        min_samples_leaf = trial.suggest_int('min_samples_leaf',1,100)
        model = RandomForestClassifier(n_estimators=n_estimators,max_depth=max_depth,min_samples_split=min_samples_split,min_samples_leaf=min_samples_leaf)
    else:
        # Fail loudly instead of handing a non-estimator object to the caller.
        raise ValueError(f'Unsupported model_type: {model_type!r}')

    # NOTE(review): nothing is reported via trial.report before this check, so the
    # pruner presumably has nothing to act on here - confirm whether it is needed.
    if trial.should_prune():
        raise optuna.TrialPruned()

    return model

def objective(trial):
    """Optuna objective: fit the sampled model on the first 15000 training rows and return its accuracy."""
    candidate = create_model(trial)
    features = train_data['samples'].head(15000).tolist()
    targets = train_data['labels'].head(15000).tolist()
    candidate.fit(features, targets)
    return model_performance(candidate)


# Hyper-parameter search maximising the value returned by `objective`.
# NOTE(review): `objective` scores on test_data, so the test metrics reported below
# were also used for model selection and are optimistically biased.
study = optuna.create_study(direction="maximize")

# The search stops after 100 trials or 600 seconds, whichever comes first.
study.optimize(objective, n_trials=100, timeout=600)


# Serialise the best parameters as "key: value, " pairs for logging.
params = ''
for key, value in study.best_trial.params.items():
    # Concatenate the key-value pair to the result string
    params += f'{key}: {value}, '

writer.add_text('Best Hyper Parameters On LBPF',params)
# Retrain with the best parameters on the full training frame, then evaluate on the test frame.
model_lbpf = classifier.train_model(study.best_trial.params,train_data['samples'].tolist(),train_data['labels'].tolist())
results = classifier.test_model(model_lbpf,test_data['samples'].tolist(),test_data['labels'].tolist())
writer.add_text('Results On LBPF',results)

print(f'Results obtain with: {params}')
print(results)

## Training with Multiscale wavelet Features

Below we obtain the multi-scale wavelet features, then use the Optuna library to find the best-performing model along with its best hyperparameters, and finally evaluate the chosen model with the chosen parameters.

In [None]:
# Bound method used as the feature extractor: one image in, one wavelet-statistics vector out.
MSWF = NlevelWaveletDecomposition(level=3,filter_type='haar').compute_wavelet_statistics

classifier = Classification(MSWF,'MSWF',Configuration['PositiveSamples'],Configuration['NegativeSamples'],writer=writer)

# Folders whose path contains 'French' are routed to the test frame; all others to training.
train_data,test_data = classifier.load_samples(['French'])
print('data loaded')

# Cache the extracted features as MSWF.pkl in the train/test destination folders.
classifier.save_data(Configuration['Destination_train'],train_data)
classifier.save_data(Configuration['Destination_test'],test_data)
print('Data saved')

classifier.get_data_distributions(train_data,test_data)
print('distribution achieved')

# The scaler is fitted on the training samples only and applied to both frames.
training_data_scaled, test_data_scaled = classifier.standardize_data(train_data,test_data)

# The raw feature columns are overwritten in place with their standardised versions.
train_data['samples'] = training_data_scaled
test_data['samples'] = test_data_scaled
print('data Standarized')

def model_performance(model):
    """
    Return the accuracy of a fitted model on the notebook-level ``test_data``.

    The model predicts on the ``samples`` column; the result is compared with
    the ``labels`` column and rounded to three decimals.
    """
    predictions = model.predict(test_data['samples'].tolist())
    truth = test_data['labels'].tolist()
    return round(accuracy_score(predictions, truth), 3)


def create_model(trial):
    """
    Build an untrained classifier from the hyper-parameters sampled for ``trial``.

    The trial first chooses between an SVM and a random forest and then samples
    the hyper-parameters of the chosen family. The parameter names registered
    on the trial (including ``min_sample_split``) are the keys later read by
    ``Classification.train_model``.

    Raises:
        ValueError: if the sampled ``model_type`` is not a supported family.
        optuna.TrialPruned: if the trial's pruner asks to stop the trial.
    """
    model_type = trial.suggest_categorical('model_type', ['randomforest', 'svm'])

    if model_type == 'svm':
        kernel = trial.suggest_categorical('kernel', ['poly', 'rbf', 'sigmoid'])
        C = trial.suggest_float('C', 10, 210,step=10)
        gamma = trial.suggest_categorical("gamma", ["auto", "scale"])
        model = SVC(kernel=kernel, C=C,gamma=gamma)
    elif model_type == 'randomforest':
        n_estimators = trial.suggest_int('n_estimators',100,1000)
        max_depth = trial.suggest_int('max_depth',10,100)
        min_samples_split = trial.suggest_int('min_sample_split',2,50)
        min_samples_leaf = trial.suggest_int('min_samples_leaf',1,100)
        model = RandomForestClassifier(n_estimators=n_estimators,max_depth=max_depth,min_samples_split=min_samples_split,min_samples_leaf=min_samples_leaf)
    else:
        # Fail loudly instead of handing a non-estimator object to the caller.
        raise ValueError(f'Unsupported model_type: {model_type!r}')

    # NOTE(review): nothing is reported via trial.report before this check, so the
    # pruner presumably has nothing to act on here - confirm whether it is needed.
    if trial.should_prune():
        raise optuna.TrialPruned()

    return model

def objective(trial):
    """Optuna objective: train the trial's model on the leading 15000 training rows and score it."""
    estimator = create_model(trial)
    tuning_samples = train_data['samples'].head(15000).tolist()
    tuning_labels = train_data['labels'].head(15000).tolist()
    estimator.fit(tuning_samples, tuning_labels)
    return model_performance(estimator)


# Hyper-parameter search maximising the value returned by `objective`.
# NOTE(review): `objective` scores on test_data, so the test metrics reported below
# were also used for model selection and are optimistically biased.
study = optuna.create_study(direction="maximize")

# The search stops after 100 trials or 600 seconds, whichever comes first.
study.optimize(objective, n_trials=100, timeout=600)


# Serialise the best parameters as "key: value, " pairs for logging.
params = ''
for key, value in study.best_trial.params.items():
    # Concatenate the key-value pair to the result string
    params += f'{key}: {value}, '


writer.add_text('Best Hyper Parameters On MultiScaleWavelet',params)
# Retrain with the best parameters on the full training frame, then evaluate on the test frame.
model_mswf = classifier.train_model(study.best_trial.params,train_data['samples'].tolist(),train_data['labels'].tolist())
results = classifier.test_model(model_mswf,test_data['samples'].tolist(),test_data['labels'].tolist())
writer.add_text('Ressults  On MultiScaleWavelet',results)

print(f'Results obtain with: {params}')
print(results)

## Training with Markov DCT Features

Below we obtain the Markov DCT features, then use the Optuna library to find the best-performing model along with its best hyperparameters, and finally evaluate the chosen model with the chosen parameters.

In [None]:
# Bound method used as the feature extractor: one image in, one Markov transition-feature list out.
MARKOV = MarkovFeatures(threshold=3).compute_markov_features

classifier = Classification(MARKOV,'MARKOV',Configuration['PositiveSamples'],Configuration['NegativeSamples'],writer=writer)

# Folders whose path contains 'French' are routed to the test frame; all others to training.
train_data,test_data = classifier.load_samples(['French'])
print('data loaded')

# Cache the extracted features as MARKOV.pkl in the train/test destination folders.
classifier.save_data(Configuration['Destination_train'],train_data)
classifier.save_data(Configuration['Destination_test'],test_data)
print('Data saved')

classifier.get_data_distributions(train_data,test_data)
print('distribution achieved')


# The scaler is fitted on the training samples only and applied to both frames;
# the raw feature columns are then overwritten in place.
training_data_scaled, test_data_scaled = classifier.standardize_data(train_data,test_data)
train_data['samples'] = training_data_scaled
test_data['samples'] = test_data_scaled
print('data Standarized')


def model_performance(model):
    """
    Measure a fitted model's accuracy on the notebook-level ``test_data``.

    Uses the ``samples`` column as input and the ``labels`` column as ground
    truth; the accuracy is rounded to three decimals.
    """
    y_hat = model.predict(test_data['samples'].tolist())
    y_true = test_data['labels'].tolist()
    accuracy = accuracy_score(y_hat, y_true)
    return round(accuracy, 3)


def create_model(trial):
    """
    Build an untrained classifier from the hyper-parameters sampled for ``trial``.

    The trial first chooses between an SVM and a random forest and then samples
    the hyper-parameters of the chosen family. The parameter names registered
    on the trial (including ``min_sample_split``) are the keys later read by
    ``Classification.train_model``.

    Raises:
        ValueError: if the sampled ``model_type`` is not a supported family.
        optuna.TrialPruned: if the trial's pruner asks to stop the trial.
    """
    model_type = trial.suggest_categorical('model_type', ['randomforest', 'svm'])

    if model_type == 'svm':
        kernel = trial.suggest_categorical('kernel', ['poly', 'rbf', 'sigmoid'])
        C = trial.suggest_float('C', 10, 210,step=10)
        gamma = trial.suggest_categorical("gamma", ["auto", "scale"])
        model = SVC(kernel=kernel, C=C,gamma=gamma)
    elif model_type == 'randomforest':
        n_estimators = trial.suggest_int('n_estimators',100,1000)
        max_depth = trial.suggest_int('max_depth',10,100)
        min_samples_split = trial.suggest_int('min_sample_split',2,50)
        min_samples_leaf = trial.suggest_int('min_samples_leaf',1,100)
        model = RandomForestClassifier(n_estimators=n_estimators,max_depth=max_depth,min_samples_split=min_samples_split,min_samples_leaf=min_samples_leaf)
    else:
        # Fail loudly instead of handing a non-estimator object to the caller.
        raise ValueError(f'Unsupported model_type: {model_type!r}')

    # NOTE(review): nothing is reported via trial.report before this check, so the
    # pruner presumably has nothing to act on here - confirm whether it is needed.
    if trial.should_prune():
        raise optuna.TrialPruned()

    return model

def objective(trial):
    """Optuna objective: fit the trial's model on the top 15000 training rows and return its accuracy."""
    trial_model = create_model(trial)
    subset_x = train_data['samples'].head(15000).tolist()
    subset_y = train_data['labels'].head(15000).tolist()
    trial_model.fit(subset_x, subset_y)
    return model_performance(trial_model)


# Hyper-parameter search maximising the value returned by `objective`; it stops
# after 100 trials or 600 seconds, whichever comes first.
# NOTE(review): `objective` scores on test_data, so the test metrics reported below
# were also used for model selection and are optimistically biased.
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=100, timeout=600)
# Serialise the best parameters as "key: value, " pairs for logging.
params = ''
for key, value in study.best_trial.params.items():
    # Concatenate the key-value pair to the result string
    params += f'{key}: {value}, '


writer.add_text('Best Hyper Parameters On MarkovDCT',params)
# Retrain with the best parameters on the full training frame, then evaluate on the test frame.
model_markov = classifier.train_model(study.best_trial.params,train_data['samples'].tolist(),train_data['labels'].tolist())
results = classifier.test_model(model_markov,test_data['samples'].tolist(),test_data['labels'].tolist())
writer.add_text('Ressults On MarkovDCT',results)

print(f'Results obtain with: {params}')
print(results)

## Training with Resnet18 Features

Below we obtain the ResNet18 features, then use the Optuna library to find the best-performing model along with its best hyperparameters, and finally evaluate the chosen model with the chosen parameters.

In [None]:
import optuna
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier

# The identifier contains 'ResNet', so Classification feeds the extractor batches of images.
model_name = 'ResNet18'
ResNet = ResnetFeatures(model_name, Configuration['device']).get_features

classifier = Classification(ResNet,model_name,Configuration['PositiveSamples'],Configuration['NegativeSamples'],writer=writer)

# Folders whose path contains 'French' are routed to the test frame; all others to training.
train_data,test_data = classifier.load_samples(['French'])
print('data loaded')

# Cache the extracted features as ResNet18.pkl in the train/test destination folders.
classifier.save_data(Configuration['Destination_train'],train_data)
classifier.save_data(Configuration['Destination_test'],test_data)
print('Data saved')

display(train_data.head(10))
display(test_data.head(10))

# NOTE(review): unlike the LBPF/MSWF/MARKOV cells, the features are not standardised here.
classifier.get_data_distributions(train_data,test_data)
print('distribution achieved')

def model_performance(model):
    """
    Score a fitted model on the notebook-level ``test_data`` frame.

    Returns the accuracy of the predictions for the ``samples`` column against
    the ``labels`` column, rounded to three decimals.
    """
    outputs = model.predict(test_data['samples'].tolist())
    references = test_data['labels'].tolist()
    return round(accuracy_score(outputs, references), 3)


def create_model(trial):
    """
    Build an untrained classifier from the hyper-parameters sampled for ``trial``.

    The trial first chooses between an SVM and a random forest and then samples
    the hyper-parameters of the chosen family. The parameter names registered
    on the trial (including ``min_sample_split``) are the keys later read by
    ``Classification.train_model``.

    Raises:
        ValueError: if the sampled ``model_type`` is not a supported family.
        optuna.TrialPruned: if the trial's pruner asks to stop the trial.
    """
    model_type = trial.suggest_categorical('model_type', ['randomforest', 'svm'])

    if model_type == 'svm':
        kernel = trial.suggest_categorical('kernel', ['poly', 'rbf', 'sigmoid'])
        C = trial.suggest_float('C', 10, 210,step=10)
        gamma = trial.suggest_categorical("gamma", ["auto", "scale"])
        model = SVC(kernel=kernel, C=C,gamma=gamma)
    elif model_type == 'randomforest':
        n_estimators = trial.suggest_int('n_estimators',100,1000)
        max_depth = trial.suggest_int('max_depth',10,100)
        min_samples_split = trial.suggest_int('min_sample_split',2,50)
        min_samples_leaf = trial.suggest_int('min_samples_leaf',1,100)
        model = RandomForestClassifier(n_estimators=n_estimators,max_depth=max_depth,min_samples_split=min_samples_split,min_samples_leaf=min_samples_leaf)
    else:
        # Fail loudly instead of handing a non-estimator object to the caller.
        raise ValueError(f'Unsupported model_type: {model_type!r}')

    # NOTE(review): nothing is reported via trial.report before this check, so the
    # pruner presumably has nothing to act on here - confirm whether it is needed.
    if trial.should_prune():
        raise optuna.TrialPruned()

    return model

def objective(trial):
    """Optuna objective: fit the sampled model on the first 10000 training rows and return its accuracy."""
    candidate = create_model(trial)
    inputs = train_data['samples'].head(10000).tolist()
    targets = train_data['labels'].head(10000).tolist()
    candidate.fit(inputs, targets)
    return model_performance(candidate)


# Hyper-parameter search maximising the value returned by `objective`.
# NOTE(review): `objective` scores on test_data, so the test metrics reported below
# were also used for model selection and are optimistically biased.
study = optuna.create_study(direction="maximize")

# The search stops after 100 trials or 600 seconds, whichever comes first.
study.optimize(objective, n_trials=100, timeout=600)


# Serialise the best parameters as "key: value, " pairs for logging.
params = ''
for key, value in study.best_trial.params.items():
    # Concatenate the key-value pair to the result string
    params += f'{key}: {value}, '

writer.add_text('Best Hyper Parameters On Resnet18',params)
# Retrain with the best parameters on the full training frame, then evaluate on the test frame.
model_resnet18 = classifier.train_model(study.best_trial.params,train_data['samples'].tolist(),train_data['labels'].tolist())
results = classifier.test_model(model_resnet18,test_data['samples'].tolist(),test_data['labels'].tolist())
writer.add_text('Ressults On Resnet18',results)

print(f'Results obtain with: {params}')
print(results)


## Training with Resnet50 Features

Below we obtain the ResNet50 features, then use the Optuna library to find the best-performing model along with its best hyperparameters, and finally evaluate the chosen model with the chosen parameters.

In [None]:
import optuna
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier

# The identifier contains 'ResNet', so Classification feeds the extractor batches of images.
model_name = 'ResNet50'
ResNet = ResnetFeatures(model_name,Configuration['device']).get_features

classifier = Classification(ResNet,model_name,Configuration['PositiveSamples'],Configuration['NegativeSamples'],writer)

# Folders whose path contains 'French' are routed to the test frame; all others to training.
train_data,test_data = classifier.load_samples(['French'])
print('data loaded')

# Cache the extracted features as ResNet50.pkl in the train/test destination folders.
classifier.save_data(Configuration['Destination_train'],train_data)
classifier.save_data(Configuration['Destination_test'],test_data)
print('Data saved')

display(train_data.head(10))
display(test_data.head(10))

# NOTE(review): unlike the LBPF/MSWF/MARKOV cells, the features are not standardised here.
classifier.get_data_distributions(train_data,test_data)
print('distribution achieved')


def model_performance(model):
    """
    Get accuracy score on the test data from a trained model

    Parameters
    ----------
    model : fitted estimator exposing ``predict``.

    Returns
    -------
    Accuracy of ``model`` on the module-level ``test_data``, rounded to
    three decimals.

    NOTE(review): this value is what the Optuna study maximises, and the
    same ``test_data`` is later used for the final report, so the reported
    score is selected on the evaluation set. Consider tuning on a separate
    validation split.
    """
    y_true = test_data['labels'].tolist()
    y_pred = model.predict(test_data['samples'].tolist())
    # scikit-learn's documented argument order is (y_true, y_pred).
    return round(accuracy_score(y_true, y_pred), 3)


def create_model(trial):
    """
    Build an unfitted classifier from the hyper-parameters suggested by an
    Optuna trial

    The trial first picks the model family ('randomforest' or 'svm') and
    then the family-specific hyper-parameters.

    Parameters
    ----------
    trial : Optuna trial (anything exposing ``suggest_categorical``,
        ``suggest_float``, ``suggest_int`` and ``should_prune``).

    Returns
    -------
    An ``SVC`` or ``RandomForestClassifier`` instance, not yet fitted.

    Raises
    ------
    ValueError
        If the suggested model type is neither 'svm' nor 'randomforest'.
    optuna.TrialPruned
        If the trial reports that it should be pruned.
    """
    model_type = trial.suggest_categorical('model_type', ['randomforest', 'svm'])

    if model_type == 'svm':
        kernel = trial.suggest_categorical('kernel', ['poly', 'rbf', 'sigmoid'])
        C = trial.suggest_float('C', 10, 310, step=10)
        gamma = trial.suggest_categorical("gamma", ["auto", "scale"])

        model = SVC(kernel=kernel, C=C, gamma=gamma)
    elif model_type == 'randomforest':
        n_estimators = trial.suggest_int('n_estimators', 100, 1000)
        max_depth = trial.suggest_int('max_depth', 10, 100)
        # NOTE(review): the key is 'min_sample_split' (missing the second
        # "s"). It is kept as-is because study.best_trial.params is handed
        # to classifier.train_model, which may look it up by this exact
        # name - confirm before renaming.
        min_samples_split = trial.suggest_int('min_sample_split', 2, 50)
        min_samples_leaf = trial.suggest_int('min_samples_leaf', 1, 100)
        model = RandomForestClassifier(
            n_estimators=n_estimators,
            max_depth=max_depth,
            min_samples_split=min_samples_split,
            min_samples_leaf=min_samples_leaf,
        )
    else:
        # Fail loudly instead of returning a placeholder that would only
        # break later when .fit() is called on it.
        raise ValueError(f'Unsupported model_type: {model_type!r}')

    # NOTE(review): no intermediate value is reported through trial.report
    # in this function, so whether this check can ever trigger depends on
    # the pruner in use - confirm.
    if trial.should_prune():
        raise optuna.TrialPruned()

    return model

def objective(trial):
    """
    Optuna objective for the ResNet50 search.

    Creates the trial's candidate model, fits it on at most the first
    10000 rows of the training data and returns the accuracy computed by
    model_performance.
    """
    subset_rows = 10000
    candidate = create_model(trial)

    # Samples first, then labels - both taken from the same leading slice.
    fit_samples, fit_labels = (
        train_data[column].head(subset_rows).tolist()
        for column in ('samples', 'labels')
    )

    candidate.fit(fit_samples, fit_labels)
    return model_performance(candidate)


# Hyper-parameter search: maximise the value returned by `objective`,
# stopping after 100 trials or 600 seconds, whichever comes first.
study = optuna.create_study(direction="maximize")

study.optimize(objective, n_trials=100, timeout=600)

# Render the best trial's parameters as "key: value, " pairs for logging.
params = ''
for key, value in study.best_trial.params.items():
    # Concatenate the key-value pair to the result string
    params += f'{key}: {value}, '

writer.add_text('Best Hyper Parameters On Resnet50',params)

# Retrain with the winning parameters on the full training split, then
# evaluate the resulting model on the test split.
model_resnet50 = classifier.train_model(study.best_trial.params,train_data['samples'].tolist(),train_data['labels'].tolist())
results = classifier.test_model(model_resnet50,test_data['samples'].tolist(),test_data['labels'].tolist())
writer.add_text('Ressults On Resnet50',results)

print(f'Results obtain with: {params}')
print(results)

# In this section we will test our trained models on the DLC dataset



## On LBPF Features

In [None]:
# Destination handed to save_data for the features extracted from the DLC images.
destination_path = r'C:\Users\Utente\Projects\Thesis\Evaluations\Machine Learning Based\Extracted_features_dlc'

LBPF = MultiScaleLocalBinaryPatternFeatures(Configuration['LBPF_parameters']).Calculate_MultiScale_LBPF

classifier = Classification(LBPF,'LBPF',Configuration['PostiveSamples_dlc'],Configuration['NegativeSamples_dlc'],writer)

# NOTE(review): only the first value returned by load_samples is evaluated;
# the second one is discarded - confirm this is the intended DLC subset.
data,_ = classifier.load_samples(['French'])

# Use the path defined above instead of repeating the same literal.
classifier.save_data(destination_path,data)

# model_lbpf is not defined in this cell; it must already exist in the
# kernel from an earlier cell.
results = classifier.test_model(model_lbpf,data['samples'].tolist(),data['labels'].tolist())

writer.add_text('Results On dlc features LBPF', results)

print(results)


## On MSWF Features

In [None]:
# Wavelet feature extractor: 3-level decomposition with the 'haar' filter.
MSWF = NlevelWaveletDecomposition(level=3, filter_type='haar').compute_wavelet_statistics

classifier = Classification(
    MSWF,
    'MSWF',
    Configuration['PostiveSamples_dlc'],
    Configuration['NegativeSamples_dlc'],
    writer,
)

# Only the first value returned by load_samples is used below.
data, _ = classifier.load_samples(['French'])
classifier.save_data(r"C:\Users\Utente\Projects\Thesis\Evaluations\Machine Learning Based\Extracted_features_dlc", data)

# model_mswf is not defined in this cell; it must already exist in the kernel.
dlc_samples = data['samples'].tolist()
dlc_labels = data['labels'].tolist()
results = classifier.test_model(model_mswf, dlc_samples, dlc_labels)

writer.add_text('Results On dlc features MSWF', results)
print(results)

## On MARKOV Features

In [None]:
model_name = 'MARKOV'

# Markov feature extractor built with threshold=3.
MARKOV = MarkovFeatures(threshold=3).compute_markov_features

classifier = Classification(
    MARKOV,
    model_name,
    Configuration['PostiveSamples_dlc'],
    Configuration['NegativeSamples_dlc'],
    writer,
)

# Only the first value returned by load_samples is used below.
data, _ = classifier.load_samples(['French'])
classifier.save_data(r"C:\Users\Utente\Projects\Thesis\Evaluations\Machine Learning Based\Extracted_features_dlc", data)

# model_markov is not defined in this cell; it must already exist in the kernel.
dlc_samples = data['samples'].tolist()
dlc_labels = data['labels'].tolist()
results = classifier.test_model(model_markov, dlc_samples, dlc_labels)

writer.add_text('Results On dlc features Markov', results)
print(results)


## On RESNET18 Features

In [None]:
model_name = 'ResNet18'

# Feature extractor: bound `get_features` of a ResnetFeatures instance
# built from the model name and the configured device.
RESNET = ResnetFeatures(model_name, Configuration['device']).get_features

classifier = Classification(
    RESNET,
    model_name,
    Configuration['PostiveSamples_dlc'],
    Configuration['NegativeSamples_dlc'],
    writer,
)

# Only the first value returned by load_samples is used below.
data, _ = classifier.load_samples(['French'])

# model_resnet18 is not defined in this cell; it must already exist in the kernel.
dlc_samples = data['samples'].tolist()
dlc_labels = data['labels'].tolist()
results = classifier.test_model(model_resnet18, dlc_samples, dlc_labels)

writer.add_text('Results On dlc features Resnet18', results)
print(results)

## On RESNET50 Features

In [None]:
model_name = 'ResNet50'

# Feature extractor: bound `get_features` of a ResnetFeatures instance
# built from the model name and the configured device.
RESNET = ResnetFeatures(model_name, Configuration['device']).get_features

classifier = Classification(
    RESNET,
    model_name,
    Configuration['PostiveSamples_dlc'],
    Configuration['NegativeSamples_dlc'],
    writer,
)

# Only the first value returned by load_samples is used below.
data, _ = classifier.load_samples(['French'])

# model_resnet50 is not defined in this cell; it must already exist in the kernel.
dlc_samples = data['samples'].tolist()
dlc_labels = data['labels'].tolist()
results = classifier.test_model(model_resnet50, dlc_samples, dlc_labels)

writer.add_text('Results On dlc features Resnet50', results)
print(results)

