## Random Forest Classifier

In [4]:
import numpy as np
import cv2
import os
from sklearn import preprocessing
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

In [37]:
def show_images(image):
    """
    image - image to be shown
    """

    fig, axes = plt.subplots(2, 4, figsize=(10, 6))

    for i, ax in enumerate(axes.flatten()):
        ax.imshow(image)
        ax.set_title(image_files[i][:10])  # Set the title as the image filename
        ax.axis('off')  # Turn off axis labels

    # Adjust the spacing between subplots
    plt.tight_layout()

    # Show the plot
    plt.show()

In [10]:
def ela(imagePath, scale=10, quality=90):
    """
    Performs Error Level Analysis (ELA) on an image.
    imagePath (str) - the path to the image file.
    scale (int) - the scale factor for resizing the image. Default is 10.

    Returns:
        elaImage (np.array) - the ELA image.
    """
    # Load the image
    image = cv2.imread(imagePath)

    # Resize the image
    resizedImage = cv2.resize(image, (scale, scale))

    # Convert the image to JPEG
    cv2.imwrite("temp.jpg", resizedImage, [cv2.IMWRITE_JPEG_QUALITY, quality])

    # Load the JPEG image and compute the ELA image
    elaImage = cv2.imread("temp.jpg")
    elaImage = cv2.absdiff(resizedImage, elaImage)
    elaImage = cv2.cvtColor(elaImage, cv2.COLOR_BGR2GRAY)

    return elaImage

In [12]:
def extract_features(elaImage):
    features = []
    features.append(np.mean(elaImage))
    features.append(np.std(elaImage))
    features.append(np.var(elaImage))
    
    # Histogram features
    hist = cv2.calcHist([elaImage], [0], None, [256], [0, 256])
    features.extend(hist.flatten()[:10])

    return np.array(features)


In [16]:
authentic = 'data/casia/au/'
tampered = 'data/casia/tp/'

In [44]:
# Mix photos
X = list()
Y = list()
for files in os.listdir(authentic):
    X.append(authentic+files)
    Y.append('Au')
for files in os.listdir(tampered):
    X.append(tampered+files)
    Y.append('Tp')
    
le = preprocessing.LabelEncoder()
y = le.fit_transform(Y)

In [48]:
X_a = []
y_a = []
for i, img_path in enumerate(X):
    ela_img = ela(img_path, scale=10, quality=90)
    features = extract_features(ela_img)
    X_a.append(features)
    y_a.append(y[i])  # Label: genuine (0) or tampered (1)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X_a, y_a, test_size=0.2, random_state=42)

clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train, y_train)

# Evaluate
y_pred = clf.predict(X_test)
print(classification_report(y_test, y_pred))


              precision    recall  f1-score   support

           0       0.50      0.33      0.40        12
           1       0.38      0.56      0.45         9

    accuracy                           0.43        21
   macro avg       0.44      0.44      0.43        21
weighted avg       0.45      0.43      0.42        21

