In [3]:
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split    
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from concurrent.futures import ThreadPoolExecutor, as_completed
import pandas as pd

fake_dir = "E:\\College\\5th Sem\\ML\\Lab\\MLProject\\train\\FAKE"
real_dir = "E:\\College\\5th Sem\\ML\\Lab\\MLProject\\train\\REAL"
    
num_images_to_load = 10

In [4]:
def to_gray(image):
    return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

In [5]:
def n_blocks(arr, tile_size=4):
    rows, cols = arr.shape

    tilings = [arr[i:i + tile_size, j:j + tile_size] for i in range(0, rows, tile_size) for j in range(0, cols, tile_size)]
    #print(tilings)
    return np.array(tilings)

In [6]:
def block_mean_var(arr):
    means_vars = []
    for block in arr:
        means_vars.append([np.mean(block) , np.var(block)])
    return np.array(means_vars)

In [7]:
def img_features(img):
    blocks = n_blocks(img)
    return block_mean_var(blocks)

In [8]:
def load_img_and_extract_features(filepath, label):
    img = cv2.imread(filepath)
    gray_img = to_gray(img)
    features = img_features(gray_img)
    return [i[0] for i in features],[i[1] for i in features], label

In [9]:
def load_imgs_from_directory(directory, label, num_images=None):
    images_m = []
    images_v = []
    labels = []
    files = os.listdir(directory)
    
    with ThreadPoolExecutor(max_workers=8) as executor:
        futures = [executor.submit(load_img_and_extract_features, os.path.join(directory, filename), label) for filename
                   in files]
        
        for i in files[:num_images]:
            mean, var, lbl = load_img_and_extract_features(os.path.join(directory, i), label)
            images_m.append(mean)
            images_v.append(var)
            labels.append(lbl)
    return images_m, images_v, labels

In [10]:
import matplotlib.pyplot as plt 

fake_images_m,fake_images_v, fake_labels = load_imgs_from_directory(fake_dir, 0, num_images=num_images_to_load)
real_images_m,real_images_v, real_labels = load_imgs_from_directory(real_dir, 1, num_images=num_images_to_load)
for j in range(8):
    a=[i[j] for i in real_images_m]
    b=[i[j] for i in fake_images_m]
    print("Mean of "+str(j+1)+"th block of real images :- " + np.mean(a))
    print("Mean of "+str(j+1)+"th block of fake images :- " + np.mean(b))
    print("Std of "+str(j+1)+"th block of real images :- " + np.std(a))
    print("Std of "+str(j+1)+"th block of fake images :- " + np.std(b))
    print("Distance between "+str(j+1)+"th block mean of fake images and real :- " + np.lingalg.norm(a-b))

a=[i[j] for i in real_images_m]
b=[i[j] for i in fake_images_m]
plt.hist(x=a, bins=30, color='skyblue')
plt.hist(x=b, bins=30, color='orange')
 
plt.xlabel('Values')
plt.ylabel('Frequency')
plt.title('Basic Histogram')
plt.show()

features_m = np.vstack((fake_images_m, real_images_m))
features_v = np.vstack((fake_images_v, real_images_v))
labels = np.concatenate((fake_labels, real_labels))

In [None]:
x_train, x_test, y_train, y_test = train_test_split(features_m, features_v, labels, test_size=0.5, random_state=42)

knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(x_train, y_train)

accuracy = knn.score(x_test, y_test)
print("Accuracy:", accuracy)

training_accuracy = knn.score(x_train, y_train)
print(f"Training Accuracy: {training_accuracy}")
predictions = knn.predict(x_test[:20])
actual_labels = y_test[:20]

# for i in range(len(predictions)):
#     print(f"Predicted: {predictions[i]}\tActual: {actual_labels[i]}")   

predicted_vs_actual = pd.DataFrame({"Actual": actual_labels, "Predicted": predictions})
predicted_vs_actual

Accuracy: 0.69208
Training Accuracy: 0.8389


Unnamed: 0,Actual,Predicted
0,1,1
1,1,1
2,0,0
3,1,1
4,1,1
5,1,1
6,1,0
7,1,0
8,1,1
9,1,0


In [None]:
k_values = range(1, 100)
accuracies = []

import matplotlib as plt                                     

for k in k_values:
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(x_train, y_train)
    y_pred = knn.predict(x_test)
    accuracy = accuracy_score(y_test, y_pred)
    accuracies.append(accuracy)
    # print(f"Accuracy for k={k}: {accuracy}")  
            
accuracies_df = pd.DataFrame({"K-Value":k_values,"Accuracy": accuracies},index=k_values)


plt.figure(figsize=(40,20))
plt.plot(k_values, accuracies, marker='o', linestyle='-', color='b')
plt.title('k-NN Accuracy vs. Number of Neighbors (k)')
plt.xlabel('Number of Neighbors (k)')
plt.ylabel('Accuracy')
plt.xticks(k_values)

plt.grid(True)
plt.show()

accuracies_df   

KeyboardInterrupt: 

In [None]:
from sklearn.metrics import  confusion_matrix,precision_score,recall_score,f1_score

conf_mat = confusion_matrix(y_train,y_pred)
precision = precision_score(y_test,y_pred)
recall = recall_score(y_test,y_pred)
f_score = f1_score(y_test,y_pred)

print(f"Confusion Matrix:\n{conf_mat}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1 Score: {f_score}")

Confusion Matrix:
[[1465 3501]
 [1554 3480]]
Precision: 0.6193955020770663
Recall: 0.8707209021345147
F1 Score: 0.7238637314807065
