In [None]:
import cv2
import os
import numpy as np

# Define Gabor filter parameters
orientations = 8
frequencies = [0.1, 0.5, 1.0]
kernel_size = 21  # Adjust this based on your image size and requirements

# Specify the folder containing your images
image_folder = '/content/sample_data/dataset'

# Create an output folder to save the preprocessed images
output_folder = '/content/sample_data/preprocessed'
os.makedirs(output_folder, exist_ok=True)

# Initialize the Gabor filter bank
gabor_filters = []

# Create the Gabor filters
for theta in range(orientations):
    for freq in frequencies:
        kernel = cv2.getGaborKernel(
            (kernel_size, kernel_size),
            sigma=4.0,  # Adjust the sigma value as needed
            theta=theta * (np.pi / orientations),
            lambd=10.0 / freq,
            gamma=0.5,
            psi=0,
        )
        gabor_filters.append(kernel)

# Process each image in the folder
for filename in os.listdir(image_folder):
    if filename.endswith(('.jpg', '.jpeg', '.png')):  # Filter for image file extensions
        # Load the image
        image_path = os.path.join(image_folder, filename)
        image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)

        # Initialize an empty list to store the filtered images
        filtered_images = []

        # Apply the Gabor filters to the image
        for kernel in gabor_filters:
            filtered_image = cv2.filter2D(image, cv2.CV_64F, kernel)
            filtered_images.append(filtered_image)

        # Save the preprocessed images to the output folder
        output_filename = os.path.splitext(filename)[0] + '_preprocessed.jpg'
        output_path = os.path.join(output_folder, output_filename)
        preprocessed_image = np.hstack(filtered_images)  # Combine filtered images side by side
        cv2.imwrite(output_path, preprocessed_image)

print("Preprocessing complete.")

Preprocessing complete.


In [None]:
from zipfile import ZipFile

In [None]:
with ZipFile('/content/MICC-F220.zip','r') as z:
  z.extractall("/content/sample_data/dataset")

In [None]:
!pip install zipfile36

Collecting zipfile36
  Downloading zipfile36-0.1.3-py3-none-any.whl (20 kB)
Installing collected packages: zipfile36
Successfully installed zipfile36-0.1.3


In [None]:
import cv2
import os
import numpy as np
from skimage import feature

# Specify the folder containing your preprocessed images
preprocessed_folder = '/content/sample_data/preprocessed'

# Create an output folder to save the LBP feature vectors
lbp_output_folder = '/content/sample_data/features'
os.makedirs(lbp_output_folder, exist_ok=True)

# LBP parameters
radius = 1
n_points = 8 * radius

# Process each preprocessed image
for filename in os.listdir(preprocessed_folder):
    if filename.endswith(('_preprocessed.jpg')):  # Filter for preprocessed images
        # Load the preprocessed image
        image_path = os.path.join(preprocessed_folder, filename)
        image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)

        # Compute LBP features
        lbp_image = feature.local_binary_pattern(image, n_points, radius, method='uniform')

        # Flatten the LBP image into a feature vector
        lbp_feature_vector, _ = np.histogram(lbp_image.ravel(), bins=np.arange(0, n_points + 3), range=(0, n_points + 2))

        # Save the LBP feature vector as a CSV file or any other format you prefer
        output_filename = os.path.splitext(filename)[0] + '_lbp.csv'
        output_path = os.path.join(lbp_output_folder, output_filename)
        np.savetxt(output_path, lbp_feature_vector, delimiter=',')

print("LBP feature extraction complete.")

LBP feature extraction complete.


In [None]:
import os
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report
from imblearn.over_sampling import SMOTE
from collections import Counter

# Load LBP features and labels
lbp_feature_folder = '/content/sample_data/features'
labels = []  # Store class labels
feature_vectors = []  # Store LBP feature vectors

# Iterate through the LBP feature files
for filename in os.listdir(lbp_feature_folder):
    if filename.endswith('_lbp.csv'):
        # Extract class label from the filename
        class_label = filename.split('_')[0]

        # Load LBP feature vector
        feature_vector = np.loadtxt(os.path.join(lbp_feature_folder, filename), delimiter=',')

        labels.append(class_label)
        feature_vectors.append(feature_vector)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(feature_vectors, labels, test_size=0.2, random_state=42)

# Check if there are classes with very few samples
class_counts = Counter(y_train)
min_class_count = min(class_counts.values())

# Apply SMOTE only if the minimum class count is greater than 1
if min_class_count > 1:
    # Apply SMOTE to handle imbalanced classes
    smote = SMOTE(random_state=42)
    X_train_resampled, y_train_resampled = smote.fit_resample(X_train, y_train)

    # Feature scaling
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train_resampled)
    X_test_scaled = scaler.transform(X_test)
else:
    # Use the original data without SMOTE
    X_train_scaled = StandardScaler().fit_transform(X_train)
    X_test_scaled = StandardScaler().fit_transform(X_test)
    y_train_resampled = y_train

# Initialize and train a K-Nearest Neighbors (KNN) classifier
knn_classifier = KNeighborsClassifier(n_neighbors=5)  # You can adjust the number of neighbors (K) as needed
knn_classifier.fit(X_train_scaled, y_train_resampled)

# Predict the labels for the scaled test data
y_pred = knn_classifier.predict(X_test_scaled)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy * 100:.2f}%')

# Print classification report for more detailed metrics
print(classification_report(y_test, y_pred))


Accuracy: 47.73%
               precision    recall  f1-score   support

          CRW       0.57      1.00      0.73         8
          DSC       0.90      0.60      0.72        15
       DSCF10       0.00      0.00      0.00         0
       DSCF13       0.00      0.00      0.00         1
       DSCF15       0.00      0.00      0.00         0
        DSCF2       0.00      0.00      0.00         1
     DSCF2059       0.00      0.00      0.00         0
   DSCF8tamp1       0.00      0.00      0.00         0
 DSCF8tamp132       0.00      0.00      0.00         1
 DSCF8tamp133       0.00      0.00      0.00         0
 DSCF8tamp237       0.00      0.00      0.00         1
  DSCF8tamp25       0.00      0.00      0.00         1
        DSCF9       0.00      0.00      0.00         1
     DSCN2322       0.00      0.00      0.00         0
     DSCN2329       0.00      0.00      0.00         1
       DSCN41       0.00      0.00      0.00         0
  DSCN41tamp1       0.00      0.00      0.00   

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
import os
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# Load LBP features and labels
lbp_feature_folder = '/content/sample_data/features'
labels = []  # Store class labels
feature_vectors = []  # Store LBP feature vectors

# Iterate through the LBP feature files
for filename in os.listdir(lbp_feature_folder):
    if filename.endswith('_lbp.csv'):
        # Extract class label from the filename
        class_label = filename.split('_')[0]

        # Load LBP feature vector
        feature_vector = np.loadtxt(os.path.join(lbp_feature_folder, filename), delimiter=',')

        labels.append(class_label)
        feature_vectors.append(feature_vector)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(feature_vectors, labels, test_size=0.2, random_state=42)

# Initialize the KNN classifier
knn_classifier = KNeighborsClassifier(n_neighbors=5)  # You can adjust the number of neighbors (K) as needed

# Train the classifier on the training data
knn_classifier.fit(X_train, y_train)

# Predict the labels for the test data
y_pred = knn_classifier.predict(X_test)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy * 100:.2f}%')


Accuracy: 52.27%


In [None]:
import os
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

# Load LBP features and labels
lbp_feature_folder = '/content/sample_data/features'
labels = []  # Store class labels
feature_vectors = []  # Store LBP feature vectors

# Iterate through the LBP feature files
for filename in os.listdir(lbp_feature_folder):
    if filename.endswith('_lbp.csv'):
        # Extract class label from the filename
        class_label = filename.split('_')[0]

        # Load LBP feature vector
        feature_vector = np.loadtxt(os.path.join(lbp_feature_folder, filename), delimiter=',')

        labels.append(class_label)
        feature_vectors.append(feature_vector)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(feature_vectors, labels, test_size=0.2, random_state=42)

# Feature scaling
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Hyperparameter tuning using GridSearchCV
param_grid = {'n_neighbors': [1, 3, 5, 7, 9]}
knn_classifier = KNeighborsClassifier()
grid_search = GridSearchCV(knn_classifier, param_grid, cv=5)
grid_search.fit(X_train_scaled, y_train)

best_k = grid_search.best_params_['n_neighbors']

# Initialize the KNN classifier with the best K
knn_classifier = KNeighborsClassifier(n_neighbors=best_k)

# Train the classifier on the scaled training data
knn_classifier.fit(X_train_scaled, y_train)

# Predict the labels for the scaled test data
y_pred = knn_classifier.predict(X_test_scaled)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy * 100:.2f}%')




Accuracy: 47.73%


In [None]:
import os
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report
from imblearn.over_sampling import SMOTE
from collections import Counter

# Load LBP features and labels
lbp_feature_folder = '/content/sample_data/features'
labels = []  # Store class labels
feature_vectors = []  # Store LBP feature vectors

# Iterate through the LBP feature files
for filename in os.listdir(lbp_feature_folder):
    if filename.endswith('_lbp.csv'):
        # Extract class label from the filename
        class_label = filename.split('_')[0]

        # Load LBP feature vector
        feature_vector = np.loadtxt(os.path.join(lbp_feature_folder, filename), delimiter=',')

        labels.append(class_label)
        feature_vectors.append(feature_vector)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(feature_vectors, labels, test_size=0.2, random_state=42)

# Check if there are classes with very few samples
class_counts = Counter(y_train)
min_class_count = min(class_counts.values())

# Apply SMOTE only if the minimum class count is greater than 1
if min_class_count > 1:
    # Apply SMOTE to handle imbalanced classes
    smote = SMOTE(random_state=42)
    X_train_resampled, y_train_resampled = smote.fit_resample(X_train, y_train)

    # Feature scaling
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train_resampled)
    X_test_scaled = scaler.transform(X_test)
else:
    # Use the original data without SMOTE
    X_train_scaled = StandardScaler().fit_transform(X_train)
    X_test_scaled = StandardScaler().fit_transform(X_test)
    y_train_resampled = y_train

# Initialize and train a Random Forest classifier
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)
rf_classifier.fit(X_train_scaled, y_train_resampled)

# Predict the labels for the scaled test data
y_pred = rf_classifier.predict(X_test_scaled)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy * 100:.2f}%')

# Print classification report for more detailed metrics
print(classification_report(y_test, y_pred))


Accuracy: 56.82%
               precision    recall  f1-score   support

          CRW       0.80      1.00      0.89         8
          DSC       0.93      0.87      0.90        15
       DSCF10       0.00      0.00      0.00         0
       DSCF13       0.00      0.00      0.00         1
        DSCF2       0.00      0.00      0.00         1
        DSCF6       0.00      0.00      0.00         0
   DSCF8tamp1       0.00      0.00      0.00         0
 DSCF8tamp132       0.00      0.00      0.00         1
 DSCF8tamp237       0.00      0.00      0.00         1
  DSCF8tamp25       0.00      0.00      0.00         1
  DSCF8tamp27       0.00      0.00      0.00         0
  DSCF8tamp37       0.00      0.00      0.00         0
        DSCF9       0.00      0.00      0.00         1
     DSCN2329       0.00      0.00      0.00         1
       DSCN41       0.00      0.00      0.00         0
DSCN41tamp131       0.00      0.00      0.00         1
DSCN41tamp132       0.00      0.00      0.00   

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
import os
import cv2
import numpy as np
from sklearn.neighbors import NearestNeighbors
from skimage.feature import local_binary_pattern
from sklearn.preprocessing import StandardScaler

# Define parameters
patch_size = (32, 32)  # Adjust patch size as needed
num_neighbors = 5  # Number of nearest neighbors in KNN
threshold = 0.5  # Adjust the similarity threshold

# Function to extract features from an image patch (replace with your feature extraction method)
def extract_features(patch):
    # Example: Using Local Binary Pattern (LBP) as a feature
    lbp_image = local_binary_pattern(patch, P=8, R=1, method='uniform')
    histogram, _ = np.histogram(lbp_image, bins=np.arange(0, 60), range=(0, 59))
    return histogram

# Function to load training data (replace with your own dataset loading)
def load_training_data():
    # Placeholder data (replace with your dataset loading code)
    num_samples = 200
    num_features = 59
    feature_vectors_train = np.random.rand(num_samples, num_features)
    labels_train = np.random.randint(2, size=num_samples)
    return feature_vectors_train, labels_train

# Function to detect forgeries in an image
def detect_forgeries(input_image_path, knn_classifier, scaler, feature_vectors_train):
    input_image = cv2.imread(input_image_path, cv2.IMREAD_GRAYSCALE)
    height, width = input_image.shape
    forgery_detection_image = input_image.copy()

    for y in range(0, height - patch_size[1] + 1):
        for x in range(0, width - patch_size[0] + 1):
            patch = input_image[y:y + patch_size[1], x:x + patch_size[0]]
            features = extract_features(patch)
            standardized_features = scaler.transform([features])

            _, indices = knn_classifier.kneighbors(standardized_features)

            for index in indices[0]:
                similarity = np.sum((feature_vectors_train[index] - standardized_features) ** 2)
                if similarity < threshold:
                    forgery_detection_image[y:y + patch_size[1], x:x + patch_size[0]] = 255  # White patch

    output_image_path = 'output_' + os.path.basename(input_image_path)
    cv2.imwrite(output_image_path, forgery_detection_image)

# Load the training dataset (authentic and tampered images)
feature_vectors_train, labels_train = load_training_data()

# Standardize the feature vectors
scaler = StandardScaler()
feature_vectors_train = scaler.fit_transform(feature_vectors_train)

# Initialize and train the KNN model
knn_classifier = NearestNeighbors(n_neighbors=num_neighbors)
knn_classifier.fit(feature_vectors_train)

# Specify the folder containing the images to be processed
input_image_folder = '/content/sample_data/dataset'

# Process each image in the folder
for filename in os.listdir(input_image_folder):
    if filename.endswith(('.jpg', '.jpeg', '.png')):  # Filter for image file extensions
        input_image_path = os.path.join(input_image_folder, filename)
        detect_forgeries(input_image_path, knn_classifier, scaler, feature_vectors_train)
