In [1]:
!pip install scipy pillow numpy matplotlib scikit-learn pandas



In [4]:
# Module 1: feature extraction
# (image file -> 2-D DCT coefficients -> first-digit distribution)

import os
import numpy as np
import pandas as pd
from PIL import Image
from scipy.fftpack import dct
import scipy.stats as stats

def get_dct_values(image_path):
    """Return the 2-D DCT coefficients of an image's grayscale version.

    The image is opened with Pillow, converted to single-channel grayscale
    (mode ``'L'``) and transformed with ``scipy.fftpack.dct`` using its
    default settings, applied along one axis and then the other.

    Args:
        image_path: Path to an image file that Pillow can read.

    Returns:
        A 2-D array of DCT coefficients with the same shape as the
        grayscale pixel array.

    Raises:
        OSError: If the file cannot be opened or decoded by Pillow.
    """
    # Use a context manager so the underlying file handle is released as
    # soon as the pixels are copied out, even for formats Pillow loads
    # lazily or keeps open (e.g. multi-frame GIFs).
    with Image.open(image_path) as img:
        img_data = np.array(img.convert('L'))
    # dct() works on the last axis, so transposing before the first call
    # transforms the columns and the second call transforms the rows.
    dct_values = dct(dct(img_data.T).T)
    return dct_values

def count_first_digit_probabilities(dct_values):
    """Return the relative frequency of each leading digit 1-9.

    The leading digit is the first *significant* decimal digit of the
    absolute value, so ``0.05`` counts as 5 and ``-23.0`` counts as 2.
    Entries that have no leading digit (zeros, NaN, infinities and
    subnormal values too small to scale reliably) are ignored.

    Args:
        dct_values: Array-like of real numbers of any shape.

    Returns:
        A dict mapping each digit 1..9 to the fraction of usable entries
        whose leading digit is that digit. If no entry is usable, every
        fraction is ``0.0``.
    """
    magnitudes = np.abs(np.asarray(dct_values, dtype=np.float64)).ravel()
    usable = np.isfinite(magnitudes) & (magnitudes >= np.finfo(np.float64).tiny)
    magnitudes = magnitudes[usable]

    total = magnitudes.size
    if total == 0:
        # Nothing to count; avoid dividing by zero.
        return {digit: 0.0 for digit in range(1, 10)}

    # Scale every magnitude into [1, 10) and keep the integer part.
    exponents = np.floor(np.log10(magnitudes))
    # Rounding absorbs floating-point noise such as
    # 0.3 / 0.1 == 2.9999999999999996, which would otherwise count as 2.
    mantissas = np.round(magnitudes / np.power(10.0, exponents), 12)
    digits = mantissas.astype(np.int64)
    # A mantissa that lands on 10 sits on a decade boundary (log10 was
    # rounded down), so its leading digit is 1.
    digits[digits >= 10] = 1
    digits = np.clip(digits, 1, 9)

    counts = np.bincount(digits, minlength=10)
    return {digit: float(counts[digit]) / total for digit in range(1, 10)}

# Module 2: Benford's law verification
# (expected digit distribution, chi-square comparison, batch CSV export)

def calculate_first_digit(x):
    """Return the first significant decimal digit of ``x``.

    The sign is ignored, and values below 1 are scaled up, so ``0.05``
    yields 5 and ``-42`` yields 4. Zero has no significant digit and
    yields 0.

    Args:
        x: A real number.

    Returns:
        An int in 1..9, or 0 when ``x`` is zero.

    Raises:
        ValueError: If ``x`` is NaN or infinite.
    """
    import math

    x = abs(x)
    if x == 0:
        return 0
    if isinstance(x, int):
        # Integer division keeps arbitrarily large ints exact; converting
        # them to float first could round up into the next decade.
        while x >= 10:
            x //= 10
        return x
    if not math.isfinite(x):
        # Dividing infinity by 10 never terminates, and NaN has no digits.
        raise ValueError(f"cannot take the first digit of {x!r}")
    while x >= 10:
        x /= 10
    while x < 1:
        x *= 10
    return int(x)

def benfords_law():
    """Return Benford's expected probability for each leading digit 1-9.

    Element ``i`` of the returned list is ``log10(1 + 1/d)`` for the digit
    ``d = i + 1``.
    """
    expected = []
    for d in range(1, 10):
        expected.append(np.log10(1 + 1 / d))
    return expected

def chi_square_test(observed, expected):
    """Run SciPy's one-way chi-square test of ``observed`` against ``expected``.

    Both arguments are forwarded unchanged to ``scipy.stats.chisquare``.
    The returned result unpacks as ``(statistic, p_value)``.
    """
    result = stats.chisquare(f_obs=observed, f_exp=expected)
    return result

def benfords_law_classifier(chi_square_stat, p_value, alpha=0.05):
    """Decide whether a chi-square result is consistent with Benford's law.

    The null hypothesis is that the observed digit frequencies match the
    expected ones. A p-value above ``alpha`` means the test fails to
    reject that hypothesis, which is reported as "follows Benford's law".

    Args:
        chi_square_stat: The chi-square statistic. It is accepted so the
            test result can be passed through as-is, but the decision
            depends only on ``p_value``.
        p_value: The p-value of the chi-square test.
        alpha: Significance level; defaults to 0.05.

    Returns:
        ``True`` if ``p_value`` is strictly greater than ``alpha``,
        otherwise ``False`` (including when ``p_value`` is NaN).
    """
    # bool() keeps the return a plain Python bool even for NumPy scalars.
    return bool(p_value > alpha)

def process_images_in_folder(folder_path, output_csv='benfords_law_results2.csv'):
    """Run the Benford's-law check on every image in a folder and save a CSV.

    For each file whose name ends in a recognised image extension, the
    function computes the DCT coefficients, derives the first-digit
    distribution, compares it with Benford's expected distribution using
    a chi-square test, and records the outcome. An image that cannot be
    read or analysed is reported on stdout and skipped, so one bad file
    does not discard the rest of the batch.

    Args:
        folder_path: Directory containing the images.
        output_csv: Path of the CSV file to write. It is overwritten if it
            already exists. Defaults to ``'benfords_law_results2.csv'``.

    Raises:
        OSError: If ``folder_path`` cannot be listed or the CSV cannot be
            written.
    """
    image_extensions = ('.png', '.jpg', '.jpeg', '.gif', '.bmp')
    # os.listdir() returns names in arbitrary order; sorting makes the row
    # order of the CSV reproducible between runs and machines.
    image_files = sorted(
        f for f in os.listdir(folder_path)
        if f.lower().endswith(image_extensions)
    )

    # The expected distribution is the same for every image.
    expected_probabilities = benfords_law()
    results = []

    for image_file in image_files:
        image_path = os.path.join(folder_path, image_file)

        try:
            dct_values = get_dct_values(image_path)
            probabilities = count_first_digit_probabilities(dct_values)
            observed_probabilities = [probabilities[digit] for digit in range(1, 10)]

            # NOTE(review): the chi-square test is given proportions that
            # sum to 1, not raw counts, so the statistic and p-value do not
            # reflect how many coefficients were observed. Confirm whether
            # counts were intended before relying on the p-value.
            chi_square_stat, p_value = chi_square_test(
                observed_probabilities, expected_probabilities
            )
        except (OSError, ValueError, ZeroDivisionError) as exc:
            # Unreadable/corrupt file, or an image with no usable
            # coefficients (e.g. entirely black).
            print(f'Skipping {image_path}: {exc}')
            continue

        follows_benfords_law = benfords_law_classifier(chi_square_stat, p_value)

        results.append(
            [image_file, follows_benfords_law, chi_square_stat, p_value]
            + observed_probabilities
        )

    columns = (
        ['Image File', 'Follows Benford\'s Law', 'Chi-square Statistic', 'P-value']
        + [f'Probability {i}' for i in range(1, 10)]
    )
    df = pd.DataFrame(results, columns=columns)
    df.to_csv(output_csv, index=False)

# Example usage: analyse one folder of face images. The result is written
# to 'benfords_law_results2.csv' in the current working directory, so a
# second run replaces the output of the previous one.
# Alternative input folder (real faces), currently disabled:
#folder_path ='/Users/reyna/CSCI158ProjectServer/source/20realface'
#process_images_in_folder(folder_path)
# Active run: the fake-face folder. The path is machine-specific and must
# exist on the machine running this cell.
folder_path1 ='/Users/reyna/CSCI158ProjectServer/source/20fakeface'
process_images_in_folder(folder_path1)