In [None]:
import os
import cv2
import pandas as pd
import numpy as np
from tqdm.notebook import tqdm

from constants import DatasetPath

# UTILITIES

In [None]:
# Root folder of the dataset, read from DatasetPath.effectivePath.
# Make sure the dataset path has been specified in the 'constants.py' file.
# NOTE: the cells below build every path by plain string concatenation
# (e.g. DATASET_PATH + "dataset_partition.csv"), so the value must end with a path separator.
DATASET_PATH = DatasetPath.effectivePath

def DATASET_DIRS():
	"""
	Lists the entries currently found in the dataset root folder.

	The folder is read again at every call, so the result always reflects the current content.

	Returns
	-------
		entries (list[str]): the names returned by 'os.listdir' for DATASET_PATH.
	"""
	entries = os.listdir(DATASET_PATH)
	return entries

# The "cycle_gan" and "pro_gan" folders contain both real and fake images,
# which is why they are listed in both REAL_DIRS and FAKE_DIRS.

REAL_DIRS = [
	"afhq", "celebahq", "coco", "ffhq", "imagenet",
	"landscape", "lsun", "metfaces", "cycle_gan", "pro_gan",
]

FAKE_DIRS = [
	"big_gan", "cips", "cycle_gan", "ddpm", "denoising_diffusion_gan",
	"diffusion_gan", "face_synthetics", "gansformer", "gau_gan", "generative_inpainting",
	"glide", "lama", "latent_diffusion", "mat", "palette",
	"pro_gan", "projected_gan", "sfhq", "stable_diffusion", "star_gan",
	"stylegan1", "stylegan2", "stylegan3", "taming_transformer", "vq_diffusion",
]

# Column layout shared by the metadata '.csv' files
csv_columns_name = [
	'filename',
	'image_path',
	'target',
	'category',
]


# Folder that collects the output of this notebook ('metadata.csv' is saved here).
# NOTE: the separator is a hard-coded backslash, i.e. these are Windows-style paths.
DIR_FOURIER_PATH = DATASET_PATH + "fourier\\"
# Nested 'fourier' sub-folder where the FFT images themselves are written.
FFTS_PATH = DIR_FOURIER_PATH + "fourier\\"

### PREP

In [None]:
# Load the dataset partition table. FFT_application (defined below) reads its
# 'image_path', 'filename', 'target' and 'category' columns.
ds_partition_df = pd.read_csv(DATASET_PATH + "dataset_partition.csv")

# Show the table as the cell output for a quick visual check
ds_partition_df

In [None]:
def df_to_csv(df, filename, path, n_chunks=100):
	"""
	Writes a DataFrame to a '.csv' file chunk by chunk, so that a tqdm progress bar can be shown while writing.

	Rows are selected by position, therefore every row is written exactly once and in its original order,
	even when the DataFrame index contains duplicated labels. The index itself is not written.

	Parameters
	----------
		df (pd.DataFrame): the DataFrame to convert.
		filename (str): the file name shown in the progress bar and in the final message (it is not used to build the output location).
		path (str): the full path of the '.csv' file to write; an existing file is overwritten.
		n_chunks (int): the maximum number of chunks (progress steps) to use. Defaults to 100.
	"""
	n_rows = len(df)

	# Never use more chunks than rows, but keep at least one so that the header is written even for an empty DataFrame
	n_chunks = max(1, min(int(n_chunks), n_rows))

	# Integer boundaries: chunk i covers the rows in [bounds[i], bounds[i + 1])
	bounds = [i * n_rows // n_chunks for i in range(n_chunks + 1)]

	for chunk_id in tqdm(range(n_chunks), desc="Creating \'" + filename + "\' file"):
		# Only the first chunk creates the file and carries the header, the others are appended
		is_first = chunk_id == 0
		df.iloc[bounds[chunk_id]:bounds[chunk_id + 1]].to_csv(path, mode='w' if is_first else 'a', header=is_first, index=False)

	print("\'" + filename + "\' has been successfully created.")

In [None]:
# Create the output folders for the FFT images.
# 'os.makedirs' creates the outer 'fourier' folder and the nested one in a single call. It also
# repairs a half-created layout (outer folder present, nested one missing), which a check on the
# outer folder alone would skip.
# The default directory mode is used on purpose: 0o666 has no execute bit, so on POSIX systems
# nothing could be created inside a directory made with it.
if os.path.isdir(FFTS_PATH):
	print("Fourier folder already exist.")
else:
	os.makedirs(FFTS_PATH, exist_ok=True)

# FFT APPLICATION

### FFT FUNCTION

In [None]:
def greyscale_FFT(img_path):
	"""
	Applies Fast Fourier Transform (FFT) to the greyscale version of an image and returns its log-magnitude spectrum.

	Parameters
	----------
		img_path (str): input image path.

	Returns
	-------
		fft_img (np.ndarray): a 2D uint8 array holding the logarithm of the FFT magnitude of the input image, rescaled to the range [0, 255].
		No 'fftshift' is applied to the spectrum.
		An all-zero array is returned when the spectrum is flat (e.g. a completely black image).

	Raises
	------
		FileNotFoundError: if the image cannot be read from 'img_path'.
	"""

	# 'cv2.imread' does not raise on a missing or unreadable file: it returns None
	bgr_img = cv2.imread(img_path)
	if bgr_img is None:
		raise FileNotFoundError(f"Unable to read image: {img_path}")

	# Convert the image from BGR to grayscale
	gray_img = cv2.cvtColor(bgr_img, cv2.COLOR_BGR2GRAY)

	# Magnitude of the 2D FFT of the grayscale image
	magnitude = np.abs(np.fft.fft2(gray_img))

	# log(0) is -inf and would turn the whole normalized image into NaN/garbage.
	# Exact zeros are clamped to the smallest non-zero magnitude, so spectra without zeros are left untouched.
	non_zero = magnitude[magnitude > 0]
	if non_zero.size == 0:
		return np.zeros(magnitude.shape, dtype=np.uint8)
	log_magnitude = np.log(np.maximum(magnitude, non_zero.min()))

	# Find the minimum and maximum values of the log-magnitude for normalization
	min_val = np.min(log_magnitude)
	max_val = np.max(log_magnitude)

	# A flat spectrum cannot be stretched to [0, 255] (division by zero)
	if max_val == min_val:
		return np.zeros(magnitude.shape, dtype=np.uint8)

	# Normalize to the range [0, 255] and convert to uint8 (the fractional part is truncated)
	fft_img = (log_magnitude - min_val) * (255.0 / (max_val - min_val))
	return np.uint8(fft_img)

In [None]:
def load_paths_from_csv(training_csv, test_csv):
	"""
	Collect every image path referenced by the training and test CSV files.

	The training file is read through its 'anchor', 'positive' and 'negative' columns,
	the test file through its 'real' and 'fake' columns.
	The number of collected paths is printed before returning.

	Parameters
	----------
	training_csv : str
		Path to the training CSV file.
	test_csv : str
		Path to the test CSV file.

	Returns
	-------
	set
		Set containing all unique image paths from the training and test CSV files.
	"""
	# Read both files first (training, then test)
	training_df, test_df = (pd.read_csv(csv_file) for csv_file in (training_csv, test_csv))

	# Stack the path columns of each file and drop the repeated values
	training_paths = pd.concat([training_df[column] for column in ('anchor', 'positive', 'negative')]).unique()
	test_paths = pd.concat([test_df[column] for column in ('real', 'fake')]).unique()

	# Merge the two groups; the set removes paths shared by both files
	all_paths = set(training_paths) | set(test_paths)

	print(f"Detected Images to Convert: {len(all_paths)}")

	return all_paths

def FFT_application(ds_partition_df, training_csv, test_csv):
	"""
	Applies Fast Fourier Transform (FFT) to the images of the dataset that are used in the training or test set.
	The function also saves the resulting images in FFTS_PATH, a 'metadata.csv' in DIR_FOURIER_PATH
	and a 'path_mapping.csv' in DATASET_PATH.

	Parameters
	----------
	ds_partition_df : pd.DataFrame
		DataFrame containing dataset partition information with columns 'image_path', 'filename', 'target', and 'category'.
	training_csv : str
		Path to the training CSV file.
	test_csv : str
		Path to the test CSV file.

	Returns
	-------
	dict
		Dictionary mapping original image paths to their Fourier transformed paths (both relative to DATASET_PATH).

	Raises
	------
	OSError
		If a Fourier image cannot be written, so that the metadata never references a file that does not exist.
	"""
	# Load paths from training and test CSV files
	valid_paths = load_paths_from_csv(training_csv, test_csv)

	# Keep only the images used in training and testing, so the progress bar counts actual conversions
	image_paths = ds_partition_df["image_path"]
	is_selected = image_paths.notna() & image_paths.isin(valid_paths)
	selected_df = ds_partition_df.loc[is_selected, ["image_path", "filename", "target", "category"]]

	# Rows of the Fourier metadata, collected in a list and converted to a DataFrame once at the end
	# (growing a DataFrame one row at a time is quadratic)
	metadata_rows = []

	# Dictionary to store RGB-Fourier mappings
	path_matching_dict = {}

	rows = selected_df.itertuples(index=False, name=None)
	for image_path, filename, target, category in tqdm(rows, total=len(selected_df), desc="FFT application"):
		# Generate the Fourier transformed image from the full path of the RGB image
		fft_img = greyscale_FFT(DATASET_PATH + image_path)

		# Adjust the filename based on the target value (0 is treated as real, anything else as fake)
		adjusted_filename = filename.replace("img", "real" if target == 0 else "fake")

		# NOTE(review): all images are saved in the same folder under 'adjusted_filename'; rows sharing
		# a filename would overwrite each other — confirm that filenames are unique across the dataset.
		fft_path = FFTS_PATH + adjusted_filename

		# 'cv2.imwrite' reports a failed write through its return value instead of raising
		if not cv2.imwrite(fft_path, fft_img):
			raise OSError(f"Unable to write the Fourier image: {fft_path}")

		# Get the path of the Fourier transformed image relative to DATASET_PATH
		split_fft_path = fft_path.split(DATASET_PATH)[-1]

		metadata_rows.append([adjusted_filename, split_fft_path, target, category])
		path_matching_dict[image_path] = split_fft_path

	# Save 'metadata.csv'
	fourier_metadata_df = pd.DataFrame(metadata_rows, columns=csv_columns_name)
	df_to_csv(fourier_metadata_df, "metadata.csv", DIR_FOURIER_PATH + "metadata.csv")

	# Save 'path_matching_dict' for utility, as a table with two columns: 'RGB Path' and 'Fourier Path'
	path_matching_df = pd.DataFrame(list(path_matching_dict.items()), columns=['RGB Path', 'Fourier Path'])
	df_to_csv(path_matching_df, 'path_mapping.csv', DATASET_PATH + 'path_mapping.csv')

	return path_matching_dict
	

In [None]:
# Run the FFT conversion on the images referenced by the RGB training/test CSV files.
# The returned dict (RGB path -> Fourier path) is used below to rewrite the paths in those CSV files.
path_matching_dict = FFT_application(ds_partition_df, DATASET_PATH + "RGB_trainingSet.csv", DATASET_PATH + "RGB_testSet.csv")

# UPDATE TRAINING SET & TEST SET PATHS

In [None]:
def replace_paths_in_training_set(csv_path, path_matching_dict):
	"""
	Rewrite the 'anchor', 'positive' and 'negative' columns of the training set so that they
	point to the frequency domain images, then store the result as 'trainingSet.csv' in DATASET_PATH.

	Parameters:
	----------
	csv_path (str): Path to the training CSV file to read.
	path_matching_dict (dict): Dictionary mapping RGB image paths to frequency domain image paths.
	"""
	triplets_df = pd.read_csv(csv_path)

	for column in ('anchor', 'positive', 'negative'):
		original_paths = triplets_df[column]
		mapped_paths = original_paths.map(path_matching_dict)
		# Paths without a match in the dictionary are left as they are
		triplets_df[column] = mapped_paths.fillna(original_paths)

	# Store the Training set
	df_to_csv(triplets_df, "trainingSet.csv", DATASET_PATH + "trainingSet.csv")

def replace_paths_in_test_set(csv_path, path_matching_dict):
	"""
	Rewrite the 'real' and 'fake' columns of the test set so that they point to the frequency domain images.
	The result is written back to 'csv_path', replacing the content of the file that was read.

	Parameters:
	----------
	csv_path (str): Path to the test CSV file ('testSet.csv'); it is both the input and the output file.
	path_matching_dict (dict): Dictionary mapping RGB image paths to frequency domain image paths.
	"""
	pairs_df = pd.read_csv(csv_path)

	for column in ('real', 'fake'):
		original_paths = pairs_df[column]
		mapped_paths = original_paths.map(path_matching_dict)
		# Paths without a match in the dictionary are left as they are
		pairs_df[column] = mapped_paths.fillna(original_paths)

	# Store the Test set
	df_to_csv(pairs_df, "testSet.csv", csv_path)

In [None]:
# Rewrite the training/test CSV files so that they point to the Fourier images.
# The training set is read from 'RGB_trainingSet.csv' and stored as 'trainingSet.csv'.
replace_paths_in_training_set(DATASET_PATH + "RGB_trainingSet.csv", path_matching_dict)
# The test set is read from 'testSet.csv' and written back to the same file.
# NOTE(review): FFT_application above was given 'RGB_testSet.csv' — confirm that 'testSet.csv' is the intended input here.
replace_paths_in_test_set(DATASET_PATH + "testSet.csv", path_matching_dict)

### CHECK FOR SUCCESSFUL PATH UPDATE

In [None]:
# Reload the stored training set and display it to check that the paths were updated
train_df = pd.read_csv(DATASET_PATH + "trainingSet.csv")
train_df

In [None]:
# Reload the stored test set and display it to check that the paths were updated
test_df = pd.read_csv(DATASET_PATH + "testSet.csv")
test_df