In [1]:
import os
import cv2
import numpy as np
import pandas as pd
import wandb
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

In [4]:
# Initialize W&B
wandb.init(project="ai-vs-human-generated-images", name="frequency_features_rf")

In [5]:
# Load dataset
data_dir = "/kaggle/input/ai-vs-human-generated-dataset"
df = (pd.read_csv('/kaggle/input/ai-vs-human-generated-dataset/train.csv')).sample(n=5000,random_state=42)

In [6]:
def extract_frequency_features(img_path):
    """ Extracts frequency-based statistical features from an image """
    img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)  # Convert to grayscale
    f = np.fft.fft2(img)  # Apply 2D Fourier Transform
    fshift = np.fft.fftshift(f)  # Shift zero frequency to center
    magnitude_spectrum = np.abs(fshift)  # Get magnitude spectrum

    # Compute frequency domain features
    mean_freq = np.mean(magnitude_spectrum)
    var_freq = np.var(magnitude_spectrum)
    high_freq_ratio = np.sum(magnitude_spectrum[64:, 64:]) / np.sum(magnitude_spectrum)  # Ratio of high-freq energy

    return [mean_freq, var_freq, high_freq_ratio]

In [7]:
# Extract features for all images
features = []
labels = []

for index, row in df.iterrows():
    img_path = os.path.join(data_dir, row['file_name'])
    feature_vector = extract_frequency_features(img_path)
    features.append(feature_vector)
    labels.append(row['label'])  # 0 = Human, 1 = AI

# Convert to DataFrame
features_df = pd.DataFrame(features, columns=["Mean_Freq", "Var_Freq", "High_Freq_Ratio"])
features_df["Label"] = labels

In [8]:
# Split dataset
X = features_df.drop("Label", axis=1)
y = features_df["Label"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define hyperparameters and log them to W&B
hyperparams = {"n_estimators": 100, "random_state": 42}
wandb.config.update(hyperparams)

# Train a Random Forest Classifier
clf = RandomForestClassifier(n_estimators=hyperparams["n_estimators"], random_state=hyperparams["random_state"])
clf.fit(X_train, y_train)

# Evaluate the model
y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

In [9]:
# Log metrics to W&B
wandb.log({"accuracy": accuracy})

# Print results
print("Accuracy:", accuracy)
print(classification_report(y_test, y_pred))

# Save model to W&B
wandb.save("random_forest_model.pkl")

Accuracy: 0.773
              precision    recall  f1-score   support

           0       0.79      0.79      0.79       538
           1       0.75      0.76      0.75       462

    accuracy                           0.77      1000
   macro avg       0.77      0.77      0.77      1000
weighted avg       0.77      0.77      0.77      1000



[]

In [10]:
wandb.finish()

0,1
accuracy,▁

0,1
accuracy,0.773
