# LSB statistical analysis #

In [2]:
import os
import numpy as np
import cv2
import csv

def extract_lsb(image):
    """
    استخراج بیت کم ارزش (LSB) از تصویر.
    """
    return image & 1

def calculate_statistical_features(lsb):
    """
    محاسبه ویژگی‌های آماری از LSB.
    """
    mean = np.mean(lsb)  # میانگین بیت‌های LSB
    std_dev = np.std(lsb)  # انحراف معیار بیت‌های LSB
    unique, counts = np.unique(lsb, return_counts=True)
    bit_balance = counts[0] / (counts[1] + 1e-9)  # نسبت بیت‌های 0 به 1
    return mean, std_dev, bit_balance

def detect_steganography(image_path):
    """
    بررسی اینکه آیا تصویر آلوده است یا پاک.
    """
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if image is None:
        raise ValueError(f"Unable to read image: {image_path}")
    
    lsb = extract_lsb(image)
    mean, std_dev, bit_balance = calculate_statistical_features(lsb)
    
    # معیارهای تشخیص
    threshold_mean = 0.02
    threshold_std = 0.05
    threshold_balance = 1.1
    
    # بررسی ویژگی‌ها برای تشخیص
    if abs(mean - 0.5) > threshold_mean or std_dev < threshold_std or bit_balance > threshold_balance:
        return "Steganography Detected"
    else:
        return "Clean"

def process_folder(input_folder, output_csv):
    """
    پردازش تمام تصاویر در یک پوشه و ذخیره نتایج در فایل CSV.
    """
    results = []
    clean_count = 0
    stego_count = 0

    for filename in os.listdir(input_folder):
        image_path = os.path.join(input_folder, filename)
        if not os.path.isfile(image_path):
            continue
        
        try:
            status = detect_steganography(image_path)
            results.append((filename, status))
            if status == "Clean":
                clean_count += 1
            else:
                stego_count += 1
        except Exception as e:
            print(f"Error processing {filename}: {e}")
    
    # ذخیره نتایج در فایل CSV
    with open(output_csv, mode='w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(["Image", "Status"])
        writer.writerows(results)
    print(f"Results saved to {output_csv}")

    # چاپ آماری
    total_images = clean_count + stego_count
    clean_percentage = (clean_count / total_images) * 100 if total_images > 0 else 0
    stego_percentage = (stego_count / total_images) * 100 if total_images > 0 else 0

    print(f"\nSummary:")
    print(f"Total Images: {total_images}")
    print(f"Clean Images: {clean_count} ({clean_percentage:.2f}%)")
    print(f"Steganography Detected Images: {stego_count} ({stego_percentage:.2f}%)")

# اجرای کد برای پوشه نمونه
input_folder = "./10000_random_files/"  # مسیر پوشه ورودی
output_csv = "results.csv"  # فایل خروجی
process_folder(input_folder, output_csv)


Error processing Temples_000044.png: index 1 is out of bounds for axis 0 with size 1
Results saved to results.csv

Summary:
Total Images: 861
Clean Images: 570 (66.20%)
Steganography Detected Images: 291 (33.80%)


# Machine Learning #

In [3]:
import cv2
import numpy as np
import os

def embed_message(image, message):
    """
    جاسازی پیام در تصویر با استفاده از LSB.
    """
    binary_message = ''.join(format(ord(char), '08b') for char in message)
    flat_image = image.flatten()
    for i in range(len(binary_message)):
        flat_image[i] = (flat_image[i] & ~1) | int(binary_message[i])
    return flat_image.reshape(image.shape)

def generate_dataset(clean_folder, output_folder, message="Hidden Message"):
    """
    ایجاد داده‌های برچسب‌گذاری‌شده با جاسازی پیام‌ها در تصاویر.
    """
    os.makedirs(output_folder, exist_ok=True)
    for i, filename in enumerate(os.listdir(clean_folder)):
        image_path = os.path.join(clean_folder, filename)
        if filename.endswith(".png") or filename.endswith(".jpg"):
            image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
            # ذخیره تصویر پاک
            cv2.imwrite(os.path.join(output_folder, f"clean_{i}.png"), image)
            # ایجاد و ذخیره تصویر آلوده
            stego_image = embed_message(image, message)
            cv2.imwrite(os.path.join(output_folder, f"stego_{i}.png"), stego_image)



In [4]:
def extract_features(image):
    """
    استخراج ویژگی‌ها از تصویر.
    """
    lsb = image & 1
    mean_lsb = np.mean(lsb)
    std_lsb = np.std(lsb)
    zeros, ones = np.unique(lsb, return_counts=True)[1]
    balance_ratio = zeros / (zeros + ones)
    return [mean_lsb, std_lsb, balance_ratio]


In [7]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import pandas as pd

def train_ml_model(feature_csv):
    """
    آموزش مدل ماشین لرنینگ.
    """
    data = pd.read_csv(feature_csv)
    X = data[["mean_lsb", "std_lsb", "balance_ratio"]].values
    y = data["label"].values  # 0 برای پاک، 1 برای آلوده

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    clf = RandomForestClassifier()
    clf.fit(X_train, y_train)

    y_pred = clf.predict(X_test)
    print(classification_report(y_test, y_pred))

    return clf


# Frequency Analysis #

In [8]:
def analyze_lsb_distribution(image):
    """
    تحلیل فراوانی بیت‌ها در تصویر.
    """
    lsb = image & 1
    unique, counts = np.unique(lsb, return_counts=True)
    zeros = counts[0] if 0 in unique else 0
    ones = counts[1] if 1 in unique else 0
    balance_ratio = zeros / (zeros + ones)
    return balance_ratio, zeros, ones

def detect_with_distribution(image):
    """
    تشخیص پاک یا آلوده بودن با تحلیل فراوانی بیت‌ها.
    """
    balance_ratio, zeros, ones = analyze_lsb_distribution(image)
    if abs(balance_ratio - 0.5) > 0.1:
        return "Steganography Detected"
    else:
        return "Clean"


# Result #

In [9]:
def combine_results(image, clf):
    """
    ترکیب نتایج سه روش.
    """
    # روش اول: آماری
    lsb = image & 1
    mean_lsb = np.mean(lsb)
    std_lsb = np.std(lsb)

    # روش سوم: تحلیل فراوانی
    balance_ratio, _, _ = analyze_lsb_distribution(image)

    # روش دوم: ماشین لرنینگ
    features = np.array([[mean_lsb, std_lsb, balance_ratio]])
    ml_result = clf.predict(features)[0]

    # ترکیب نتایج
    results = [
        "Steganography Detected" if abs(mean_lsb - 0.5) > 0.05 else "Clean",
        "Steganography Detected" if abs(balance_ratio - 0.5) > 0.1 else "Clean",
        "Steganography Detected" if ml_result == 1 else "Clean",
    ]

    final_result = (
        "Steganography Detected"
        if results.count("Steganography Detected") > 1
        else "Clean"
    )
    return final_result

In [25]:
import os
import numpy as np
import cv2
import csv
from sklearn.ensemble import RandomForestClassifier

# روش اول: محاسبه ویژگی‌های آماری
def extract_lsb(image):
    return image & 1

def calculate_statistical_features(lsb):
    mean = np.mean(lsb)
    std_dev = np.std(lsb)
    unique, counts = np.unique(lsb, return_counts=True)
    bit_balance = counts[0] / (counts[1] + 1e-9) if len(counts) > 1 else 0
    return mean, std_dev, bit_balance

def detect_statistical(image):
    lsb = extract_lsb(image)
    mean, std_dev, bit_balance = calculate_statistical_features(lsb)
    threshold_mean = 0.02
    threshold_std = 0.05
    threshold_balance = 1.1
    if abs(mean - 0.5) > threshold_mean or std_dev < threshold_std or bit_balance > threshold_balance:
        return "Steganography Detected"
    return "Clean"

# روش دوم: ماشین لرنینگ
def train_dummy_ml_model():
    """
    آموزش یک مدل ساده با داده‌های مصنوعی.
    """
    # داده‌های ساختگی برای آموزش
    np.random.seed(42)
    X = np.random.rand(1000, 3)
    y = (X[:, 0] > 0.5).astype(int)  # مقدار فرضی
    clf = RandomForestClassifier()
    clf.fit(X, y)
    return clf

def detect_with_ml(image, clf):
    lsb = extract_lsb(image)
    mean, std_dev, bit_balance = calculate_statistical_features(lsb)
    features = np.array([[mean, std_dev, bit_balance]])
    prediction = clf.predict(features)[0]
    return "Steganography Detected" if prediction == 1 else "Clean"

# روش سوم: تحلیل توزیع بیت‌ها
def analyze_lsb_distribution(image):
    lsb = image & 1
    unique, counts = np.unique(lsb, return_counts=True)
    zeros = counts[0] if 0 in unique else 0
    ones = counts[1] if 1 in unique else 0
    balance_ratio = zeros / (zeros + ones + 1e-9)
    return balance_ratio, zeros, ones

def detect_with_distribution(image):
    balance_ratio, _, _ = analyze_lsb_distribution(image)
    if abs(balance_ratio - 0.5) > 0.1:
        return "Steganography Detected"
    return "Clean"

# ترکیب نتایج سه روش
def combine_results(stat_result, ml_result, dist_result):
    results = [stat_result, ml_result, dist_result]
    return "Steganography Detected" if results.count("Steganography Detected") > 1 else "Clean"

# پردازش پوشه تصاویر
def process_folder(input_folder, output_csv):
    results = []
    clean_count = 0
    stego_count = 0
    clf = train_dummy_ml_model()  # آموزش مدل

    for filename in os.listdir(input_folder):
        image_path = os.path.join(input_folder, filename)
        if not os.path.isfile(image_path):
            continue

        try:
            image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
            if image is None:
                raise ValueError(f"Unable to read image: {filename}")

            # اجرای سه روش
            stat_result = detect_statistical(image)
            ml_result = detect_with_ml(image, clf)
            dist_result = detect_with_distribution(image)

            # ترکیب نتایج
            final_result = combine_results(stat_result, ml_result, dist_result)

            results.append([filename, stat_result, ml_result, dist_result, final_result])
            if final_result == "Clean":
                clean_count += 1
            else:
                stego_count += 1

        except Exception as e:
            print(f"Error processing {filename}: {e}")

    # ذخیره نتایج در CSV
    with open(output_csv, mode='w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(["Image", "Statistical Result", "ML Result", "Distribution Result", "Final Result"])
        writer.writerows(results)

    # چاپ آماری
    total_images = clean_count + stego_count
    clean_percentage = (clean_count / total_images) * 100 if total_images > 0 else 0
    stego_percentage = (stego_count / total_images) * 100 if total_images > 0 else 0

    print(f"\nSummary:")
    print(f"Total Images: {total_images}")
    print(f"Clean Images: {clean_count} ({clean_percentage:.2f}%)")
    print(f"Steganography Detected Images: {stego_count} ({stego_percentage:.2f}%)")

# اجرای کد
input_folder = "./10000_random_files/"
output_csv = "results.csv"
process_folder(input_folder, output_csv)



Summary:
Total Images: 1029
Clean Images: 774 (75.22%)
Steganography Detected Images: 255 (24.78%)


# Stego Maker #

In [17]:
import os
import random
import string
import cv2
import numpy as np

def generate_random_text(length=100):
    """
    تولید یک متن تصادفی با استفاده از حروف الفبا و اعداد.
    """
    characters = string.ascii_letters + string.digits + string.punctuation + ' '  # مجموعه کاراکترها
    random_text = ''.join(random.choice(characters) for _ in range(length))  # تولید متن تصادفی
    return random_text

def text_to_binary(text):
    """
    تبدیل متن به رشته باینری.
    """
    return ''.join(format(ord(c), '08b') for c in text)  # تبدیل هر کاراکتر به کد ASCII و سپس باینری

def embed_message(image, message):
    """
    نهان‌نگاری پیام در تصویر با استفاده از LSB.
    """
    # تبدیل تصویر به آرایه یک‌بعدی
    flattened_image = image.flatten()
    
    # بررسی ظرفیت
    if len(message) > len(flattened_image):
        raise ValueError("Message is too long for the image capacity.")
    
    # جاسازی بیت‌ها در LSB
    for i in range(len(message)):
        flattened_image[i] = (flattened_image[i] & ~1) | int(message[i])  # جایگذاری LSB
    
    # بازگرداندن آرایه به شکل تصویر اصلی
    embedded_image = flattened_image.reshape(image.shape)
    return embedded_image

def process_images(input_folder, clean_output_folder, stego_output_folder, num_clean, num_stego, usage_percent=0.8):
    """
    پردازش تصاویر:
    - تعدادی تصویر را بدون تغییر در پوشه Clean ذخیره می‌کند.
    - تعدادی تصویر را آلوده کرده و در پوشه Stego ذخیره می‌کند.
    """
    # دریافت لیست تصاویر
    images = [f for f in os.listdir(input_folder) if f.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp'))]
    if len(images) < num_clean + num_stego:
        raise ValueError("Not enough images in the input folder to satisfy the request.")

    # انتخاب تصاویر به صورت تصادفی
    selected_images = random.sample(images, num_clean + num_stego)
    clean_images = selected_images[:num_clean]
    stego_images = selected_images[num_clean:]

    # ایجاد پوشه‌ها در صورت نیاز
    os.makedirs(clean_output_folder, exist_ok=True)
    os.makedirs(stego_output_folder, exist_ok=True)

    # ذخیره تصاویر پاک
    for filename in clean_images:
        image_path = os.path.join(input_folder, filename)
        image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
        if image is None:
            print(f"Skipping invalid image: {filename}")
            continue
        output_path = os.path.join(clean_output_folder, filename)
        cv2.imwrite(output_path, image)
        print(f"Saved clean image: {output_path}")

    # ذخیره تصاویر آلوده
    for filename in stego_images:
        image_path = os.path.join(input_folder, filename)
        image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
        if image is None:
            print(f"Skipping invalid image: {filename}")
            continue
        
        # ظرفیت تصویر
        capacity = image.shape[0] * image.shape[1]
        
        # تولید متن تصادفی
        random_text = generate_random_text(length=100)  # طول متن تصادفی
        message = text_to_binary(random_text)  # تبدیل متن به باینری
        
        # نهان‌نگاری پیام
        stego_image = embed_message(image, message)
        output_path = os.path.join(stego_output_folder, filename)
        cv2.imwrite(output_path, stego_image)
        print(f"Saved stego image: {output_path}")

    print("\nProcessing complete.")
    print(f"Clean images saved: {num_clean}")
    print(f"Stego images saved: {num_stego}")

# مسیرها و تعداد تصاویر
input_folder = "./10000_random_files/"  # مسیر فولدر تصاویر پاک
clean_output_folder = "./output_clean/"  # مسیر ذخیره تصاویر پاک
stego_output_folder = "./output_stego/"  # مسیر ذخیره تصاویر آلوده
num_clean = 50  # تعداد تصاویر پاک
num_stego = 50  # تعداد تصاویر آلوده

# اجرای کد
process_images(input_folder, clean_output_folder, stego_output_folder, num_clean, num_stego)


Saved clean image: ./output_clean/Antique shops_000008.jpg
Saved clean image: ./output_clean/Birds_000012.jpg
Saved clean image: ./output_clean/Beauty_000026.jpg
Saved clean image: ./output_clean/Barbecue_000014.jpg
Saved clean image: ./output_clean/Architecture photography_000027.jpg
Saved clean image: ./output_clean/Breakfast_000006.jpg
Saved clean image: ./output_clean/Art exhibitions_000008.jpg
Saved clean image: ./output_clean/Baseball_000006.jpg
Saved clean image: ./output_clean/Balcony gardens_000017.jpg
Saved clean image: ./output_clean/Black and white photography_000026.jpg
Saved clean image: ./output_clean/Beer_000039.jpg
Saved clean image: ./output_clean/Animal rights_000029.jpg
Saved clean image: ./output_clean/Art exhibitions_000011.jpg
Saved clean image: ./output_clean/Accessories_000025.jpg
Saved clean image: ./output_clean/Beaches_000028.jpg
Saved clean image: ./output_clean/Abstract art_000038.jpg
Saved clean image: ./output_clean/Bookstores_000003.jpg
Saved clean imag

# روش آماری #

In [18]:
import os
import cv2
import numpy as np
import csv

def extract_lsb(image):
    """
    استخراج بیت‌های کم ارزش (LSB) از تصویر.
    """
    return image & 1

def calculate_statistical_features(lsb):
    """
    محاسبه ویژگی‌های آماری از LSB.
    """
    mean = np.mean(lsb)  # میانگین بیت‌های LSB
    std_dev = np.std(lsb)  # انحراف معیار بیت‌های LSB
    unique, counts = np.unique(lsb, return_counts=True)
    bit_balance = counts[0] / (counts[1] + 1e-9)  # نسبت بیت‌های 0 به 1
    return mean, std_dev, bit_balance

def detect_steganography(image_path):
    """
    تشخیص اینکه آیا تصویر آلوده است یا پاک.
    """
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if image is None:
        raise ValueError(f"Unable to read image: {image_path}")
    
    lsb = extract_lsb(image)
    mean, std_dev, bit_balance = calculate_statistical_features(lsb)
    
    # معیارهای تشخیص
    threshold_mean = 0.02
    threshold_std = 0.05
    threshold_balance = 1.1
    
    # بررسی ویژگی‌ها برای تشخیص
    if abs(mean - 0.5) > threshold_mean or std_dev < threshold_std or bit_balance > threshold_balance:
        return "Stego"
    else:
        return "Clean"

def process_images_and_save_results(input_folder, output_csv):
    """
    پردازش تصاویر برای تشخیص پاک یا آلوده بودن و ذخیره نتایج در فایل CSV.
    """
    results = []
    clean_count = 0
    stego_count = 0

    for filename in os.listdir(input_folder):
        image_path = os.path.join(input_folder, filename)
        if not os.path.isfile(image_path):
            continue
        
        try:
            status = detect_steganography(image_path)
            results.append((filename, status))
            if status == "Clean":
                clean_count += 1
            else:
                stego_count += 1
        except Exception as e:
            print(f"Error processing {filename}: {e}")
    
    # ذخیره نتایج در فایل CSV
    with open(output_csv, mode='w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(["Image", "Status"])
        writer.writerows(results)
    print(f"Results saved to {output_csv}")

    # چاپ آماری
    total_images = clean_count + stego_count
    clean_percentage = (clean_count / total_images) * 100 if total_images > 0 else 0
    stego_percentage = (stego_count / total_images) * 100 if total_images > 0 else 0

    print("\nSummary:")
    print(f"Total Images: {total_images}")
    print(f"Clean Images: {clean_count} ({clean_percentage:.2f}%)")
    print(f"Stego Images: {stego_count} ({stego_percentage:.2f}%)")

# مسیر و فایل خروجی
input_folder = "./combined_images/"  # مسیر فولدر تصاویر
output_csv = "detection_results.csv"  # فایل خروجی CSV

# اجرای کد
process_images_and_save_results(input_folder, output_csv)


Results saved to detection_results.csv

Summary:
Total Images: 1000
Clean Images: 752 (75.20%)
Stego Images: 248 (24.80%)


# غیر هوش مصنوعی #

In [19]:
import os
import numpy as np
import cv2
import csv

# استخراج بیت‌های کم ارزش (LSB)
def extract_lsb(image):
    return image & 1

# محاسبه ویژگی‌های آماری
def calculate_statistical_features(lsb):
    mean = np.mean(lsb)  # میانگین بیت‌های LSB
    std_dev = np.std(lsb)  # انحراف معیار بیت‌های LSB
    unique, counts = np.unique(lsb, return_counts=True)
    bit_balance = counts[0] / (counts[1] + 1e-9)  # نسبت بیت‌های 0 به 1
    return mean, std_dev, bit_balance

# محاسبه نسبت پیکسل‌های یکنواخت
def uniform_pixel_ratio(lsb):
    unique, counts = np.unique(lsb, return_counts=True)
    return max(counts) / lsb.size  # نسبت پیکسل یکنواخت

# تشخیص نهان‌نگاری بر اساس ویژگی‌ها
def detect_steganography(features):
    mean, std_dev, bit_balance, uniform_ratio = features
    if abs(mean - 0.5) > 0.02 or std_dev < 0.05 or bit_balance > 1.1 or uniform_ratio > 0.55:
        return "Steganography Detected"
    else:
        return "Clean"

# مقایسه پیش‌بینی‌ها با برچسب واقعی (بر اساس مکان تصویر)
def get_true_label(filename, clean_folder, stego_folder):
    if filename in os.listdir(clean_folder):
        return "Clean"
    elif filename in os.listdir(stego_folder):
        return "Steganography Detected"
    else:
        return None

# پردازش تصاویر در یک پوشه
def process_folder(input_folder, clean_folder, stego_folder, output_csv):
    results = []
    clean_count = 0
    stego_count = 0
    correct_count = 0
    total_images = 0
    
    for filename in os.listdir(input_folder):
        image_path = os.path.join(input_folder, filename)
        image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
        if image is None:
            continue
        
        # استخراج ویژگی‌ها
        lsb = extract_lsb(image)
        mean, std_dev, bit_balance = calculate_statistical_features(lsb)
        uniform_ratio = uniform_pixel_ratio(lsb)
        
        # تشخیص آلودگی
        features = [mean, std_dev, bit_balance, uniform_ratio]
        predicted_status = detect_steganography(features)
        
        # گرفتن برچسب واقعی
        true_label = get_true_label(filename, clean_folder, stego_folder)
        
        if true_label:
            total_images += 1
            if predicted_status == true_label:
                correct_count += 1
        
        results.append((filename, predicted_status, true_label))
        
        # شمارش تعداد تصاویر
        if predicted_status == "Clean":
            clean_count += 1
        else:
            stego_count += 1
    
    # محاسبه دقت
    accuracy = (correct_count / total_images) * 100 if total_images > 0 else 0
    
    # ذخیره نتایج در فایل CSV
    with open(output_csv, mode='w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(["Image", "Predicted Status", "True Label"])
        writer.writerows(results)

    # چاپ نتایج
    clean_percentage = (clean_count / total_images) * 100 if total_images > 0 else 0
    stego_percentage = (stego_count / total_images) * 100 if total_images > 0 else 0

    print(f"\nSummary:")
    print(f"Total Images: {total_images}")
    print(f"Clean Images: {clean_count} ({clean_percentage:.2f}%)")
    print(f"Steganography Detected Images: {stego_count} ({stego_percentage:.2f}%)")
    print(f"Correct Predictions: {correct_count} ({accuracy:.2f}%)")
    print(f"Results saved to {output_csv}")

# مثال استفاده از کد
input_folder = "./combined_images"  # پوشه تصاویر
clean_folder = "./output_clean"  # پوشه تصاویر پاک
stego_folder = "./output_stego"  # پوشه تصاویر آلوده
output_csv = "final_results.csv"  # فایل خروجی
process_folder(input_folder, clean_folder, stego_folder, output_csv)



Summary:
Total Images: 98
Clean Images: 752 (767.35%)
Steganography Detected Images: 248 (253.06%)
Correct Predictions: 44 (44.90%)
Results saved to final_results.csv


# روش یادگیری ماشین #

In [11]:
import os
import numpy as np
import cv2
import csv
from scipy.stats import skew, kurtosis
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

# Function to extract LSB
def extract_lsb(image):
    return image & 1

# Function to calculate statistical features for the entire image
def calculate_statistical_features(lsb):
    mean = np.mean(lsb)
    std_dev = np.std(lsb)
    skewness = skew(lsb.flatten())
    kurt = kurtosis(lsb.flatten())
    unique, counts = np.unique(lsb, return_counts=True)
    bit_balance = counts[0] / (counts[1] + 1e-9) if len(counts) > 1 else 1.0
    return mean, std_dev, skewness, kurt, bit_balance

# Function to calculate block-wise features
def calculate_block_features(lsb, block_size=6):
    h, w = lsb.shape
    features = []
    for i in range(0, h, block_size):
        for j in range(0, w, block_size):
            block = lsb[i:i+block_size, j:j+block_size]
            if block.size == 0:
                continue
            features.extend(calculate_statistical_features(block))
    return features

# Function to process a single image
def process_image(image_path, block_size=6):
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if image is None:
        raise ValueError(f"Unable to read image: {image_path}")
    lsb = extract_lsb(image)
    global_features = calculate_statistical_features(lsb)
    block_features = calculate_block_features(lsb, block_size)
    return list(global_features) + block_features

# Function to prepare dataset
def prepare_dataset(folder, label, block_size=6):
    data = []
    labels = []
    for filename in os.listdir(folder):
        image_path = os.path.join(folder, filename)
        if not os.path.isfile(image_path):
            continue
        try:
            features = process_image(image_path, block_size)
            data.append(features)
            labels.append(label)
        except Exception as e:
            print(f"Error processing {filename}: {e}")
    return data, labels

# Main function to train and evaluate
def train_and_evaluate(clean_folder, stego_folder, output_csv):
    print("Preparing dataset...")
    clean_data, clean_labels = prepare_dataset(clean_folder, 0)  # Label 0 for clean
    stego_data, stego_labels = prepare_dataset(stego_folder, 1)  # Label 1 for stego

    # Combine data and labels
    X = np.array(clean_data + stego_data)
    y = np.array(clean_labels + stego_labels)

    # Split dataset
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

    # Train model
    print("Training Gradient Boosting Classifier...")
    model = GradientBoostingClassifier(random_state=42)
    model.fit(X_train, y_train)

    # Evaluate model
    y_pred = model.predict(X_test)
    print("\nModel Evaluation:")
    print(classification_report(y_test, y_pred))

    # Save results to CSV
    results = zip(y_test, y_pred)
    with open(output_csv, mode='w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(["True Label", "Predicted Label"])
        writer.writerows(results)

    print(f"Results saved to {output_csv}")

# Folders for clean and stego images
clean_folder = "./output_clean"
stego_folder = "./output_stego"
output_csv = "final_results.csv"

train_and_evaluate(clean_folder, stego_folder, output_csv)


Preparing dataset...


  skewness = skew(lsb.flatten())
  kurt = kurtosis(lsb.flatten())


KeyboardInterrupt: 