# 1 - Set up file locations for preprocessed images

In [None]:
# Mount Google Drive at /content/drive so the image folders under MyDrive
# (configured in a later cell) are reachable. This only works inside Colab.
from google.colab import drive
drive.mount('/content/drive')

ValueError: mount failed

In [None]:
pip install opencv-python scikit-image numpy


In [None]:
# Google Drive folders holding the pre-processed images of each class.
# NOTE(review): the two folder names are spelled differently ("Rprocessed" vs
# "Fproccessed") - confirm both match the actual directory names on Drive.
real  = "/content/drive/MyDrive/ml_data_/Rprocessed"
fake = "/content/drive/MyDrive/ml_data_/Fproccessed"

In [None]:
import os
# Sanity check: prints True only if the 'real' folder is visible on the mounted Drive.
# NOTE(review): the 'fake' folder is not checked here.
print(os.path.exists(real))

In [None]:
# Show the configured path of the 'real' image folder.
print(real)

# manual feature extraction
### Methods
- Color Histograms
- Texture Descriptors
- Edge Detection

In [None]:
#1. Color Histograms (RGB or HSV Distributions)

import cv2
import numpy as np

def extract_color_histogram(image, bins=(8, 8, 8)):
    """Return a flattened, normalized 3-D HSV histogram of ``image``.

    Args:
        image: Image array that is converted with ``cv2.COLOR_BGR2HSV``,
            i.e. it is treated as a BGR image.
        bins: Number of histogram bins per HSV channel.

    Returns:
        1-D array with ``bins[0] * bins[1] * bins[2]`` entries
        (512 for the default bins).
    """
    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)

    # Histogram over all three channels; the hue axis spans [0, 180) while
    # the other two axes span [0, 256).
    channel_indices = [0, 1, 2]
    value_ranges = [0, 180, 0, 256, 0, 256]
    histogram = cv2.calcHist([hsv], channel_indices, None, bins, value_ranges)

    # Normalize with cv2.normalize's default settings, then collapse to 1-D.
    normalized = cv2.normalize(histogram, histogram)
    return normalized.flatten()


In [None]:
#2. Texture Descriptors (Gabor Filters)

from skimage.filters import gabor

def extract_gabor_features(image, frequency=0.6):
    """Return the flattened Gabor filter response of ``image``.

    Args:
        image: Image array that is converted with ``cv2.COLOR_BGR2GRAY``,
            i.e. it is treated as a BGR image.
        frequency: Forwarded to ``skimage.filters.gabor`` as ``frequency``.

    Returns:
        1-D array holding the first of the two arrays returned by ``gabor``,
        one value per pixel of the grayscale image.
    """
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # gabor() returns a pair of responses; only the first one is used.
    first_response, _unused = gabor(grayscale, frequency=frequency)
    return first_response.flatten()


In [None]:
# 3. Edges (Sobel or Canny Edge Detection)
def extract_edges(image, method='canny'):
    """Return a flattened edge map of ``image``.

    Args:
        image: Image array that is converted with ``cv2.COLOR_BGR2GRAY``,
            i.e. it is treated as a BGR image.
        method: ``'canny'`` (``cv2.Canny`` with thresholds 100 and 200) or
            ``'sobel'`` (magnitude of the 3x3 Sobel gradients in x and y).

    Returns:
        1-D array with one value per pixel of the grayscale image.

    Raises:
        ValueError: If ``method`` is neither ``'canny'`` nor ``'sobel'``.
    """
    # Validate first: previously an unknown method fell through both branches
    # and failed at the return statement because ``edges`` was never assigned.
    if method not in ('canny', 'sobel'):
        raise ValueError(
            f"Unsupported edge detection method {method!r}; "
            "expected 'canny' or 'sobel'"
        )

    # Convert to grayscale
    gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    if method == 'canny':
        # Canny edge detection
        edges = cv2.Canny(gray_image, 100, 200)
    else:
        # Sobel edge detection: combine the horizontal and vertical
        # derivatives (computed as 64-bit floats) into a gradient magnitude.
        sobel_x = cv2.Sobel(gray_image, cv2.CV_64F, 1, 0, ksize=3)
        sobel_y = cv2.Sobel(gray_image, cv2.CV_64F, 0, 1, ksize=3)
        edges = np.sqrt(sobel_x**2 + sobel_y**2)

    return edges.flatten()


In [None]:
def extract_combined_features(image):
    """Return one 1-D feature vector combining color, texture and edge features.

    The vector is the concatenation, in this order, of
    ``extract_color_histogram(image)``, ``extract_gabor_features(image)`` and
    ``extract_edges(image)``, each called with its default settings.

    Args:
        image: Image array passed unchanged to the three extractors.

    Returns:
        1-D array produced by ``np.hstack`` over the three feature vectors.
    """
    feature_groups = (
        extract_color_histogram(image),
        extract_gabor_features(image),
        extract_edges(image),
    )
    return np.hstack(feature_groups)

# Reading the dataset


In [None]:
def _extract_labelled_features(directory, label, image_size=(128, 128)):
    """Return ``(features, labels)`` lists for every readable image in ``directory``."""
    features, labels = [], []

    # os.listdir() returns names in arbitrary order; sorting keeps the row
    # order (and therefore any seeded train/test split) reproducible.
    for img_name in sorted(os.listdir(directory)):
        image = cv2.imread(os.path.join(directory, img_name))
        if image is None:  # cv2.imread returns None when the file cannot be read
            print(f"Error loading image {img_name}")
            continue

        # Resize in case the image was not already resized, to avoid
        # feature-length mismatches between images.
        image = cv2.resize(image, image_size)

        # Flatten so every sample is a plain 1-D feature vector.
        features.append(extract_combined_features(image).flatten())
        labels.append(label)

    return features, labels


def process_dataset(real_dir, fake_dir):
    """Build the feature matrix and label vector from two image folders.

    Every entry of each folder is read with ``cv2.imread``, resized to
    128x128 and turned into a feature vector by
    ``extract_combined_features``. Entries that cannot be read as images are
    reported with ``print`` and skipped.

    Args:
        real_dir: Folder of 'real' images (label 0).
        fake_dir: Folder of 'fake' images (label 1).

    Returns:
        Tuple ``(X, y)`` of NumPy arrays: ``X`` holds one feature vector per
        loaded image (all 'real' rows first, then all 'fake' rows) and ``y``
        holds the matching 0/1 labels. Both are empty when no image loads.
    """
    real_features, real_labels = _extract_labelled_features(real_dir, 0)
    fake_features, fake_labels = _extract_labelled_features(fake_dir, 1)

    X = real_features + fake_features
    y = real_labels + fake_labels
    return np.array(X), np.array(y)


# Build the feature matrix X and label vector y (0 = real, 1 = fake) from the
# two image folders; every later cell works on these two arrays.
X, y = process_dataset(real, fake)


# training basic **svm**

In [None]:
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score


# Hold out 20% of the samples as the test split; the fixed random_state keeps
# the split the same from run to run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# standardization

In [None]:
from sklearn.preprocessing import StandardScaler

# Standardize the features.
# The scaler is fitted on the training split only and then reused on the test
# split, so no test-set statistics are used during fitting.
# NOTE(review): the only active model cell in this notebook (Random Forest)
# trains on the unscaled X_train/X_test; the scaled arrays are referenced only
# by commented-out cells.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)  # Fit on training data and transform it
X_test_scaled = scaler.transform(X_test)       # Transform the test data (use the same scaler)

print("Standardization completed!")


In [None]:
# from sklearn.model_selection import RandomizedSearchCV
# from sklearn.svm import SVC

# # Define the model
# svm = SVC()

# # Define the hyperparameter grid
# param_dist = {
#     'C': [0.1, 1, 10, 100],
#     'gamma': [1, 0.1, 0.01, 0.001],
#     'kernel': ['rbf', 'linear']
# }

# # Perform Randomized Search
# random_search = RandomizedSearchCV(svm, param_distributions=param_dist, n_iter=10, cv=3, scoring='accuracy', random_state=42)
# random_search.fit(X_train, y_train)

# # Print the best parameters and score
# print("Best Parameters:", random_search.best_params_)
# print("Best Score:", random_search.best_score_)


In [None]:
# from sklearn.svm import SVC

# # Define the SVM model with recommended hyperparameters
# svm = SVC(C=10, gamma=0.001, kernel='rbf')

# # Fit the model on training data
# svm.fit(X_train, y_train)

# # Evaluate the model
# accuracy = svm.score(X_test, y_test)
# print(f"SVM Accuarcy : {accuracy * 100:.2f}%")



In [None]:
# from sklearn.svm import SVC

# # Define the SVM model with recommended hyperparameters
# svm = SVC()

# # Fit the model on training data
# svm.fit(X_train, y_train)

# # Evaluate the model
# accuracy = svm.score(X_test, y_test)
# print(f"SVM Accuarcy : {accuracy * 100:.2f}%")

# Random forest

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Initialize the Random Forest model: 50 trees, fixed seed for repeatable results
rf_model = RandomForestClassifier(n_estimators=50, random_state=42)

# Fit the model on the (unscaled) training data
rf_model.fit(X_train, y_train)

# Predict on the held-out test data
rf_predictions = rf_model.predict(X_test)

# Calculate the accuracy (fraction of test labels predicted correctly) and
# print it as a percentage
rf_accuracy = accuracy_score(y_test, rf_predictions)
print(f"Random Forest Accuracy: {rf_accuracy * 100:.2f}%")


In [None]:
# import matplotlib.pyplot as plt
# from sklearn.ensemble import RandomForestClassifier
# from sklearn.metrics import accuracy_score

# # Example Data (replace X_train, X_test, y_train, y_test with your dataset)
# # X_train, X_test, y_train, y_test = ...

# # Test different values for n_estimators
# n_estimators_range = [10, 50, 100, 200, 300]
# training_accuracies = []
# validation_accuracies = []

# # Loop through each value of n_estimators
# for n in n_estimators_range:
#     # Train the Random Forest model
#     rf_model = RandomForestClassifier(n_estimators=n, random_state=42)
#     rf_model.fit(X_train, y_train)

#     # Calculate training accuracy
#     train_preds = rf_model.predict(X_train)
#     train_accuracy = accuracy_score(y_train, train_preds)
#     training_accuracies.append(train_accuracy)

#     # Calculate validation accuracy
#     val_preds = rf_model.predict(X_test)
#     val_accuracy = accuracy_score(y_test, val_preds)
#     validation_accuracies.append(val_accuracy)

# # Plot the accuracy graph
# plt.figure(figsize=[6, 4])
# plt.plot(n_estimators_range, training_accuracies, 'black', linewidth=2.0, label='Training Accuracy')
# plt.plot(n_estimators_range, validation_accuracies, 'blue', linewidth=2.0, label='Validation Accuracy')
# plt.legend(fontsize=12)
# plt.xlabel('Number of Estimators (n_estimators)', fontsize=10)
# plt.ylabel('Accuracy', fontsize=10)
# plt.title('Random Forest Accuracy Curves', fontsize=12)
# plt.grid(True)
# plt.show()


In [None]:
# from sklearn.naive_bayes import GaussianNB
# from sklearn.metrics import accuracy_score

# # Initialize the Naive Bayes model
# nb_model = GaussianNB()

# # Train the Naive Bayes model on the standardized training data
# nb_model.fit(X_train_scaled, y_train)

# # Make predictions on the test set
# y_pred = nb_model.predict(X_test_scaled)

# # Evaluate the model
# accuracy = accuracy_score(y_test, y_pred)
# print(f"Naive Bayes Model Accuracy: {accuracy * 100:.2f}%")


In [None]:
# from sklearn.svm import SVC
# from sklearn.ensemble import RandomForestClassifier, VotingClassifier
# from sklearn.naive_bayes import GaussianNB
# from sklearn.metrics import accuracy_score
# from sklearn.model_selection import train_test_split
# from sklearn.preprocessing import StandardScaler

# # Split the data (assuming X and y are your features and labels)
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# # Standardize the features
# scaler = StandardScaler()
# X_train_scaled = scaler.fit_transform(X_train)  # Fit and transform training data
# X_test_scaled = scaler.transform(X_test)  # Transform test data

# # Initialize the base models
# svm_model = SVC(probability=True, random_state=42)  # 'probability=True' for soft voting
# rf_model = RandomForestClassifier(random_state=42)
# nb_model = GaussianNB()

# # Create a Voting Classifier
# voting_classifier = VotingClassifier(
#     estimators=[('svm', svm_model), ('rf', rf_model), ('nb', nb_model)],
#     voting='soft'  # 'soft' for weighted probability voting, 'hard' for majority voting
# )

# # Train the ensemble model
# voting_classifier.fit(X_train_scaled, y_train)

# # Make predictions
# y_pred = voting_classifier.predict(X_test_scaled)

# # Evaluate the model
# accuracy = accuracy_score(y_test, y_pred)
# print(f"Ensemble Model Accuracy: {accuracy * 100:.2f}%")
