# Feature Extraction

In this experment we have taken resnet50, efficientnet_b0, and vit_b_16 as feature extractor then we have train different classifier to identify the wellness of features by achieving better accuracy

In [1]:
import os
import torch
import torchvision.transforms as transforms
from torchvision import models
from PIL import Image
import numpy as np
from tqdm import tqdm

# Define dataset path
dataset_path = "dataset/"  # Update with your dataset path

# Define device (use CUDA 5 if available)
device = torch.device("cuda:5" if torch.cuda.is_available() else "cpu")

# Define models for feature extraction
models_dict = {
    "resnet50": models.resnet50(pretrained=True).to(device),
    "efficientnet_b0": models.efficientnet_b0(pretrained=True).to(device),
    "vit_b_16": models.vit_b_16(pretrained=True).to(device)
}

# Set models to evaluation mode
for model in models_dict.values():
    model.eval()

# Image transformation
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Function to extract features
def extract_features(model, img_path):
    image = Image.open(img_path).convert("RGB")
    image = transform(image).unsqueeze(0).to(device)
    
    with torch.no_grad():
        features = model(image)
    return features.cpu().numpy().flatten()

# Loop over datasets and extract features
feature_data = []
labels = []

for dataset in os.listdir(dataset_path):
    dataset_folder = os.path.join(dataset_path, dataset)
    if os.path.isdir(dataset_folder):
        for split in ["train", "test"]:  # Adjusted for dataset structure
            split_folder = os.path.join(dataset_folder, split)
            if os.path.isdir(split_folder):
                for label in ["real", "fake"]:
                    image_folder = os.path.join(split_folder, label)
                    if os.path.isdir(image_folder):
                        for img_name in tqdm(os.listdir(image_folder), desc=f"Processing {dataset}/{split}/{label}"):
                            img_path = os.path.join(image_folder, img_name)
                            
                            feature_vector = []
                            for model_name, model in models_dict.items():
                                features = extract_features(model, img_path)
                                feature_vector.extend(features)
                            
                            feature_data.append(feature_vector)
                            labels.append(0 if label == "real" else 1)

# Save features and labels
np.save("features.npy", np.array(feature_data))
np.save("labels.npy", np.array(labels))
print("Feature extraction completed!")


Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /data/home/arunkumar12/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:03<00:00, 29.8MB/s]
Downloading: "https://download.pytorch.org/models/efficientnet_b0_rwightman-7f5810bc.pth" to /data/home/arunkumar12/.cache/torch/hub/checkpoints/efficientnet_b0_rwightman-7f5810bc.pth
100%|██████████| 20.5M/20.5M [00:01<00:00, 17.5MB/s]
Downloading: "https://download.pytorch.org/models/vit_b_16-c867db91.pth" to /data/home/arunkumar12/.cache/torch/hub/checkpoints/vit_b_16-c867db91.pth
100%|██████████| 330M/330M [00:09<00:00, 36.7MB/s] 
Processing DataSet3/train/real: 100%|██████████| 20000/20000 [45:43<00:00,  7.29it/s]  
Processing DataSet3/train/fake: 100%|██████████| 20000/20000 [59:38<00:00,  5.59it/s]  
Processing DataSet3/test/real: 100%|██████████| 2603/2603 [05:13<00:00,  8.32it/s]
Processing DataSet3/test/fake: 100%|██████████| 2623/2623 [05:14<00:00,  8.33it/s]
Processing DataS

Feature extraction completed!


# MLP with 3 hidden layer, dropout and batchnormalization

In [5]:
import torch
import numpy as np
from sklearn.model_selection import train_test_split
from torch import nn, optim

# Define device (use CUDA 5 if available)
device = torch.device("cuda:6" if torch.cuda.is_available() else "cpu")

# Load extracted features and labels
feature_data = np.load("features.npy")
labels = np.load("labels.npy")

# Normalize features
feature_data = (feature_data - np.mean(feature_data, axis=0)) / np.std(feature_data, axis=0)

# Split dataset into train and test
X_train, X_test, y_train, y_test = train_test_split(feature_data, labels, test_size=0.2, random_state=42)

# Convert to PyTorch tensors
X_train, X_test = torch.tensor(X_train, dtype=torch.float32).to(device), torch.tensor(X_test, dtype=torch.float32).to(device)
y_train, y_test = torch.tensor(y_train, dtype=torch.long).to(device), torch.tensor(y_test, dtype=torch.long).to(device)

# Define MLP classifier
class MLPClassifier(nn.Module):
    def __init__(self, input_size):
        super(MLPClassifier, self).__init__()
        self.fc1 = nn.Linear(input_size, 1024)
        self.bn1 = nn.BatchNorm1d(1024)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.5)
        self.fc2 = nn.Linear(1024, 512)
        self.bn2 = nn.BatchNorm1d(512)
        self.fc3 = nn.Linear(512, 2)  # Binary classification
    
    def forward(self, x):
        x = self.fc1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.fc2(x)
        x = self.bn2(x)
        x = self.relu(x)
        x = self.fc3(x)
        return x

# Initialize model
input_size = X_train.shape[1]
model = MLPClassifier(input_size).to(device)

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0005, weight_decay=1e-5)

# Training loop
epochs = 20
for epoch in range(epochs):
    model.train()
    optimizer.zero_grad()
    outputs = model(X_train)
    loss = criterion(outputs, y_train)
    loss.backward()
    optimizer.step()
    print(f"Epoch [{epoch+1}/{epochs}], Loss: {loss.item():.4f}")

# Evaluation
model.eval()
with torch.no_grad():
    outputs = model(X_test)
    predictions = torch.argmax(outputs, dim=1)
    accuracy = (predictions == y_test).float().mean()
    print(f"Test Accuracy: {accuracy.item() * 100:.2f}%")

print("Training and evaluation completed!")

Epoch [1/20], Loss: 0.7054
Epoch [2/20], Loss: 0.7701
Epoch [3/20], Loss: 0.5784
Epoch [4/20], Loss: 0.6119
Epoch [5/20], Loss: 0.5625
Epoch [6/20], Loss: 0.5229
Epoch [7/20], Loss: 0.5221
Epoch [8/20], Loss: 0.5169
Epoch [9/20], Loss: 0.4979
Epoch [10/20], Loss: 0.4802
Epoch [11/20], Loss: 0.4737
Epoch [12/20], Loss: 0.4708
Epoch [13/20], Loss: 0.4618
Epoch [14/20], Loss: 0.4514
Epoch [15/20], Loss: 0.4437
Epoch [16/20], Loss: 0.4374
Epoch [17/20], Loss: 0.4336
Epoch [18/20], Loss: 0.4279
Epoch [19/20], Loss: 0.4186
Epoch [20/20], Loss: 0.4115
Test Accuracy: 78.78%
Training and evaluation completed!


## MLP classifier

In [8]:
import torch
import numpy as np
from sklearn.model_selection import train_test_split
from torch import nn, optim

# Define device (use CUDA 5 if available)
device = torch.device("cuda:6" if torch.cuda.is_available() else "cpu")

# Load extracted features and labels
feature_data = np.load("features.npy")
labels = np.load("labels.npy")

# Normalize features
feature_data = (feature_data - np.mean(feature_data, axis=0)) / np.std(feature_data, axis=0)

# Split dataset into train and test
X_train, X_test, y_train, y_test = train_test_split(feature_data, labels, test_size=0.2, random_state=42)

# Convert to PyTorch tensors
X_train, X_test = torch.tensor(X_train, dtype=torch.float32).to(device), torch.tensor(X_test, dtype=torch.float32).to(device)
y_train, y_test = torch.tensor(y_train, dtype=torch.long).to(device), torch.tensor(y_test, dtype=torch.long).to(device)

# Define MLP classifier
class MLPClassifier(nn.Module):
    def __init__(self, input_size):
        super(MLPClassifier, self).__init__()
        self.fc1 = nn.Linear(input_size, 2048)
        self.bn1 = nn.BatchNorm1d(2048)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.5)
        self.fc2 = nn.Linear(2048, 1024)
        self.bn2 = nn.BatchNorm1d(1024)
        self.fc3 = nn.Linear(1024, 512)
        self.bn3 = nn.BatchNorm1d(512)
        self.fc4 = nn.Linear(512, 2)  # Binary classification
    
    def forward(self, x):
        x = self.fc1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.fc2(x)
        x = self.bn2(x)
        x = self.relu(x)
        x = self.fc3(x)
        x = self.bn3(x)
        x = self.relu(x)
        x = self.fc4(x)
        return x

# Initialize model
input_size = X_train.shape[1]
model = MLPClassifier(input_size).to(device)

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(model.parameters(), lr=0.0003, weight_decay=1e-4)

# Training loop
epochs = 50
for epoch in range(epochs):
    model.train()
    optimizer.zero_grad()
    outputs = model(X_train)
    loss = criterion(outputs, y_train)
    loss.backward()
    optimizer.step()
    
    # Calculate training accuracy
    predictions = torch.argmax(outputs, dim=1)
    train_accuracy = (predictions == y_train).float().mean().item()
    
    # Evaluate on test set at each epoch
    model.eval()
    with torch.no_grad():
        test_outputs = model(X_test)
        test_predictions = torch.argmax(test_outputs, dim=1)
        test_accuracy = (test_predictions == y_test).float().mean().item()
    
    print(f"Epoch [{epoch+1}/{epochs}], Loss: {loss.item():.4f}, Train Accuracy: {train_accuracy * 100:.2f}%, Test Accuracy: {test_accuracy * 100:.2f}%")

# Final Evaluation
model.eval()
with torch.no_grad():
    outputs = model(X_test)
    predictions = torch.argmax(outputs, dim=1)
    accuracy = (predictions == y_test).float().mean()
    print(f"Final Test Accuracy: {accuracy.item() * 100:.2f}%")

print("Training and evaluation completed!")


Epoch [1/50], Loss: 0.7186, Train Accuracy: 49.98%, Test Accuracy: 61.41%
Epoch [2/50], Loss: 0.7600, Train Accuracy: 66.11%, Test Accuracy: 55.15%
Epoch [3/50], Loss: 0.5789, Train Accuracy: 69.31%, Test Accuracy: 51.95%
Epoch [4/50], Loss: 0.6065, Train Accuracy: 66.41%, Test Accuracy: 61.16%
Epoch [5/50], Loss: 0.5461, Train Accuracy: 72.10%, Test Accuracy: 71.89%
Epoch [6/50], Loss: 0.5249, Train Accuracy: 73.46%, Test Accuracy: 73.41%
Epoch [7/50], Loss: 0.5225, Train Accuracy: 73.42%, Test Accuracy: 73.60%
Epoch [8/50], Loss: 0.5067, Train Accuracy: 74.71%, Test Accuracy: 73.52%
Epoch [9/50], Loss: 0.4853, Train Accuracy: 76.38%, Test Accuracy: 72.78%
Epoch [10/50], Loss: 0.4757, Train Accuracy: 77.02%, Test Accuracy: 72.95%
Epoch [11/50], Loss: 0.4707, Train Accuracy: 77.29%, Test Accuracy: 74.48%
Epoch [12/50], Loss: 0.4582, Train Accuracy: 78.22%, Test Accuracy: 76.54%
Epoch [13/50], Loss: 0.4433, Train Accuracy: 78.93%, Test Accuracy: 77.77%
Epoch [14/50], Loss: 0.4379, Train

# logistic regression 

In [1]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Load extracted features and labels
feature_data = np.load("features.npy")
labels = np.load("labels.npy")

# Normalize features
scaler = StandardScaler()
feature_data = scaler.fit_transform(feature_data)

# Split dataset into train and test
X_train, X_test, y_train, y_test = train_test_split(feature_data, labels, test_size=0.2, random_state=42)

# Define and train Logistic Regression classifier
logistic_model = LogisticRegression(max_iter=1000)
logistic_model.fit(X_train, y_train)

# Evaluate the model
y_pred = logistic_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred) * 100
print(f"Logistic Regression Accuracy: {accuracy:.2f}%")


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Logistic Regression Accuracy: 84.57%


# Decision Tree

In [2]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

# Load extracted features and labels
feature_data = np.load("features.npy")
labels = np.load("labels.npy")

# Normalize features
scaler = StandardScaler()
feature_data = scaler.fit_transform(feature_data)

# Split dataset into train and test
X_train, X_test, y_train, y_test = train_test_split(feature_data, labels, test_size=0.2, random_state=42)

# Define and train Decision Tree classifier
dt_model = DecisionTreeClassifier()
dt_model.fit(X_train, y_train)

# Evaluate the model
y_pred = dt_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred) * 100
print(f"Decision Tree Accuracy: {accuracy:.2f}%")


Decision Tree Accuracy: 66.25%
