APPLYING PCA FOR INDIVIDUAL MODULES

In [2]:
import os
import numpy as np
from sklearn.decomposition import PCA

# Input and output folder paths
input_features_folder = "D:/rp/dataset/ATVFS/feature_folder/u03/original_feature_efficientnetb0"
output_pca_folder = "D:/rp/dataset/ATVFS/feature_folder/u03/1_pca"

# Number of principal components kept for every feature file
n_pca_components = 5

# Create the output folder if it doesn't exist
os.makedirs(output_pca_folder, exist_ok=True)

# NOTE(review): a separate PCA is fitted for every file, so component k of one file is
# not guaranteed to describe the same direction as component k of another file.
# Confirm this is intended before comparing reduced features across files.

# Reduce every .npy feature file found in the input folder
for feature_filename in os.listdir(input_features_folder):
    if not feature_filename.endswith(".npy"):
        continue

    feature_filepath = os.path.join(input_features_folder, feature_filename)

    # NOTE: allow_pickle=True can execute arbitrary code while unpickling;
    # only load feature files that were produced by a trusted pipeline.
    feature_data = np.load(feature_filepath, allow_pickle=True)

    # The feature matrix is read from index [0][0] of the stored array
    # (adjust this if the extraction step stores it elsewhere)
    features = feature_data[0][0]

    # PCA needs a 2-D (samples x features) matrix with at least n_pca_components
    # rows and columns; fail early and name the offending file.
    features_shape = np.shape(features)
    if len(features_shape) != 2 or min(features_shape) < n_pca_components:
        raise ValueError(
            f"Cannot apply PCA with {n_pca_components} components to "
            f"{feature_filepath}: feature matrix has shape {features_shape}"
        )

    # Apply PCA to reduce dimensionality
    pca = PCA(n_components=n_pca_components)
    reduced_features = pca.fit_transform(features)

    # Build the output name from the stem so only the extension is rewritten
    # (str.replace would also rewrite ".npy" occurring inside the name).
    output_pca_filepath = os.path.join(
        output_pca_folder, os.path.splitext(feature_filename)[0] + "_pca.npy"
    )
    np.save(output_pca_filepath, reduced_features)

print(f"PCA applied and results saved to: {output_pca_folder}")


PCA applied and results saved to: D:/rp/dataset/ATVFS/feature_folder/u03/1_pca


CONCATENATING FEATURE FOLDERS AFTER APPLYING PCA

In [3]:
import os
import numpy as np

# Base folder path
base_folder = "D:/rp/dataset/ATVFS/feature_folder"

# List of user folders (u01, u02, u03, u04)
user_folders = ["u01", "u02", "u03", "u04"]

# List of PCA folders (1_pca, 2_pca, 3_pca, 4_pca)
pca_folders = ["1_pca", "2_pca", "3_pca", "4_pca"]

# Output folder for concatenated PCA features
output_folder_fake = "./concatenated_pca_features/fake"
output_folder_original = "./concatenated_pca_features/original"

# Create the output folders if they don't exist
os.makedirs(output_folder_fake, exist_ok=True)
os.makedirs(output_folder_original, exist_ok=True)


def _concatenate_user_features(user_folder, data_type):
    """Load every PCA file of one user/data type and join them column-wise.

    Files are read folder by folder (in ``pca_folders`` order) and, inside each
    folder, in sorted file-name order so the column layout of the result is the
    same on every run and every machine (``os.listdir`` order is unspecified).

    Args:
        user_folder: Name of the user folder below ``base_folder``.
        data_type: Sub-folder name, "fake" or "original".

    Returns:
        The arrays concatenated along axis 1, the single array unchanged when
        only one file exists, or ``None`` when no ``.npy`` file was found.

    Raises:
        ValueError: If the loaded arrays cannot be concatenated along axis 1
            (for example because their row counts differ).
    """
    feature_blocks = []
    for pca_folder in pca_folders:
        pca_folder_path = os.path.join(base_folder, user_folder, data_type, pca_folder)
        for pca_file in sorted(os.listdir(pca_folder_path)):
            if pca_file.endswith(".npy"):
                pca_filepath = os.path.join(pca_folder_path, pca_file)
                # NOTE: allow_pickle=True can execute arbitrary code while
                # unpickling; only load files produced by a trusted pipeline.
                feature_blocks.append(np.load(pca_filepath, allow_pickle=True))

    if not feature_blocks:
        return None
    if len(feature_blocks) == 1:
        return feature_blocks[0]

    try:
        # One concatenate call instead of re-copying the growing array per file
        return np.concatenate(feature_blocks, axis=1)
    except ValueError as exc:
        block_shapes = [np.shape(block) for block in feature_blocks]
        raise ValueError(
            f"Cannot concatenate PCA features for {user_folder}/{data_type} "
            f"along axis 1; array shapes: {block_shapes}"
        ) from exc


# Iterate through user folders
for user_folder in user_folders:
    concatenated_fake_features = _concatenate_user_features(user_folder, "fake")
    concatenated_original_features = _concatenate_user_features(user_folder, "original")

    # Save the concatenated features to new files in the output folders
    output_fake_filepath = os.path.join(output_folder_fake, f"{user_folder}_concatenated_fake_pca.npy")
    output_original_filepath = os.path.join(output_folder_original, f"{user_folder}_concatenated_original_pca.npy")

    for output_filepath, concatenated_features in (
        (output_fake_filepath, concatenated_fake_features),
        (output_original_filepath, concatenated_original_features),
    ):
        # Saving None would write a pickled 0-d placeholder instead of features
        if concatenated_features is None:
            print(f"No PCA features found for {output_filepath}; skipping save")
            continue
        np.save(output_filepath, concatenated_features)

print(f"Concatenated PCA features saved to: {output_folder_fake} and {output_folder_original}")


Concatenated PCA features saved to: ./concatenated_pca_features/fake and ./concatenated_pca_features/original


CNN CLASSIFIER

In [4]:
import os
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
from keras.models import Sequential
from keras.layers import Conv1D, MaxPooling1D, Flatten, Dense
from keras.utils import to_categorical
def load_data(folder_path, label):
    """Load every ``.npy`` feature file in a folder as a flat vector.

    Files are visited in sorted file-name order so the sample order (and thus
    any seeded train/test split built on it) is reproducible; ``os.listdir``
    alone returns entries in arbitrary order.

    Args:
        folder_path: Directory containing the ``.npy`` feature files.
        label: Class label attached to every sample loaded from the folder.

    Returns:
        A ``(features_list, labels_list)`` tuple: one flattened 1-D array per
        usable file, and a list of ``label`` of the same length.
    """
    features_list = []
    labels_list = []

    for filename in sorted(os.listdir(folder_path)):
        if not filename.endswith(".npy"):
            continue

        filepath = os.path.join(folder_path, filename)
        # NOTE: allow_pickle=True can execute arbitrary code while unpickling;
        # only load files produced by a trusted pipeline.
        feature = np.load(filepath, allow_pickle=True)

        # 0-d arrays (e.g. a saved None placeholder) carry no features
        if feature.shape == ():
            continue

        features_list.append(np.ravel(feature))
        labels_list.append(label)

    return features_list, labels_list

# Folders holding the concatenated PCA features, one folder per class
fake_folder_path = "./concatenated_pca_features/fake/"
original_folder_path = "./concatenated_pca_features/original/"

# Fake samples are labelled 0, original samples are labelled 1
fake_features, fake_labels = load_data(fake_folder_path, label=0)
original_features, original_labels = load_data(original_folder_path, label=1)

# Right-pad every vector with zeros up to the longest vector seen in either class
max_length = max(map(len, fake_features + original_features))
fake_features = [
    np.pad(vec, (0, max_length - len(vec)), mode='constant')
    for vec in fake_features
]
original_features = [
    np.pad(vec, (0, max_length - len(vec)), mode='constant')
    for vec in original_features
]

# Stack both classes: fake rows first, original rows after them
all_features = np.concatenate([fake_features, original_features])
all_labels = np.concatenate([fake_labels, original_labels])

# Integer-encode the labels, then one-hot them for the softmax output
label_encoder = LabelEncoder()
all_labels_encoded = label_encoder.fit_transform(all_labels)
all_labels_categorical = to_categorical(all_labels_encoded)

# Hold out 20% of the samples for testing (fixed seed for the split)
X_train, X_test, y_train, y_test = train_test_split(
    all_features,
    all_labels_categorical,
    test_size=0.2,
    random_state=42,
)

# Conv1D expects a trailing channel axis: (samples, length) -> (samples, length, 1)
X_train = X_train[..., np.newaxis]
X_test = X_test[..., np.newaxis]

# Small 1-D CNN: one conv/pool stage followed by a dense head with 2 outputs
model = Sequential([
    Conv1D(32, kernel_size=3, activation='relu', input_shape=(max_length, 1)),
    MaxPooling1D(pool_size=2),
    Flatten(),
    Dense(128, activation='relu'),
    Dense(2, activation='softmax'),
])

model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# 20% of the training portion is set aside for validation during fitting
model.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.2)

# evaluate() returns [loss, accuracy]; only the accuracy is reported
_, accuracy = model.evaluate(X_test, y_test)
print(f"Accuracy: {accuracy * 100:.2f}%")


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Accuracy: 50.00%


SVM CLASSIFIER

In [5]:
import os
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder

# Function to load features and labels
def load_data(folder_path, label):
    """Collect flattened feature vectors and labels from a folder of ``.npy`` files.

    The directory listing is sorted before loading because ``os.listdir``
    returns names in arbitrary order; a stable order keeps the seeded
    train/test split built from these lists reproducible.

    Args:
        folder_path: Directory containing the ``.npy`` feature files.
        label: Class label assigned to every sample loaded from the folder.

    Returns:
        A ``(features_list, labels_list)`` tuple with one 1-D array per usable
        file and a matching list filled with ``label``.
    """
    features_list = []
    labels_list = []

    npy_filenames = sorted(
        name for name in os.listdir(folder_path) if name.endswith(".npy")
    )
    for filename in npy_filenames:
        filepath = os.path.join(folder_path, filename)
        # NOTE: allow_pickle=True can execute arbitrary code while unpickling;
        # only load files produced by a trusted pipeline.
        feature = np.load(filepath, allow_pickle=True)

        # Skip 0-d arrays (e.g. a saved None placeholder): they hold no features
        if feature.shape == ():
            continue

        # Flatten the feature to a one-dimensional array
        features_list.append(np.ravel(feature))
        labels_list.append(label)

    return features_list, labels_list

# Where the concatenated PCA features of each class are stored
fake_folder_path = "./concatenated_pca_features/fake/"
original_folder_path = "./concatenated_pca_features/original/"

# Read both classes: fake samples get label 0, original samples get label 1
fake_features, fake_labels = load_data(fake_folder_path, label=0)
original_features, original_labels = load_data(original_folder_path, label=1)

# Zero-pad on the right so every vector is as long as the longest one
max_length = max(map(len, fake_features + original_features))
fake_features = [
    np.pad(vec, (0, max_length - len(vec)), mode='constant')
    for vec in fake_features
]
original_features = [
    np.pad(vec, (0, max_length - len(vec)), mode='constant')
    for vec in original_features
]

# Merge the two classes into one feature matrix and one label vector
all_features = np.concatenate([fake_features, original_features])
all_labels = np.concatenate([fake_labels, original_labels])

# Map the labels to consecutive integers
label_encoder = LabelEncoder()
all_labels_encoded = label_encoder.fit_transform(all_labels)

# 80/20 train/test split with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(
    all_features,
    all_labels_encoded,
    test_size=0.2,
    random_state=42,
)

# Stack the sample rows into the 2-D matrices handed to the SVM
X_train_svm = np.vstack(X_train)
X_test_svm = np.vstack(X_test)

# Fit a linear-kernel SVM (fit() returns the fitted estimator itself)
svm_classifier = SVC(kernel='linear').fit(X_train_svm, y_train)

# Predict the held-out samples and score them
y_pred_svm = svm_classifier.predict(X_test_svm)
accuracy_svm = accuracy_score(y_test, y_pred_svm)
print(f"SVM Accuracy: {accuracy_svm * 100:.2f}%")


SVM Accuracy: 100.00%


LOGISTIC REGRESSION CLASSIFIER

In [6]:
import os
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder

# Function to load features and labels
def load_data(folder_path, label):
    """Read all ``.npy`` feature files of one class into flat vectors.

    Entries are processed in sorted file-name order: ``os.listdir`` gives no
    ordering guarantee, and an unstable sample order would make the seeded
    train/test split differ between runs or machines.

    Args:
        folder_path: Directory containing the ``.npy`` feature files.
        label: Class label recorded for every sample loaded from the folder.

    Returns:
        A ``(features_list, labels_list)`` tuple; ``features_list`` holds one
        flattened 1-D array per usable file and ``labels_list`` the same number
        of ``label`` entries.
    """
    features_list = []
    labels_list = []

    for filename in sorted(os.listdir(folder_path)):
        if filename.endswith(".npy"):
            filepath = os.path.join(folder_path, filename)
            # NOTE: allow_pickle=True can execute arbitrary code while
            # unpickling; only load files produced by a trusted pipeline.
            feature = np.load(filepath, allow_pickle=True)

            # A 0-d array (e.g. a saved None placeholder) has no features
            if feature.shape == ():
                continue  # Skip empty features

            # Flatten the feature to a one-dimensional array
            feature = np.ravel(feature)

            features_list.append(feature)
            labels_list.append(label)

    return features_list, labels_list

# Source folders of the concatenated PCA features, one per class
fake_folder_path = "./concatenated_pca_features/fake/"
original_folder_path = "./concatenated_pca_features/original/"

# Load the samples: label 0 for fake, label 1 for original
fake_features, fake_labels = load_data(fake_folder_path, label=0)
original_features, original_labels = load_data(original_folder_path, label=1)

# Bring all vectors to a common length by appending zeros
max_length = max(map(len, fake_features + original_features))
fake_features = [
    np.pad(vec, (0, max_length - len(vec)), mode='constant')
    for vec in fake_features
]
original_features = [
    np.pad(vec, (0, max_length - len(vec)), mode='constant')
    for vec in original_features
]

# Build a single feature matrix / label vector (fake rows come first)
all_features = np.concatenate([fake_features, original_features])
all_labels = np.concatenate([fake_labels, original_labels])

# Convert the labels to consecutive integer codes
label_encoder = LabelEncoder()
all_labels_encoded = label_encoder.fit_transform(all_labels)

# Keep 20% of the samples aside for testing; the seed fixes the split
X_train, X_test, y_train, y_test = train_test_split(
    all_features,
    all_labels_encoded,
    test_size=0.2,
    random_state=42,
)

# Stack the sample rows into the 2-D matrices used by the classifier
X_train_lr = np.vstack(X_train)
X_test_lr = np.vstack(X_test)

# Fit logistic regression with default settings (fit() returns the estimator)
lr_classifier = LogisticRegression().fit(X_train_lr, y_train)

# Predict the held-out samples and report the accuracy
y_pred_lr = lr_classifier.predict(X_test_lr)
accuracy_lr = accuracy_score(y_test, y_pred_lr)
print(f"Logistic Regression Accuracy: {accuracy_lr * 100:.2f}%")


Logistic Regression Accuracy: 100.00%
