In [2]:
import os
import numpy as np

def load_data(root_folder, user_id, model_name, label):
    features_list = []
    labels_list = []

    folder_path = os.path.join(root_folder, f"u{user_id.zfill(2)}", model_name)

    for filename in os.listdir(folder_path):
        if filename.endswith(".npy"):
            filepath = os.path.join(folder_path, filename)
            feature = np.load(filepath, allow_pickle=True)

            # Ensure the feature has consistent shape (adjust based on your data)
            if feature.shape == ():
                continue  # Skip empty features

            # Flatten the feature to a one-dimensional array if necessary
            feature = np.ravel(feature)

            features_list.append(feature)
            labels_list.append(label)

    return features_list, labels_list

root_folder = "D:/rp/dataset/ATVFS/features_new"  

fake_models = ["fake_feature_densenet121", "fake_feature_efficientnetb0", "fake_feature_resnet50","fake_feature_alexnet",
                "fake_feature_inceptionv3", "fake_feature_vgg16"]
original_models = ["original_feature_densenet121", "original_feature_efficientnetb0",  "original_feature_resnet50",
                   "original_feature_alexnet", "original_feature_inceptionv3", "original_feature_vgg16"]

# Load features and labels for fake data
fake_features_list = []
fake_labels_list = []

for user_id in range(1, 5):  # Assuming user IDs u01 to u04
    for model_name in fake_models:
        features, labels = load_data(root_folder, str(user_id).zfill(2), model_name, label=0)
        fake_features_list.extend(features)
        fake_labels_list.extend(labels)

# Load features and labels for original data
original_features_list = []
original_labels_list = []

for user_id in range(1, 5):  # Assuming user IDs u01 to u04
    for model_name in original_models:
        features, labels = load_data(root_folder, str(user_id).zfill(2), model_name, label=1)
        original_features_list.extend(features)
        original_labels_list.extend(labels)

# Find the maximum dimension for both fake and original features
max_fake_dimension = max(feature.shape[0] for feature in fake_features_list)
max_original_dimension = max(feature.shape[0] for feature in original_features_list)

# Pad or reshape the features to have the same dimensions
fake_features_list = [np.pad(feature, (0, max_fake_dimension - feature.shape[0]), 'constant') if feature.shape[0] < max_fake_dimension else feature for feature in fake_features_list]
original_features_list = [np.pad(feature, (0, max_original_dimension - feature.shape[0]), 'constant') if feature.shape[0] < max_original_dimension else feature for feature in original_features_list]

# Stack features along a new axis for both fake and original data
all_fake_features = np.stack(fake_features_list, axis=1)  # Adjust axis if needed
all_fake_labels = np.array(fake_labels_list)

all_original_features = np.stack(original_features_list, axis=1)  # Adjust axis if needed
all_original_labels = np.array(original_labels_list)



In [3]:
from sklearn.metrics.pairwise import rbf_kernel

gamma = 1.0 / all_fake_features.shape[0]  
gamma = 1.0 / all_original_features.shape[0]

all_fake_features = rbf_kernel(all_fake_features.T, gamma=gamma)

all_original_features = rbf_kernel(all_original_features.T, gamma=gamma)


In [4]:
# Save fake features and labels
np.save("./all_fake_features_rbf.npy", all_fake_features)
np.save("./all_fake_labels.npy", all_fake_labels)

# Save original features and labels
np.save("./all_original_features_rbf.npy", all_original_features)
np.save("./all_original_labels.npy", all_original_labels)


In [5]:
print(all_fake_features)
print(all_fake_labels)
print(all_original_features)
print(all_original_labels)
print(len(all_fake_features))
print(len(all_fake_labels))
print(len(all_original_features))
print(len(all_original_labels))

[[1.0000000e+00 7.0014352e-01 7.0671576e-01 ... 4.6463036e-05
  4.1350449e-06 2.7574189e-03]
 [7.0014352e-01 1.0000000e+00 6.4843917e-01 ... 4.8052181e-05
  4.1259300e-06 2.8378163e-03]
 [7.0671576e-01 6.4843917e-01 1.0000000e+00 ... 4.7974750e-05
  4.2467759e-06 2.8834499e-03]
 ...
 [4.6463036e-05 4.8052181e-05 4.7974750e-05 ... 1.0000000e+00
  1.3993655e-05 1.0063327e-05]
 [4.1350449e-06 4.1259300e-06 4.2467759e-06 ... 1.3993655e-05
  1.0000000e+00 9.0996423e-07]
 [2.7574189e-03 2.8378163e-03 2.8834499e-03 ... 1.0063327e-05
  9.0996423e-07 1.0000000e+00]]
[0 0 0 ... 0 0 0]
[[1.0000000e+00 7.8205645e-01 7.8597564e-01 ... 4.7976174e-04
  1.2182771e-03 2.2377873e-04]
 [7.8205645e-01 1.0000000e+00 8.0275476e-01 ... 4.4002640e-04
  1.1147797e-03 2.0641035e-04]
 [7.8597564e-01 8.0275476e-01 1.0000000e+00 ... 4.2376795e-04
  1.0552508e-03 1.9668184e-04]
 ...
 [4.7976174e-04 4.4002640e-04 4.2376795e-04 ... 1.0000000e+00
  1.4918220e-03 2.8085927e-02]
 [1.2182771e-03 1.1147797e-03 1.0552508e-

In [6]:
import os
import numpy as np
from sklearn.decomposition import PCA

pca_fake = PCA()
pca_fake.fit(all_fake_features)

pca_original = PCA()
pca_original.fit(all_original_features)

cumulative_explained_variance = np.cumsum(pca_fake.explained_variance_ratio_)

desired_variance_ratio = 0.95
desired_components = np.argmax(cumulative_explained_variance >= desired_variance_ratio) + 1

print(f"Desired number of components to retain {desired_variance_ratio * 100}% variance: {desired_components}")

pca_fake = PCA(n_components=desired_components)
pca_fake_features = pca_fake.fit_transform(all_fake_features)

pca_original = PCA(n_components=desired_components)
pca_original_features = pca_fake.fit_transform(all_original_features)



Desired number of components to retain 95.0% variance: 2


In [7]:
print(all_fake_features)
print(all_fake_labels)
print(all_original_features)
print(all_original_labels)
print(len(all_fake_features))
print(len(all_fake_labels))
print(len(all_original_features))
print(len(all_original_labels))

[[1.0000000e+00 7.0014352e-01 7.0671576e-01 ... 4.6463036e-05
  4.1350449e-06 2.7574189e-03]
 [7.0014352e-01 1.0000000e+00 6.4843917e-01 ... 4.8052181e-05
  4.1259300e-06 2.8378163e-03]
 [7.0671576e-01 6.4843917e-01 1.0000000e+00 ... 4.7974750e-05
  4.2467759e-06 2.8834499e-03]
 ...
 [4.6463036e-05 4.8052181e-05 4.7974750e-05 ... 1.0000000e+00
  1.3993655e-05 1.0063327e-05]
 [4.1350449e-06 4.1259300e-06 4.2467759e-06 ... 1.3993655e-05
  1.0000000e+00 9.0996423e-07]
 [2.7574189e-03 2.8378163e-03 2.8834499e-03 ... 1.0063327e-05
  9.0996423e-07 1.0000000e+00]]
[0 0 0 ... 0 0 0]
[[1.0000000e+00 7.8205645e-01 7.8597564e-01 ... 4.7976174e-04
  1.2182771e-03 2.2377873e-04]
 [7.8205645e-01 1.0000000e+00 8.0275476e-01 ... 4.4002640e-04
  1.1147797e-03 2.0641035e-04]
 [7.8597564e-01 8.0275476e-01 1.0000000e+00 ... 4.2376795e-04
  1.0552508e-03 1.9668184e-04]
 ...
 [4.7976174e-04 4.4002640e-04 4.2376795e-04 ... 1.0000000e+00
  1.4918220e-03 2.8085927e-02]
 [1.2182771e-03 1.1147797e-03 1.0552508e-

In [8]:
# Save fake features and labels
np.save("./all_fake_features.npy", all_fake_features)
np.save("./all_fake_labels.npy", all_fake_labels)

# Save original features and labels
np.save("./all_original_features.npy", all_original_features)
np.save("./all_original_labels.npy", all_original_labels)


In [9]:
np.save("./all_fake_features_face_pca.npy", pca_fake_features)

np.save("./all_original_features_face_pca.npy", pca_original_features)


In [10]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# Load features and labels
fake_features = np.load("./all_fake_features.npy")
original_features = np.load("./all_original_features.npy")

fake_labels = np.load("./all_fake_labels.npy")
original_labels = np.load("./all_original_labels.npy")

# Combine features and labels
all_features = np.concatenate((fake_features, original_features), axis=0)
all_labels = np.concatenate((fake_labels, original_labels), axis=0)

# Reshape features to match the number of labels
all_features_reshaped = all_features[:2304, :]

# Now you can proceed to split the data
X_train, X_test, y_train, y_test = train_test_split(all_features_reshaped, all_labels, test_size=0.2, random_state=42)

# Reshape features for SVM
X_train_svm = X_train.reshape(X_train.shape[0], -1)  # Flatten the features
X_test_svm = X_test.reshape(X_test.shape[0], -1)

# Train the SVM classifier
svm_classifier = SVC(kernel='linear')
svm_classifier.fit(X_train_svm, y_train)

# Make predictions on the test set
y_pred_svm = svm_classifier.predict(X_test_svm)

# Evaluate the SVM model
accuracy_svm_4 = accuracy_score(y_test, y_pred_svm)
print(f"SVM Accuracy: {accuracy_svm_4 * 100:.2f}%")


SVM Accuracy: 80.69%


In [11]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# Load features and labels
fake_features = np.load("./all_fake_features_pca.npy")
original_features = np.load("./all_original_features_pca.npy")

fake_labels = np.load("./all_fake_labels.npy")
original_labels = np.load("./all_original_labels.npy")

# Combine features and labels
all_features = np.concatenate((fake_features, original_features), axis=0)
all_labels = np.concatenate((fake_labels, original_labels), axis=0)

# Reshape features to match the number of labels
all_features_reshaped = all_features[:2304, :]

# Now you can proceed to split the data
X_train, X_test, y_train, y_test = train_test_split(all_features_reshaped, all_labels, test_size=0.2, random_state=42)

# Reshape features for SVM
X_train_svm = X_train.reshape(X_train.shape[0], -1)  # Flatten the features
X_test_svm = X_test.reshape(X_test.shape[0], -1)

# Train the SVM classifier
svm_classifier = SVC(kernel='linear')
svm_classifier.fit(X_train_svm, y_train)

# Make predictions on the test set
y_pred_svm = svm_classifier.predict(X_test_svm)

# Evaluate the SVM model
accuracy_svm_pca_4 = accuracy_score(y_test, y_pred_svm)
print(f"SVM Accuracy: {accuracy_svm_pca_4 * 100:.2f}%")


SVM Accuracy: 48.37%
