In [1]:
import tensorflow as tf
import pandas as pd
import cv2
import numpy as np
import os
from tensorflow.keras.models import Model
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import Dense, Dropout, GlobalAveragePooling2D, Input
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.model_selection import train_test_split

# Load dataset CSV
csv_path = r"C:\Users\aryes\OneDrive\Desktop\vista-25\dataset\data_labels_cleaned.csv"
df = pd.read_csv(csv_path)

# Remove missing file paths
df['file_path'] = df['file_path'].astype(str)
df = df[df['file_path'].apply(os.path.exists)]

# Split into train and validation sets
train_df, val_df = train_test_split(df, test_size=0.2, random_state=42, stratify=df['label'])

# Image preprocessing function
def preprocess_image(image_path, label):
    image_path = image_path.decode('utf-8')

    if not os.path.exists(image_path):
        print(f"Skipping missing image: {image_path}")
        return np.zeros((128, 128, 3), dtype=np.float32), np.float32(0)

    image = cv2.imread(image_path)
    if image is None:
        print(f"Skipping unreadable image: {image_path}")
        return np.zeros((128, 128, 3), dtype=np.float32), np.float32(0)

    image = cv2.resize(image, (128, 128))  # Resize to MobileNetV2 input size
    image = image.astype(np.float32) / 255.0  # Normalize
    return image, np.array(label, dtype=np.float32)

# Function to create TensorFlow dataset
def create_tf_dataset(df, batch_size=32):
    image_paths = df['file_path'].values
    labels = df['label'].values.astype(np.float32)

    dataset = tf.data.Dataset.from_tensor_slices((image_paths, labels))

    def process_path(path, label):
        image, label = tf.numpy_function(
            func=preprocess_image, inp=[path, label], Tout=(tf.float32, tf.float32)
        )
        image.set_shape((128, 128, 3))
        label.set_shape(())
        return image, label

    dataset = dataset.map(process_path, num_parallel_calls=tf.data.AUTOTUNE)
    dataset = dataset.shuffle(1000).batch(batch_size).prefetch(tf.data.AUTOTUNE)
    return dataset

# Create train and validation datasets
train_dataset = create_tf_dataset(train_df)
val_dataset = create_tf_dataset(val_df)

# Load MobileNetV2 as base model (without top layers)
base_model = MobileNetV2(input_shape=(128, 128, 3), include_top=False, weights="imagenet")
base_model.trainable = False  # Freeze base model

# Build classification model
inputs = Input(shape=(128, 128, 3))
x = base_model(inputs, training=False)  # Use base model
x = GlobalAveragePooling2D()(x)  # Global pooling to reduce features
x = Dense(256, activation='relu')(x)
x = Dropout(0.5)(x)
x = Dense(128, activation='relu')(x)
x = Dropout(0.5)(x)
outputs = Dense(1, activation='sigmoid')(x)  # Binary classification

model = Model(inputs, outputs)

# Compile model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Early stopping callback
early_stopping = EarlyStopping(monitor='val_accuracy', patience=5, restore_best_weights=True)


# Train model
model.fit(train_dataset, validation_data=val_dataset, epochs=50, callbacks=[early_stopping])

# Save trained model
model.save(r"C:\Users\aryes\OneDrive\Desktop\vista-25\saved_model_mobilenetv2.h5")


Epoch 1/50
[1m 240/1200[0m [32m━━━━[0m[37m━━━━━━━━━━━━━━━━[0m [1m22:42[0m 1s/step - accuracy: 0.6708 - loss: 0.6604Skipping unreadable image: C:\Users\aryes\OneDrive\Desktop\vista-25\dataset\train\real\real_11957.jpg
[1m1200/1200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1336s[0m 1s/step - accuracy: 0.7498 - loss: 0.5245 - val_accuracy: 0.8436 - val_loss: 0.3649
Epoch 2/50
[1m 240/1200[0m [32m━━━━[0m[37m━━━━━━━━━━━━━━━━[0m [1m10:34[0m 661ms/step - accuracy: 0.8351 - loss: 0.3945Skipping unreadable image: C:\Users\aryes\OneDrive\Desktop\vista-25\dataset\train\real\real_11957.jpg
[1m1200/1200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1003s[0m 822ms/step - accuracy: 0.8342 - loss: 0.3840 - val_accuracy: 0.8526 - val_loss: 0.3458
Epoch 3/50
[1m 240/1200[0m [32m━━━━[0m[37m━━━━━━━━━━━━━━━━[0m [1m10:39[0m 666ms/step - accuracy: 0.8521 - loss: 0.3535Skipping unreadable image: C:\Users\aryes\OneDrive\Desktop\vista-25\dataset\train\real\real_11957.jpg
[1m12



In [2]:
import tensorflow as tf
import cv2
import numpy as np
import os
import pandas as pd

# Load the trained model
model = tf.keras.models.load_model(r"C:\Users\aryes\OneDrive\Desktop\vista-25\saved_model_mobilenetv2.h5")

# Image preprocessing function
def preprocess_image(image_path):
    image = cv2.imread(image_path)
    if image is None:
        print(f"Skipping unreadable image: {image_path}")
        return None
    
    image = cv2.resize(image, (128, 128))  # Resize
    image = image.astype(np.float32) / 255.0  # Normalize
    return image

# Test function with CSV saving
def test_and_save_results(test_dir, model, output_csv_path, batch_size=32):
    # Get all image file paths from the test directory
    image_paths = [os.path.join(test_dir, fname) for fname in os.listdir(test_dir) if fname.endswith(('.jpg', '.png'))]
    
    # Initialize a list to hold the predictions
    results = []
    
    # Iterate over images in the test directory
    for image_path in image_paths:
        image = preprocess_image(image_path)
        if image is None:
            continue
        
        # Add an extra dimension for batch processing
        image = np.expand_dims(image, axis=0)
        
        # Predict the label (0 for real, 1 for fake)
        prediction = model.predict(image)
        predicted_label = 1 if prediction > 0.5 else 0
        predicted_label_str = 'Fake' if predicted_label == 1 else 'Real'
        
        # Save the prediction along with the image path and probability
        results.append({
            'image_path': image_path,
            'prediction': predicted_label_str,
            'probability': prediction[0][0]
        })

    # Create a DataFrame from the results
    results_df = pd.DataFrame(results)

    # Save to CSV
    results_df.to_csv(output_csv_path, index=False)
    print(f"Test results saved to {output_csv_path}")

# Test the model and save results to CSV
test_dir = r"C:\Users\aryes\OneDrive\Desktop\vista-25\dataset\test"
output_csv_path = r"C:\Users\aryes\OneDrive\Desktop\vista-25\dataset\test_results_mobilenetv2.csv"
test_and_save_results(test_dir, model, output_csv_path)




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 69ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 86ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 79ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 86ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 85ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 85ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 66ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 92ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 91ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 60ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 74ms

In [4]:
import pandas as pd
import os

# Read the original CSV file
df = pd.read_csv(r'C:\Users\aryes\OneDrive\Desktop\vista-25\dataset\test_results_mobilenetv2.csv')

# Extract image number from the full path
df['image_id'] = df['image_path'].apply(lambda x: os.path.splitext(os.path.basename(x))[0])

# Convert prediction to binary label (0 for Fake, 1 for Real)
df['label'] = (df['prediction'] == 'Real').astype(int)

# Create the new submission DataFrame with only image_id and label
submission_df = df[['image_id', 'label']]

# Sort the DataFrame by image_id
submission_df = submission_df.sort_values('image_id')
submission_df = df[['image_id', 'label']].sort_values('image_id')
# Save the new submission CSV
submission_df.to_csv('new_submission_mobilenetv2.csv', index=False)

print("New submission CSV created successfully!")
print("\nTotal number of images:", len(submission_df))
print("\nLabel distribution:")
print(submission_df['label'].value_counts())

# Display the first few rows to verify
print("\nFirst few rows of the new submission file:")
submission_df.head()


New submission CSV created successfully!

Total number of images: 11999

Label distribution:
label
1    6017
0    5982
Name: count, dtype: int64

First few rows of the new submission file:


Unnamed: 0,image_id,label
0,0,1
1,1,0
2,10,0
3,100,0
4,1000,1


In [1]:
import pandas as pd
import os

# Load the original CSV
df = pd.read_csv(r"C:\Users\aryes\OneDrive\Desktop\vista-25\dataset\test_results_mobilenetv2.csv")

# Extract numeric image ID from the file name (e.g., "100.jpg" -> 100)
df["image_id"] = df["image_path"].apply(lambda x: int(os.path.basename(x).split(".")[0]))

# Rename "probability" to "confidence"
df.rename(columns={"probability": "confidence"}, inplace=True)

# Create a new DataFrame with the desired columns
new_df = df[["image_id", "prediction", "confidence"]]

# Sort by image_id in ascending order
new_df = new_df.sort_values("image_id")

# Save to a new CSV file
new_df.to_csv("image_prediction_confidence.csv", index=False)

print("New CSV file created: image_prediction_confidence_mobilenetv2.csv")


New CSV file created: image_prediction_confidence_mobilenetv2.csv


In [8]:
import pandas as pd 

df=pd.read_csv('new_submission.csv')

l=df['image_id']

for i in range(12000):
    if l[i]!=i:
        print(i)




In [19]:
df=pd.read_csv(r"C:\Users\aryes\OneDrive\Desktop\vista-25\image_prediction_confidence.csv")

df.dropna(axis=1,inplace=True)
df.to_csv(r"C:\Users\aryes\OneDrive\Desktop\vista-25\image_prediction_confidence.csv")

In [26]:
df=pd.read_csv(r"C:\Users\aryes\Downloads\image_prediction_confidence_cleaned.csv")
df

Unnamed: 0,image_id,label
0,0,0.074067
1,1,0.916158
2,2,0.225074
3,3,0.930096
4,4,0.979560
...,...,...
11995,11995,0.002842
11996,11996,0.143825
11997,11997,0.602134
11998,11998,0.993056
