In [None]:
import joblib

#Display time to run script

!pip install ipython-autotime
%load_ext autotime


In [None]:
import os
import numpy as np
import torch
from torchvision import models, transforms
from PIL import Image
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from tqdm import tqdm

username = 'atbusch78'
git_token = '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'
repository = 'Neural-Nets-2025'

!git clone --depth=1 https://{git_token}@github.com/{username}/{repository}.git
dataset_path = "/content/Neural-Nets-2025/Project/archive"

def find_folder(base_path, target_folder):
    """Return the path of the sub-directory of ``base_path`` named ``target_folder``.

    The name comparison is case-insensitive (both sides are upper-cased), so
    "REAL", "Real" and "real" all match.

    Args:
        base_path: Directory whose direct children are searched.
        target_folder: Name of the folder to look for.

    Returns:
        ``os.path.join(base_path, <matching entry>)`` for the first match.

    Raises:
        FileNotFoundError: If no directory with that name exists in ``base_path``.
    """
    wanted = target_folder.upper()
    # sorted(): os.listdir returns entries in arbitrary order; scanning in a fixed
    # order makes the result reproducible if several entries differ only by case.
    for entry in sorted(os.listdir(base_path)):
        candidate = os.path.join(base_path, entry)
        # Require a directory: a stray file that happens to share the name is not a folder.
        if entry.upper() == wanted and os.path.isdir(candidate):
            return candidate
    raise FileNotFoundError(f"Could not find {target_folder} in {base_path}")

# Resolve the REAL / FAKE class folders of each split.
# find_folder compares names case-insensitively, so "real" or "Real" on disk also match.
train_dir = os.path.join(dataset_path, "train")
test_dir = os.path.join(dataset_path, "test")

train_real_path = find_folder(train_dir, "REAL")
train_fake_path = find_folder(train_dir, "FAKE")
test_real_path = find_folder(test_dir, "REAL")
test_fake_path = find_folder(test_dir, "FAKE")

# --- Feature Extractor ---
# ImageNet-pretrained ResNet-18 with its last child module (the classifier head)
# removed, used as a frozen feature extractor for the Random Forest.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
# `pretrained=True` is deprecated in torchvision; IMAGENET1K_V1 names the same
# weights explicitly, so the extracted features do not change.
resnet = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
resnet = torch.nn.Sequential(*list(resnet.children())[:-1])  # Remove classifier head
resnet = resnet.to(device)
resnet.eval()  # inference mode: no dropout / batch-norm statistic updates

# Preprocessing applied to every image before it is fed to the extractor.
# NOTE: no mean/std normalization is applied here. The Streamlit app (app.py cell)
# must use exactly this transform; changing one side requires re-extracting
# features and retraining the classifier.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

def extract_features_from_folder(folder_path, label, max_images=None):
    """Extract one feature vector per image in ``folder_path``.

    Each image is converted to RGB, passed through the module-level ``transform``
    and ``resnet``, and the network output is flattened to a 1-D vector.

    Args:
        folder_path: Directory containing the image files (not searched recursively).
        label: Class label attached to every image of this folder.
        max_images: Optional cap on the number of files processed; ``None`` means all.

    Returns:
        Tuple ``(features, labels)`` of NumPy arrays with one entry per image that
        was processed successfully. Unreadable images are reported and skipped.
    """
    features, labels = [], []
    # sorted(): os.listdir order is arbitrary, so without it the `max_images`
    # subset (and therefore the trained model) would differ between runs/machines.
    # The leading dot keeps names that merely end in the letters "png"/"jpg" out.
    image_files = sorted(
        f for f in os.listdir(folder_path)
        if f.lower().endswith(('.png', '.jpg', '.jpeg'))
    )

    if max_images is not None:
        image_files = image_files[:max_images]  # Only take up to max_images

    for img_name in tqdm(image_files, desc=f"Extracting from {folder_path}"):
        img_path = os.path.join(folder_path, img_name)
        try:
            # Context manager releases the file handle as soon as the pixels are decoded.
            with Image.open(img_path) as raw:
                rgb = raw.convert('RGB')
            batch = transform(rgb).unsqueeze(0).to(device)  # add batch dimension
            with torch.no_grad():
                feat = resnet(batch)
            features.append(feat.cpu().numpy().flatten())
            labels.append(label)
        except Exception as e:
            # Best effort by design: one corrupt file must not abort the whole run.
            print(f"Skipping {img_name}: {e}")

    return np.array(features), np.array(labels)

# --- Extract features with limits ---
# Per-class caps on how many images are processed for each split.
MAX_TRAIN_IMAGES_PER_CLASS = 4000
MAX_TEST_IMAGES_PER_CLASS = 1000

# Label convention: 1 = REAL, 0 = FAKE.
X_train_real, y_train_real = extract_features_from_folder(
    train_real_path, label=1, max_images=MAX_TRAIN_IMAGES_PER_CLASS
)
X_train_fake, y_train_fake = extract_features_from_folder(
    train_fake_path, label=0, max_images=MAX_TRAIN_IMAGES_PER_CLASS
)
X_test_real, y_test_real = extract_features_from_folder(
    test_real_path, label=1, max_images=MAX_TEST_IMAGES_PER_CLASS
)
X_test_fake, y_test_fake = extract_features_from_folder(
    test_fake_path, label=0, max_images=MAX_TEST_IMAGES_PER_CLASS
)


# Stack data: REAL rows first, FAKE rows second, labels in the same order.
X_train = np.vstack((X_train_real, X_train_fake))
y_train = np.concatenate((y_train_real, y_train_fake))

X_test = np.vstack((X_test_real, X_test_fake))
y_test = np.concatenate((y_test_real, y_test_fake))

# --- Random Forest Training ---
# Fixed random_state keeps the fitted forest reproducible for identical inputs.
rf = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

# --- Evaluation ---
# target_names follows the sorted label order: 0 -> FAKE, 1 -> REAL.
y_pred = rf.predict(X_test)
report = classification_report(y_test, y_pred, target_names=["FAKE", "REAL"])
print(report)



In [None]:
# Persist the fitted forest, then read it back to check the saved file is usable.
# The file name must match MODEL_PATH in app.py.
model_file = "random_forest_model.pkl"
joblib.dump(rf, model_file)
rf_loaded = joblib.load(model_file)


# Predict again with the reloaded model (overwrites y_pred from the evaluation cell).
y_pred = rf_loaded.predict(X_test)


time: 226 ms (started: 2025-04-30 16:25:46 +00:00)


In [None]:
!pip install streamlit pyngrok -q

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.3/44.3 kB[0m [31m1.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.9/9.9 MB[0m [31m31.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m44.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.1/79.1 kB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m
[?25htime: 7.75 s (started: 2025-04-30 16:25:49 +00:00)


In [None]:

%%writefile app.py
import streamlit as st
import joblib
import torch
from torchvision import models, transforms
from PIL import Image
import numpy as np
import os

# --- Configuration ---
MODEL_PATH = "random_forest_model.pkl" # Random Forest file written by the notebook's joblib.dump cell

# --- Streamlit Page Configuration ---
# Deliberately placed ahead of every other st.* call in this script (st.error,
# st.title, ...): set_page_config must be the first Streamlit command executed.
st.set_page_config(
    page_title="Deepfake Detector",
    page_icon="🖼️",
    layout="wide"
)


# --- Load Feature Extractor (ResNet) ---
# Must reproduce the extractor used when the training features were computed.
device = 'cuda' if torch.cuda.is_available() else 'cpu'


@st.cache_resource  # Streamlit re-runs this script on every interaction; build the network only once
def load_feature_extractor(device_name):
    """Build the headless ResNet-18 feature extractor on ``device_name``.

    The weights are pinned to IMAGENET1K_V1 (the weights the deprecated
    ``pretrained=True`` flag loads) rather than ``DEFAULT``, so a future change
    of torchvision's default cannot silently desynchronize the extractor from
    the features the Random Forest was trained on.

    Args:
        device_name: Torch device string ('cuda' or 'cpu'); also part of the cache key.

    Returns:
        ``torch.nn.Sequential`` made of every ResNet-18 child module except the
        last one (the classifier head), moved to ``device_name`` and set to eval mode.
    """
    weights_enum = getattr(models, "ResNet18_Weights", None)
    if weights_enum is not None:
        backbone = models.resnet18(weights=weights_enum.IMAGENET1K_V1)
    else:
        # torchvision releases without the weights enum only understand `pretrained=`.
        backbone = models.resnet18(pretrained=True)
    extractor = torch.nn.Sequential(*list(backbone.children())[:-1])  # Remove classifier head
    return extractor.to(device_name).eval()


resnet = load_feature_extractor(device)

# --- Define Image Transformations ---
# Must stay identical to the transform used during training feature extraction;
# otherwise the classifier receives features it was not trained on.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    # ImageNet mean/std normalization is intentionally left disabled: the training
    # notebook's transform did not include it. Enable it only together with
    # re-extracting the training features and retraining the classifier.
    # transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# --- Load Trained Classifier ---
@st.cache_resource  # Only successful loads are cached: a raised exception stores nothing
def _load_classifier_cached(model_path):
    """Unpickle the classifier stored at ``model_path``; raises on any failure."""
    # SECURITY: joblib.load unpickles the file, which can execute arbitrary code.
    # Only point MODEL_PATH at files produced by the training notebook.
    return joblib.load(model_path)


def load_classifier(model_path):
    """Load the saved Random Forest classifier, reporting problems in the UI.

    Failures are handled outside the cached helper on purpose: if ``None`` were
    cached, a model file created after the first run would never be picked up
    until the cache was cleared.

    Args:
        model_path: Path of the file written with ``joblib.dump``.

    Returns:
        The loaded classifier, or ``None`` if the file is missing or cannot be
        loaded (an ``st.error`` message is shown in either case).
    """
    if not os.path.exists(model_path):
        st.error(f"Model file not found at {model_path}. Please ensure the previous notebook cells have run successfully and saved the model.")
        return None
    try:
        return _load_classifier_cached(model_path)
    except Exception as e:
        st.error(f"Error loading the model: {e}")
        return None

# --- Load the classifier AFTER set_page_config ---
# load_classifier may call st.error, which must not run before st.set_page_config.
classifier = load_classifier(MODEL_PATH)

# --- Helper Function ---
def predict_image(image_bytes):
    """Classify one uploaded image with the ResNet extractor and the Random Forest.

    Args:
        image_bytes: File path or file-like object accepted by ``PIL.Image.open``
            (the UI passes the Streamlit upload object).

    Returns:
        Tuple ``(prediction, probability)``: the predicted class label and the
        per-class probability vector ordered like ``classifier.classes_``.
        ``(None, None)`` if the classifier is not loaded or any step fails
        (an ``st.error`` message is shown in that case).
    """
    if classifier is None:
        st.error("Classifier not loaded. Cannot predict.")
        return None, None

    try:
        img = Image.open(image_bytes).convert('RGB')

        # Same preprocessing as at training time; unsqueeze adds the batch dimension.
        img_t = transform(img).unsqueeze(0).to(device)

        # Extract features using ResNet (no gradients needed for inference)
        with torch.no_grad():
            features = resnet(img_t)

        # One row holding the flattened feature vector, as scikit-learn expects 2-D input.
        features_np = features.cpu().numpy().reshape(1, -1)

        # Run the forest once: the predicted class is the one with the highest mean
        # probability, so it is derived from predict_proba instead of a second,
        # redundant predict() pass over all trees.
        probability = classifier.predict_proba(features_np)[0]
        prediction = classifier.classes_[np.argmax(probability)]

        return prediction, probability

    except Exception as e:
        # Report in the UI instead of crashing the app on a bad upload.
        st.error(f"An error occurred during prediction: {e}")
        return None, None

# --- Streamlit UI ---
# Widgets render in the order the statements below execute; st.set_page_config
# has already been called near the top of this script.

st.title("🖼️ Deepfake Image Detector UI")
st.write("Upload an image to classify it as REAL or FAKE using a ResNet feature extractor and a Random Forest classifier.")
st.write(f"*(Using device: {device.upper()})*")

uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])

if uploaded_file is not None:
    # Display the uploaded image
    # NOTE(review): `use_column_width` is reported as deprecated in recent Streamlit
    # releases - confirm against the installed version before changing it.
    st.image(uploaded_file, caption='Uploaded Image.', use_column_width=True)
    st.write("") # Add some space

    # Predict on button click
    if st.button('Classify Image'):
        # Ensure classifier is loaded before attempting prediction in the UI
        if classifier is not None:
             with st.spinner('Analyzing image...'):
                prediction, probability = predict_image(uploaded_file)

                # predict_image returns (None, None) after showing its own error message.
                if prediction is not None:
                    st.subheader("Prediction Result:")
                    # Label 1 is REAL and 0 is FAKE (training notebook convention).
                    # NOTE(review): indexing probability[1]/[0] assumes the classifier's
                    # class order is [0, 1] - confirm via classifier.classes_.
                    if prediction == 1:
                        st.success(f"**REAL** (Confidence: {probability[1]:.2%})")
                    else:
                        st.error(f"**FAKE** (Confidence: {probability[0]:.2%})")
        else:
            st.error("Model could not be loaded. Please check logs and ensure 'random_forest_model.pkl' exists.")


else:
    # Nothing uploaded yet: show a hint instead of the classification controls.
    st.info("Upload an image file to start classification.")

# Sidebar: short description of the pipeline plus the runtime settings in use.
about_text = (
    "This app uses features extracted by a pre-trained ResNet-18 model, "
    "which are then fed into a Random Forest classifier trained on a dataset "
    "of real and fake images (like those generated by StyleGAN)."
    f"\n\nModel file expected: `{MODEL_PATH}`"
    f"\nCompute device: `{device.upper()}`"
)
sidebar = st.sidebar
sidebar.header("About")
sidebar.info(about_text)

Writing app.py
time: 21 ms (started: 2025-04-30 16:26:00 +00:00)


In [None]:
# Install the tunnel client only once the Streamlit script written by the
# %%writefile cell exists in the working directory.
if not os.path.exists("app.py"):
    print("Error: app.py not found. Please run Cell 5 first.")
else:
    # Install localtunnel globally (-g) using npm (Node Package Manager)
    print("Installing localtunnel...")
    !npm install -g localtunnel

Installing localtunnel...
[1G[0K⠙[1G[0K⠹[1G[0K⠸[1G[0K⠼[1G[0K⠴[1G[0K⠦[1G[0K⠧[1G[0K⠇[1G[0K⠏[1G[0K⠋[1G[0K⠙[1G[0K⠹[1G[0K⠸[1G[0K⠼[1G[0K⠴[1G[0K⠦[1G[0K⠧[1G[0K⠇[1G[0K⠏[1G[0K⠋[1G[0K⠙[1G[0K⠹[1G[0K⠸[1G[0K⠼[1G[0K⠴[1G[0K⠦[1G[0K⠧[1G[0K⠇[1G[0K⠏[1G[0K⠋[1G[0K⠙[1G[0K⠹[1G[0K⠸[1G[0K⠼[1G[0K⠴[1G[0K⠦[1G[0K⠧[1G[0K⠇[1G[0K⠏[1G[0K⠋[1G[0K⠙[1G[0K⠹[1G[0K⠸[1G[0K⠼[1G[0K
added 22 packages in 6s
[1G[0K⠼[1G[0K
[1G[0K⠼[1G[0K3 packages are looking for funding
[1G[0K⠼[1G[0K  run `npm fund` for details
[1G[0K⠼[1G[0Ktime: 6.56 s (started: 2025-04-30 16:26:09 +00:00)


In [None]:
# Print this machine's public IPv4 address (fetched quietly and written to stdout).
# NOTE(review): presumably this is the value localtunnel's landing page asks for as
# the "tunnel password" - confirm. Clear the cell output before sharing the notebook.
!wget -q -O - ipv4.icanhazip.com

34.138.84.69


In [None]:
# CELL 6: Launch Streamlit and Localtunnel
# (Assumes app.py exists and localtunnel is installed from previous cells)

import os

# Define the port
port = 8501 # Streamlit's default port

# Run Streamlit in the background & start localtunnel
print(f"Attempting to start Streamlit on port {port} and launch localtunnel...")
print("Ensure the cell writing 'app.py' and the cell installing 'localtunnel' have been run successfully.")

# Use nohup to prevent hang-up and redirect stderr to stdout for better logging in Colab
# The '&' runs streamlit in the background FIRST, then npx runs in the foreground of the cell
!nohup streamlit run app.py --server.port {port} > streamlit_log.txt 2>&1 &

# Launch localtunnel - this will now run in the foreground of this cell
!npx localtunnel --port {port}

# Note: The output below the cell will show the localtunnel URL (e.g., https://<random-word>.loca.lt)
# You might need to click through a warning page the first time you access the URL.
print("\n----> Look for a URL ending in '.loca.lt' in the output above or below this message. <----")
print("      (You might need to scroll up in the output to find it)")
print("\nNote: The UI will become unresponsive if the Colab session ends or this cell is stopped.")
print("      If the app seems stuck or shows errors, check the 'streamlit_log.txt' file for details:")
print("      !cat streamlit_log.txt")

Attempting to start Streamlit on port 8501 and launch localtunnel...
Ensure the cell writing 'app.py' and the cell installing 'localtunnel' have been run successfully.
[1G[0K⠙[1G[0K⠹[1G[0K⠸[1G[0K⠼[1G[0K⠴[1G[0K⠦[1G[0Kyour url is: https://dirty-donkeys-sin.loca.lt
