In [None]:
!pip install gradio
!pip install xgboost
!pip install tensorflow

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
import joblib
import gradio as gr
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
from sklearn.preprocessing import StandardScaler
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load dataset
df = pd.read_csv("insta_train.csv")

# Define features and target variable
X = df.drop(columns=['fake'])  # Features: every column except the label
y = df['fake']  # Target: 0 = Not Fake, 1 = Fake

# Split data into training and testing sets (80/20, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale the data. The scaler is fitted on the training split only and then
# applied to the test split.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Save the scaler for later use
joblib.dump(scaler, "scaler.pkl")

# ============== BEST ML MODEL: XGBOOST ============== #
# XGBoost is trained and evaluated on the unscaled features.
xgb = XGBClassifier(n_estimators=300, learning_rate=0.05, max_depth=7, random_state=42)
xgb.fit(X_train, y_train)
xgb_pred = xgb.predict(X_test)
xgb_acc = accuracy_score(y_test, xgb_pred)

# ============== ANN MODEL ============== #
# Seed Python, NumPy and TensorFlow so weight initialisation, dropout masks and
# batch shuffling repeat across runs (the split and XGBoost are already seeded
# with 42). Some GPU kernels may still be non-deterministic.
tf.keras.utils.set_random_seed(42)

# An explicit Input replaces the deprecated `input_dim` layer argument.
model = Sequential([
    tf.keras.Input(shape=(X_train.shape[1],)),
    Dense(128, activation='relu'),
    BatchNormalization(),
    Dropout(0.3),

    Dense(256, activation='relu'),
    BatchNormalization(),
    Dropout(0.3),

    Dense(128, activation='relu'),
    BatchNormalization(),
    Dropout(0.3),

    # Two-way softmax: index 0 = Not Fake, index 1 = Fake
    Dense(2, activation='softmax')
])

# Compile the model (integer labels, hence the sparse loss)
model.compile(optimizer=Adam(learning_rate=0.001), loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Train the model on the scaled features.
# NOTE(review): the test split doubles as the Keras validation set and is also
# used below to pick the winning model, so the reported accuracy is likely
# optimistic. Consider a separate validation split.
history = model.fit(X_train_scaled, y_train, epochs=100, validation_data=(X_test_scaled, y_test), batch_size=32, verbose=1)

# Evaluate ANN model: the predicted class is the index of the larger softmax output
ann_pred = np.argmax(model.predict(X_test_scaled), axis=1)
ann_acc = accuracy_score(y_test, ann_pred)

# Choose the best model (a tie goes to the ANN because the comparison is strict)
best_model = "XGBoost" if xgb_acc > ann_acc else "ANN"
best_acc = max(xgb_acc, ann_acc)
print(f"Best Model: {best_model} with Accuracy: {best_acc:.2f}")

# Save the ANN model (only the ANN is written to disk, whichever model won)
model.save("fake_profile_model.h5")

# ============== GRADIO INTERFACE ============== #
def predict_fake_profile(profile_pic, nums_length_username, fullname_words,
                         nums_length_fullname, name_equals_username, description_length,
                         external_url, private, num_posts, num_followers, num_follows):
    """Classify a single profile with whichever model `best_model` names.

    The eleven arguments are placed into one feature row in the order they are
    declared. XGBoost receives the raw row; the ANN receives the row after it
    has been passed through the module-level `scaler`.

    Returns:
        "FAKE Profile (1)" when the predicted class is 1,
        "NOT FAKE Profile (0)" otherwise, or a short prompt when at least one
        field was left empty.
    """
    raw_values = [profile_pic, nums_length_username, fullname_words,
                  nums_length_fullname, name_equals_username, description_length,
                  external_url, private, num_posts, num_followers, num_follows]

    # An unselected Radio or an empty Number box can arrive as None. That would
    # build an object-dtype array and crash inside the scaler/model, so answer
    # with a readable prompt instead.
    if any(value is None for value in raw_values):
        return "Please provide a value for every field before submitting."

    # Convert input into a 1 x 11 numeric array
    user_input = np.array([raw_values], dtype=float)

    # Predict using the best model
    if best_model == "XGBoost":
        prediction = xgb.predict(user_input)
    else:  # ANN
        # Only the ANN was trained on scaled features, so scale only here.
        user_input_scaled = scaler.transform(user_input)
        # verbose=0 keeps the per-request Keras progress bar out of the logs.
        prediction = np.argmax(model.predict(user_input_scaled, verbose=0), axis=1)

    return "FAKE Profile (1)" if prediction[0] == 1 else "NOT FAKE Profile (0)"

# Build the input widgets in the same order as predict_fake_profile's parameters:
# yes/no flags become 1/0 radio buttons, everything else a numeric box.
input_components = []
for widget_kind, field_label in (
    ("radio", "Profile Pic (1 for Yes, 0 for No)"),
    ("number", "Nums/Length Username"),
    ("number", "Fullname Words"),
    ("number", "Nums/Length Fullname"),
    ("radio", "Name == Username (1 for Yes, 0 for No)"),
    ("number", "Description Length"),
    ("radio", "External URL (1 for Yes, 0 for No)"),
    ("radio", "Private (1 for Yes, 0 for No)"),
    ("number", "# Posts"),
    ("number", "# Followers"),
    ("number", "# Follows"),
):
    if widget_kind == "radio":
        input_components.append(gr.Radio([1, 0], label=field_label))
    else:
        input_components.append(gr.Number(label=field_label))

# Assemble the Gradio app around the prediction function
iface = gr.Interface(
    fn=predict_fake_profile,
    inputs=input_components,
    outputs="text",
    title="Fake Profile Detection",
    description=f"Best Model Selected: {best_model} with Accuracy: {best_acc:.2f}\nEnter the profile details to check if it's FAKE (1) or NOT FAKE (0).",
)

# Start serving the app
iface.launch()


In [None]:
pip install tensorflow gradio xgboost scikit-learn pandas joblib matplotlib seaborn


In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
import joblib
import gradio as gr
import matplotlib.pyplot as plt
import seaborn as sns
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
from sklearn.preprocessing import StandardScaler
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Load dataset
df = pd.read_csv("insta_train.csv")

# Define features and target variable
X = df.drop(columns=['fake'])  # Features: every column except the label
y = df['fake']  # Target: 0 = Not Fake, 1 = Fake

# Split data into training and testing sets (80/20, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale the data. The scaler is fitted on the training split only and then
# applied to the test split.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Save the scaler for later use
joblib.dump(scaler, "scaler.pkl")

# ============== BEST ML MODEL: XGBOOST ============== #
# XGBoost is trained and evaluated on the unscaled features.
xgb = XGBClassifier(n_estimators=300, learning_rate=0.05, max_depth=7, random_state=42)
xgb.fit(X_train, y_train)
xgb_pred = xgb.predict(X_test)
xgb_acc = accuracy_score(y_test, xgb_pred)

# ============== ANN MODEL ============== #
# Seed Python, NumPy and TensorFlow so weight initialisation, dropout masks and
# batch shuffling repeat across runs (the split and XGBoost are already seeded
# with 42). Some GPU kernels may still be non-deterministic.
tf.keras.utils.set_random_seed(42)

# An explicit Input replaces the deprecated `input_dim` layer argument.
model = Sequential([
    tf.keras.Input(shape=(X_train.shape[1],)),
    Dense(128, activation='relu'),
    BatchNormalization(),
    Dropout(0.3),

    Dense(256, activation='relu'),
    BatchNormalization(),
    Dropout(0.3),

    Dense(128, activation='relu'),
    BatchNormalization(),
    Dropout(0.3),

    # Two-way softmax: index 0 = Not Fake, index 1 = Fake
    Dense(2, activation='softmax')
])

# Compile the model (integer labels, hence the sparse loss)
model.compile(optimizer=Adam(learning_rate=0.001), loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Train the model on the scaled features.
# NOTE(review): the test split doubles as the Keras validation set and is also
# used below to pick the winning model, so the reported accuracy is likely
# optimistic. Consider a separate validation split.
history = model.fit(X_train_scaled, y_train, epochs=100, validation_data=(X_test_scaled, y_test), batch_size=32, verbose=1)

# Evaluate ANN model: the predicted class is the index of the larger softmax output
ann_pred = np.argmax(model.predict(X_test_scaled), axis=1)
ann_acc = accuracy_score(y_test, ann_pred)

# Choose the best model (a tie goes to the ANN because the comparison is strict)
best_model = "XGBoost" if xgb_acc > ann_acc else "ANN"
best_acc = max(xgb_acc, ann_acc)
print(f"Best Model: {best_model} with Accuracy: {best_acc:.2f}")

# Save the ANN model (only the ANN is written to disk, whichever model won)
model.save("fake_profile_model.h5")

# ============== DATA VISUALIZATION ============== #
# 1️⃣ Confusion Matrix
def plot_confusion_matrix():
    """Draw the test-set confusion matrix of the selected model as a heatmap.

    Uses the XGBoost predictions when `best_model` is "XGBoost" and the ANN
    predictions otherwise; rows are actual classes, columns predicted ones.
    """
    chosen_pred = xgb_pred if best_model == "XGBoost" else ann_pred
    class_names = ["Not Fake", "Fake"]
    matrix = confusion_matrix(y_test, chosen_pred)

    fig, ax = plt.subplots(figsize=(6, 5))
    sns.heatmap(
        matrix,
        annot=True,
        fmt="d",
        cmap="Blues",
        xticklabels=class_names,
        yticklabels=class_names,
        ax=ax,
    )
    ax.set_xlabel("Predicted")
    ax.set_ylabel("Actual")
    ax.set_title(f"Confusion Matrix - {best_model}")
    plt.show()

# 2️⃣ Feature Importance (XGBoost)
def plot_feature_importance():
    """Plot the fitted XGBoost model's feature importances as horizontal bars.

    Defined unconditionally so that the function's existence does not depend on
    which model happened to win in the current kernel run. Callers still decide
    whether the plot is relevant. Reads the module-level `xgb` and `X`.
    """
    importance = xgb.feature_importances_
    features = list(X.columns)
    plt.figure(figsize=(8, 6))
    # Assign `hue` so the palette colours each bar; passing `palette` without
    # `hue` is deprecated in recent seaborn releases. dodge=False keeps the
    # bars at full width.
    ax = sns.barplot(x=importance, y=features, hue=features, palette="coolwarm", dodge=False)
    # The hue legend would only repeat the y-axis labels, so drop it if present.
    legend = ax.get_legend()
    if legend is not None:
        legend.remove()
    plt.title("Feature Importance (XGBoost)")
    plt.xlabel("Importance Score")
    plt.ylabel("Features")
    plt.show()

# 3️⃣ Accuracy & Loss Curve (ANN)
def plot_training_curves():
    """Show the ANN's accuracy and loss per epoch, side by side.

    Each panel overlays the training curve and the validation curve taken from
    the module-level Keras `history` object.
    """
    # (history key, axis label / legend suffix, panel title)
    panels = [
        ("accuracy", "Accuracy", "Model Accuracy Over Epochs"),
        ("loss", "Loss", "Model Loss Over Epochs"),
    ]

    plt.figure(figsize=(12, 4))
    for position, (metric, pretty, heading) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        plt.plot(history.history[metric], label=f"Train {pretty}")
        plt.plot(history.history[f"val_{metric}"], label=f"Validation {pretty}")
        plt.xlabel("Epochs")
        plt.ylabel(pretty)
        plt.title(heading)
        plt.legend()

    plt.show()

# ============== GRADIO INTERFACE ============== #
def predict_fake_profile(profile_pic, nums_length_username, fullname_words,
                         nums_length_fullname, name_equals_username, description_length,
                         external_url, private, num_posts, num_followers, num_follows):
    """Classify a single profile with whichever model `best_model` names.

    The eleven arguments are placed into one feature row in the order they are
    declared. XGBoost receives the raw row; the ANN receives the row after it
    has been passed through the module-level `scaler`.

    Returns:
        A multi-line verdict string (fake / not fake) that also quotes
        `best_acc`, or a short prompt when at least one field was left empty.
        Note that the quoted accuracy is the model's overall test accuracy,
        not a confidence for this particular prediction.
    """
    raw_values = [profile_pic, nums_length_username, fullname_words,
                  nums_length_fullname, name_equals_username, description_length,
                  external_url, private, num_posts, num_followers, num_follows]

    # An unselected Radio or an empty Number box can arrive as None. That would
    # build an object-dtype array and crash inside the scaler/model, so answer
    # with a readable prompt instead.
    if any(value is None for value in raw_values):
        return "Please provide a value for every field before submitting."

    # Convert input into a 1 x 11 numeric array
    user_input = np.array([raw_values], dtype=float)

    # Predict using the best model
    if best_model == "XGBoost":
        prediction = xgb.predict(user_input)
    else:  # ANN
        # Only the ANN was trained on scaled features, so scale only here.
        user_input_scaled = scaler.transform(user_input)
        # verbose=0 keeps the per-request Keras progress bar out of the logs.
        prediction = np.argmax(model.predict(user_input_scaled, verbose=0), axis=1)

    # Return output message
    if prediction[0] == 1:
        return f"🚨 **FAKE Profile!** 🚨\n⚠️ **Ban the ID as soon as possible!**\n\n🔢 Accuracy Score: {best_acc:.2f}"
    else:
        return f"✅ **NO Fake Profile** ✅\n👍 **The account is safe!**\n\n🔢 Accuracy Score: {best_acc:.2f}"

# Build the input widgets in the same order as predict_fake_profile's parameters:
# yes/no flags become 1/0 radio buttons, everything else a numeric box.
input_components = []
for widget_kind, field_label in (
    ("radio", "Profile Pic (1 for Yes, 0 for No)"),
    ("number", "Nums/Length Username"),
    ("number", "Fullname Words"),
    ("number", "Nums/Length Fullname"),
    ("radio", "Name == Username (1 for Yes, 0 for No)"),
    ("number", "Description Length"),
    ("radio", "External URL (1 for Yes, 0 for No)"),
    ("radio", "Private (1 for Yes, 0 for No)"),
    ("number", "# Posts"),
    ("number", "# Followers"),
    ("number", "# Follows"),
):
    if widget_kind == "radio":
        input_components.append(gr.Radio([1, 0], label=field_label))
    else:
        input_components.append(gr.Number(label=field_label))

# Assemble the Gradio app around the prediction function
iface = gr.Interface(
    fn=predict_fake_profile,
    inputs=input_components,
    outputs="text",
    title="Fake Profile Detection",
    description=f"🔍 **Best Model Selected: {best_model}** (Accuracy: {best_acc:.2f})\n📌 Enter the profile details to check if it's FAKE (🚨) or NOT FAKE (✅).",
)

# Show visualizations: the confusion matrix always, then the plot that is
# specific to whichever model was selected.
plot_confusion_matrix()
if best_model == "XGBoost":
    plot_feature_importance()
elif best_model == "ANN":
    plot_training_curves()

# Start serving the app
iface.launch()
