In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# ===== Step 1: Load Data =====
# The training and evaluation sets ship as two separate CSV files; both are
# read with pandas' default parsing options, in this order.
train, test = (pd.read_csv(csv_path) for csv_path in ("train.csv", "test.csv"))

# ===== Step 2: Define Features and Target =====
# Adjust these names if your dataset's columns are spelled differently.
features = [
    'profile_pic',
    'nums_length_username',
    'fullname_words',
    'nums_length_fullname',
    'name_length',
    'description_length',
    'external_url',
    'private',
    'posts',
    'followers',
    'follows',
]

# Name of the label column to predict.
target = 'fake'

# Both splits are sliced with the same feature list and the same label column.
X_train, y_train = train[features], train[target]
X_test, y_test = test[features], test[target]

# ===== Step 3: Scale the Features =====
# Fit the scaler on the training split only, then apply that same fitted
# transform to both splits so the test data never influences the statistics.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ===== Step 4: Define Models =====
# Display name -> unfitted estimator. Insertion order is the order in which
# the models are later trained and reported.
models = {}
models["Random Forest"] = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
)
models["Gradient Boosting"] = GradientBoostingClassifier(
    n_estimators=100,
    random_state=42,
)
models["SVM"] = SVC(
    kernel='rbf',
    probability=True,
    random_state=42,
)

# ===== Step 5: Train and Evaluate =====
# Maps each model's display name to its accuracy on the test split.
results = {}

for name, model in models.items():
    print(f"\nüîπ Training {name}...")

    # fit() returns the fitted estimator itself, so prediction can be chained.
    y_pred = model.fit(X_train_scaled, y_train).predict(X_test_scaled)

    acc = accuracy_score(y_test, y_pred)
    print(f"{name} Accuracy: {acc:.4f}")

    report = classification_report(y_test, y_pred)
    print(f"{name} Classification Report:\n", report)

    matrix = confusion_matrix(y_test, y_pred)
    print(f"{name} Confusion Matrix:\n", matrix)

    # Only the headline accuracy is kept for the comparison step.
    results[name] = acc

# ===== Step 6: Compare Models =====
# One line per model, in training order, with accuracy to four decimals.
print("\nüìä Model Comparison:")
for entry in results.items():
    print("{}: {:.4f}".format(*entry))

# ===== Step 7: Choose Best Model =====
# NOTE: `results` holds accuracy measured on the test split, so once the
# winner is picked from it that split is no longer an unbiased estimate for
# the chosen model. max() returns the first maximal key, so ties go to the
# model that was inserted into `results` first.
best_model_name = max(results, key=results.get)
print(f"\nüèÜ Best Model: {best_model_name}")

# (Optional) Save best model
import joblib
best_model = models[best_model_name]
joblib.dump(best_model, "best_model.pkl")
print("‚úÖ Model saved as best_model.pkl")

# Every model above was fitted on StandardScaler output, so the pickled
# estimator is only usable on new data together with the same fitted scaler.
# Persist the scaler next to the model so inference can reproduce the
# preprocessing.
joblib.dump(scaler, "scaler.pkl")
print("Scaler saved as scaler.pkl")



üîπ Training Random Forest...
Random Forest Accuracy: 0.9167
Random Forest Classification Report:
               precision    recall  f1-score   support

           0       0.90      0.93      0.92        60
           1       0.93      0.90      0.92        60

    accuracy                           0.92       120
   macro avg       0.92      0.92      0.92       120
weighted avg       0.92      0.92      0.92       120

Random Forest Confusion Matrix:
 [[56  4]
 [ 6 54]]

üîπ Training Gradient Boosting...
Gradient Boosting Accuracy: 0.9167
Gradient Boosting Classification Report:
               precision    recall  f1-score   support

           0       0.90      0.93      0.92        60
           1       0.93      0.90      0.92        60

    accuracy                           0.92       120
   macro avg       0.92      0.92      0.92       120
weighted avg       0.92      0.92      0.92       120

Gradient Boosting Confusion Matrix:
 [[56  4]
 [ 6 54]]

üîπ Training SVM...
SV