In [None]:
from flask import Flask, request, jsonify
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from xgboost import XGBClassifier
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
import pickle

# Flask application object; the route decorators further down register their handlers on it.
app = Flask(__name__)

# --- Dataset loading and preprocessing ---
# Read the transformed dataset from the ETL output directory.
df = pd.read_csv('../../Data Warehousing ETL/Transformed_Data/individuals_cyber_attacks_europe.csv')

# Names of the three quantile bins, ordered from the lowest scores upwards.
classes = ['Low', 'Medium', 'High']

# Bucket the anomaly scores into three equal-frequency bins to form the
# target column, then discard the raw score together with the columns that
# are not used as model inputs.
df = df.assign(
    Anomaly_Class=pd.qcut(df['Anomaly_Scores'], q=3, labels=classes)
).drop(columns=['Anomaly_Scores', 'User_Information', 'Payload_Data', 'Timestamp'])

# One encoder per text (object-dtype) column; each is fitted on the column's
# string values and kept so the mapping stays available after encoding.
label_encoders = {
    column: LabelEncoder()
    for column in df.select_dtypes(include=['object']).columns
}
for column, le in label_encoders.items():
    df[column] = le.fit_transform(df[column].astype(str))

# Separate the target column (y) from the feature matrix (X)
y = df['Anomaly_Class']
X = df.drop(columns=['Anomaly_Class'])

# Turn the class names into integer codes; the fitted encoder is kept so
# predictions can be mapped back to class names later.
le_target = LabelEncoder()
y_encoded = le_target.fit_transform(y)

# Hold out 30% of the rows for testing, with a fixed seed for repeatability
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_encoded,
    test_size=0.3,
    random_state=42,
)

# Fit a random forest on the training split purely to rank the features
rf = RandomForestClassifier(random_state=42).fit(X_train, y_train)

# Order feature positions from most to least important and keep the first 10
importances = rf.feature_importances_
indices = np.argsort(importances)[::-1]
top_10_indices = indices[:10]
top_10_features = X.columns[top_10_indices]

# Restrict both splits to the selected columns
X_train_top_10, X_test_top_10 = (
    split[top_10_features] for split in (X_train, X_test)
)

# Registry of estimators keyed by the model name that the /train route accepts.
# Every estimator uses the same fixed seed.
classifiers = dict(
    RandomForest=RandomForestClassifier(random_state=42),
    GradientBoosting=GradientBoostingClassifier(random_state=42),
    XGBoost=XGBClassifier(random_state=42),
)

# Utility function to evaluate model
def evaluate_model(model, X_test, y_test):
    """Score a fitted classifier on held-out data.

    Args:
        model: Fitted estimator exposing ``predict``.
        X_test: Feature matrix to predict on.
        y_test: True labels, encoded with ``le_target``.

    Returns:
        A JSON-serialisable dict with ``accuracy``, weighted ``precision``,
        ``recall`` and ``f1_score``, the ``confusion_matrix`` as nested
        lists, and the per-class ``classification_report`` as a dict.
    """
    y_pred = model.predict(X_test)

    # Pin the label set to every encoded class. Without this,
    # classification_report raises when a class is absent from both y_test
    # and y_pred (its target_names would no longer match the inferred
    # labels), and the confusion matrix shape would depend on the data.
    labels = np.arange(len(le_target.classes_))

    return {
        # float() turns NumPy scalars into plain Python floats for JSON output.
        'accuracy': float(accuracy_score(y_test, y_pred)),
        'precision': float(precision_score(y_test, y_pred, labels=labels, average='weighted')),
        'recall': float(recall_score(y_test, y_pred, labels=labels, average='weighted')),
        'f1_score': float(f1_score(y_test, y_pred, labels=labels, average='weighted')),
        'confusion_matrix': confusion_matrix(y_test, y_pred, labels=labels).tolist(),
        'classification_report': classification_report(
            y_test,
            y_pred,
            labels=labels,
            target_names=le_target.classes_,
            output_dict=True,
        ),
    }

# Route for training the models
@app.route('/train', methods=['POST'])
def train_model():
    """Train the requested classifier on the top-10 feature set and save it.

    Expects a JSON object body with an optional ``model_name`` key
    (default ``'RandomForest'``) naming an entry in ``classifiers``.

    Returns:
        A JSON success message once the model has been fitted and pickled to
        ``<model_name>_model.pkl``, or a JSON error with status 400 when the
        body is not a JSON object or the model name is unknown.
    """
    # silent=True returns None instead of raising on a missing or malformed
    # JSON body, so bad requests get a clean 400 rather than an unhandled error.
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict):
        return jsonify({'error': 'Request body must be a JSON object'}), 400

    model_name = payload.get('model_name', 'RandomForest')

    # The isinstance check also rejects unhashable values (e.g. a list),
    # which would raise TypeError in the dict membership test.
    if not isinstance(model_name, str) or model_name not in classifiers:
        return jsonify({'error': 'Model not found'}), 400

    # Train the model
    model = classifiers[model_name]
    model.fit(X_train_top_10, y_train)

    # Save the model
    with open(f"{model_name}_model.pkl", "wb") as f:
        pickle.dump(model, f)

    return jsonify({'message': f"{model_name} model trained and saved successfully!"})

# Route for evaluating models
@app.route('/evaluate', methods=['POST'])
def evaluate():
    """Evaluate a previously trained model on the held-out test split.

    Expects a JSON object body with an optional ``model_name`` key
    (default ``'RandomForest'``).

    Returns:
        The metrics dict from ``evaluate_model`` as JSON; a 400 error when
        the body is not a JSON object; a 404 error when the model name is
        unknown or no saved model exists for it.
    """
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict):
        return jsonify({'error': 'Request body must be a JSON object'}), 400

    model_name = payload.get('model_name', 'RandomForest')

    # Only registry names may reach the filesystem: the name is interpolated
    # into a path, so an unchecked value such as "../x" would let a client
    # choose which file gets unpickled.
    if not isinstance(model_name, str) or model_name not in classifiers:
        return jsonify({'error': 'Model not found'}), 404

    # Load the model
    try:
        with open(f"{model_name}_model.pkl", "rb") as f:
            # NOTE(security): pickle.load can execute arbitrary code. This is
            # acceptable only while these files are written solely by /train
            # and the directory is not writable by untrusted parties.
            model = pickle.load(f)
    except FileNotFoundError:
        return jsonify({'error': 'Model not found'}), 404

    # Evaluate the model
    results = evaluate_model(model, X_test_top_10, y_test)

    return jsonify(results)

# Route for making predictions
@app.route('/predict', methods=['POST'])
def predict():
    """Predict the anomaly class for one record with a saved model.

    Expects a JSON object body with:
        ``model_name``: optional, default ``'RandomForest'``.
        ``input_data``: object mapping each of the top-10 feature names to a
            value. Extra keys are ignored. A string value that matches a
            label seen by that column's fitted encoder is converted to its
            integer code; any other value is passed through unchanged.

    Returns:
        ``{'predicted_class': <class name>}`` as JSON; a 400 error for a
        malformed body, missing features or values the model cannot score;
        a 404 error when the model name is unknown or no saved model exists.
    """
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict):
        return jsonify({'error': 'Request body must be a JSON object'}), 400

    model_name = payload.get('model_name', 'RandomForest')
    input_data = payload.get('input_data')

    # Only registry names may reach the filesystem: the name is interpolated
    # into a path, so an unchecked value such as "../x" would let a client
    # choose which file gets unpickled.
    if not isinstance(model_name, str) or model_name not in classifiers:
        return jsonify({'error': 'Model not found'}), 404

    if not isinstance(input_data, dict):
        return jsonify({'error': 'input_data must be a JSON object of feature values'}), 400

    # Report absent features explicitly instead of failing with a KeyError.
    missing = [name for name in top_10_features if name not in input_data]
    if missing:
        return jsonify({'error': 'Missing features', 'missing_features': missing}), 400

    # Load the model
    try:
        with open(f"{model_name}_model.pkl", "rb") as f:
            # NOTE(security): pickle.load can execute arbitrary code. This is
            # acceptable only while these files are written solely by /train
            # and the directory is not writable by untrusted parties.
            model = pickle.load(f)
    except FileNotFoundError:
        return jsonify({'error': 'Model not found'}), 404

    # Build the single-row input in training column order. The models were
    # fitted on label-encoded columns, so known raw labels are mapped to
    # their integer codes here.
    row = {}
    for name in top_10_features:
        value = input_data[name]
        encoder = label_encoders.get(name)
        if encoder is not None and isinstance(value, str) and value in encoder.classes_:
            value = int(encoder.transform([value])[0])
        row[name] = value

    try:
        input_df = pd.DataFrame(row, index=[0])
        # Make prediction
        prediction = model.predict(input_df)
    except (ValueError, TypeError) as exc:
        # Raised for values that do not form a single numeric row.
        return jsonify({'error': f'Invalid input_data: {exc}'}), 400

    # str() guarantees a plain Python string for JSON serialisation.
    predicted_class = str(le_target.inverse_transform(prediction)[0])

    return jsonify({'predicted_class': predicted_class})

if __name__ == '__main__':
    import os

    # Debug mode enables the Werkzeug interactive debugger, which lets anyone
    # who can reach the server run arbitrary code. It is therefore opt-in via
    # the FLASK_DEBUG environment variable rather than always on.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').strip().lower() in ('1', 'true', 'yes', 'on')
    app.run(debug=debug_enabled)
