# In [None]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from flask import Flask, render_template, jsonify
import plotly.express as px
import time
import random

# Initialize the Flask application; the @app.route decorators below register
# their view functions on this instance, and the __main__ guard runs it.
app = Flask(__name__)

# Example user behavior dataset (user_id, login_time, file_access_count, network_activity, timestamp)
def generate_user_activity_data(n_records=100, users=None):
    """Generate simulated user activity records.

    Args:
        n_records: Number of records to produce. Defaults to 100; a value
            of zero or less yields an empty list.
        users: Optional sequence of user ids to sample from. When omitted,
            the ids 'user_1' through 'user_5' are used.

    Returns:
        A list of dicts, each with the keys 'user_id', 'login_time'
        (int in 1..24), 'file_access_count' (int in 1..20),
        'network_activity' (int in 1..100) and 'timestamp' (epoch seconds,
        at most one hour before the moment of generation).

    Raises:
        IndexError: If ``users`` is an empty sequence and ``n_records`` is
            positive (raised by ``random.choice``).
    """
    if users is None:
        users = ['user_1', 'user_2', 'user_3', 'user_4', 'user_5']

    # The dict values are evaluated in order, so the sequence of calls into
    # the random module per record is: user, login time, file count,
    # network activity, timestamp offset.
    return [
        {
            'user_id': random.choice(users),
            'login_time': random.randint(1, 24),
            'file_access_count': random.randint(1, 20),
            'network_activity': random.randint(1, 100),
            'timestamp': time.time() - random.randint(0, 3600),
        }
        for _ in range(n_records)
    ]

# Preprocess and create features for machine learning
def preprocess_user_data(data):
    """Build a DataFrame from activity records and add an 'hour_of_day' column.

    Args:
        data: Records accepted by ``pd.DataFrame``; each must provide a
            'timestamp' value expressed in epoch seconds.

    Returns:
        A new DataFrame holding the input fields plus 'hour_of_day', the
        hour component of 'timestamp' after conversion with
        ``pd.to_datetime(..., unit='s')``.
    """
    frame = pd.DataFrame(data)
    event_times = pd.to_datetime(frame['timestamp'], unit='s')
    frame['hour_of_day'] = event_times.dt.hour
    return frame

# Train a Random Forest classifier on rule-derived labels to flag potential insider threats
def train_insider_threat_model(df, file_access_threshold=10, random_state=None):
    """Label, fit and score user activity with a Random Forest classifier.

    Rows are labelled with a simple demo rule: 'threat' is 1 when
    'file_access_count' exceeds ``file_access_threshold`` and 0 otherwise.
    The classifier is then fitted on standardized features and used to
    predict on the very same rows. Because 'file_access_count' is also one
    of the input features, the predictions largely reproduce the labelling
    rule; this is a demonstration, not an evaluated detector.

    Args:
        df: DataFrame with the columns 'login_time', 'file_access_count',
            'network_activity' and 'hour_of_day'. It is modified in place:
            the columns 'threat' and 'predicted_threat' are added.
        file_access_threshold: File access count above which a row is
            labelled as a threat. Defaults to 10.
        random_state: Seed forwarded to ``RandomForestClassifier`` for
            reproducible fits. Defaults to None (unseeded).

    Returns:
        A tuple ``(df, model, scaler)``: the same DataFrame with the two
        added columns, the fitted classifier, and the fitted scaler.
    """
    feature_columns = ['login_time', 'file_access_count', 'network_activity', 'hour_of_day']

    scaler = StandardScaler()
    df_scaled = scaler.fit_transform(df[feature_columns])

    # Label normal behavior (0) and potential insider threat (1) using a
    # simple rule-based method (for demo purposes).
    df['threat'] = np.where(df['file_access_count'] > file_access_threshold, 1, 0)

    # Train the Random Forest model on the scaled features.
    model = RandomForestClassifier(n_estimators=100, random_state=random_state)
    model.fit(df_scaled, df['threat'])

    # Predict on the training rows themselves (no held-out split).
    df['predicted_threat'] = model.predict(df_scaled)
    return df, model, scaler

# Route to display the insider threat dashboard
@app.route('/')
def index():
    """Render the insider threat dashboard.

    Each request generates a fresh batch of simulated activity, trains the
    model on it, and embeds a scatter plot and a pie chart of the
    predictions into the 'insider_threat_dashboard.html' template.

    Returns:
        The rendered template, with the chart HTML fragments passed as the
        template variables 'scatter_plot' and 'pie_chart'.
    """
    # Generate simulated user activity data and score it.
    user_data = generate_user_activity_data()
    df = preprocess_user_data(user_data)
    df, _model, _scaler = train_insider_threat_model(df)

    # Plotly Express colors a numeric column on a continuous scale and then
    # ignores color_discrete_map. Casting the 0/1 predictions to strings
    # makes the column categorical so the green/red mapping is applied.
    scatter_df = df.assign(predicted_threat=df['predicted_threat'].astype(str))

    # Visualize the data using Plotly.
    fig = px.scatter(
        scatter_df,
        x='hour_of_day',
        y='file_access_count',
        color='predicted_threat',
        title='User Activity - Insider Threat Detection',
        labels={'hour_of_day': 'Hour of Day', 'file_access_count': 'File Access Count'},
        color_discrete_map={'0': 'green', '1': 'red'},
    )

    # Create a pie chart for threat distribution (normal vs threat).
    pie_fig = px.pie(df, names='predicted_threat', title='Insider Threat Distribution')

    # Convert figures to HTML fragments for embedding in the template.
    scatter_plot_html = fig.to_html(full_html=False)
    pie_chart_html = pie_fig.to_html(full_html=False)

    return render_template('insider_threat_dashboard.html', scatter_plot=scatter_plot_html, pie_chart=pie_chart_html)

# Route to get live user activity data (AJAX)
@app.route('/live_user_activity')
def live_user_activity():
    """Return a freshly simulated and scored batch of activity as JSON.

    Every call generates new simulated records, preprocesses them, trains
    the model on them, and serializes a subset of the columns.

    Returns:
        A JSON response containing a list of records with the fields
        'user_id', 'login_time', 'file_access_count', 'network_activity'
        and 'predicted_threat'.
    """
    scored, _model, _scaler = train_insider_threat_model(
        preprocess_user_data(generate_user_activity_data())
    )

    exported_columns = [
        'user_id',
        'login_time',
        'file_access_count',
        'network_activity',
        'predicted_threat',
    ]
    records = scored[exported_columns].to_dict(orient='records')
    return jsonify(records)

# Run the Flask app when this file is executed directly (not when imported).
# NOTE(review): debug=True is meant for local development only; confirm it
# is turned off before exposing this server beyond a trusted machine.
if __name__ == '__main__':
    app.run(debug=True)