# In [None]:
import psutil
import os
import time
import random
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
import threading

# Simulated malicious file behavior (for demo purposes)
def generate_system_behavior_data(n_samples=100):
    """Return a list of randomly generated behaviour records for the demo.

    Each record is a dict with the integer features ``file_size``
    (100-10000, bytes), ``access_count`` (1-50), ``network_access`` (0-10)
    and ``system_call_count`` (5-100), plus an ``is_malicious`` label
    (0: benign, 1: malicious).

    The label is drawn at random, independently of the features, so the
    data carries no real signal; it only exists to exercise the pipeline.

    Args:
        n_samples: Number of records to generate. Defaults to 100, the
            size the original demo used. Zero or a negative value yields
            an empty list.

    Returns:
        A list of ``n_samples`` dicts.
    """
    # Dict values are evaluated top to bottom, so the order of random draws
    # per record is: size, access count, network, syscalls, label.
    return [
        {
            'file_size': random.randint(100, 10000),
            'access_count': random.randint(1, 50),
            'network_access': random.randint(0, 10),
            'system_call_count': random.randint(5, 100),
            'is_malicious': random.choice([0, 1]),
        }
        for _ in range(n_samples)
    ]

# Preprocessing the dataset
def preprocess_behavior_data(data):
    """Split behaviour records into scaled features and labels.

    Args:
        data: Records accepted by ``pd.DataFrame``; every record must
            carry an ``is_malicious`` entry.

    Returns:
        A tuple ``(X_scaled, y, scaler)``: the standardised feature
        matrix, the ``is_malicious`` column, and the ``StandardScaler``
        that was fitted on the features (needed to scale later samples
        the same way).
    """
    frame = pd.DataFrame(data)

    labels = frame['is_malicious']
    features = frame.drop('is_malicious', axis=1)

    # Fit on the features and scale them in one step.
    feature_scaler = StandardScaler()
    scaled_features = feature_scaler.fit_transform(features)

    return scaled_features, labels, feature_scaler

# Train the model to detect malicious files
def train_antivirus_model():
    """Train a random-forest classifier on freshly simulated data.

    Returns:
        A tuple ``(model, scaler)``: the fitted
        ``RandomForestClassifier`` (100 trees) and the scaler returned by
        ``preprocess_behavior_data`` for the same training data.
    """
    samples = generate_system_behavior_data()
    features, labels, fitted_scaler = preprocess_behavior_data(samples)

    forest = RandomForestClassifier(n_estimators=100)
    forest.fit(features, labels)

    return forest, fitted_scaler

# Monitor system processes and file behaviors in real-time
def monitor_system_activity(model, scaler, interval=2, stop_event=None):
    """Repeatedly classify a simulated activity sample and react to hits.

    Every cycle draws one random sample (same feature ranges as the
    training data), scales it with ``scaler``, asks ``model`` for a
    prediction and, when the predicted label is 1, reports it and calls
    ``quarantine_file`` with the sample's file size.

    Args:
        model: Object with a ``predict`` method returning an indexable
            sequence of labels for the given rows.
        scaler: Object with a ``transform`` method accepting a
            single-row DataFrame with the four feature columns.
        interval: Seconds to pause between cycles. Defaults to 2.
        stop_event: Optional ``threading.Event``. When it is set the loop
            ends and the function returns. When omitted the loop runs
            until the process ends, as before.

    Returns:
        None.
    """
    print("Monitoring system activity...")

    if stop_event is None:
        # No caller-supplied signal: a private event that nobody sets keeps
        # the original run-forever behaviour while sharing one code path.
        stop_event = threading.Event()

    while not stop_event.is_set():
        # Simulate data collection for the system's current activity.
        sample = {
            'file_size': random.randint(100, 10000),
            'access_count': random.randint(1, 50),
            'network_access': random.randint(0, 10),
            'system_call_count': random.randint(5, 100),
        }

        # Preprocess the data before prediction.
        current_data = pd.DataFrame([sample])
        current_data_scaled = scaler.transform(current_data)

        # predict() returns one label per row; inspect the single row's
        # label explicitly instead of comparing the whole result to 1.
        prediction = model.predict(current_data_scaled)
        if prediction[0] == 1:
            print("Malicious activity detected!")
            # Implement action to quarantine the malicious file (for demo).
            quarantine_file(sample['file_size'])

        # Event.wait sleeps like time.sleep but wakes immediately once a
        # stop is requested.
        stop_event.wait(interval)

# Simulate quarantine of a malicious file
def quarantine_file(file_size):
    """Report a simulated quarantine; only prints, touches no files.

    Args:
        file_size: Size in bytes of the flagged file, used in the message.
    """
    message = f"Quarantining file with size: {file_size} bytes..."
    print(message)

# Main function to train the model and start monitoring
def main():
    """Train the detector, then launch monitoring on a background thread.

    The thread is started and not joined, so this function returns right
    after the thread has been launched.
    """
    # Train the model and keep the scaler that matches it.
    trained_model, fitted_scaler = train_antivirus_model()

    # Run the monitoring loop in its own thread.
    worker = threading.Thread(
        target=monitor_system_activity,
        args=(trained_model, fitted_scaler),
    )
    worker.start()

# Run the demo only when executed directly, not when imported as a module.
if __name__ == "__main__":
    main()