In [None]:
# 📘 Step 1: Install Required Packages
!pip install pandas scikit-learn colorama matplotlib openpyxl


In [None]:
# 📘 Step 2: Import Required Libraries
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.ensemble import IsolationForest
from colorama import Fore, Style
import time, json, re
from google.colab import files


In [None]:
# 📘 Step 3: Upload External Log File (CSV, JSON, .log)
uploaded = files.upload()
# `uploaded` maps file names to their contents. If nothing was selected there
# is no first key to take, so fail with an actionable message instead of an
# opaque IndexError.
if not uploaded:
    raise RuntimeError("No file was uploaded. Re-run this cell and choose a CSV, JSON, or .log file.")
# Only the first uploaded file is analysed by the rest of the notebook.
file_name = next(iter(uploaded))
print(f"📂 Uploaded: {file_name}")


In [None]:
# 📘 Step 4: Parse Uploaded File Based on Extension
def parse_log_file(file_name):
    """Load a log file into a DataFrame, choosing the parser from the extension.

    Supported formats (the extension is matched case-insensitively):

    * ``.csv``  - read with ``pd.read_csv``.
    * ``.json`` - loaded with ``json.load`` and passed to ``pd.DataFrame``.
    * ``.log``  - plain text; each line shaped like
      ``<token> <token> <INFO|WARN|ERROR> <message>`` becomes one row with the
      columns ``timestamp``, ``log_level`` and ``message``. Lines that do not
      match are skipped.

    Parameters
    ----------
    file_name : str
        Path of the file to parse.

    Returns
    -------
    pandas.DataFrame
        The parsed records. For ``.log`` input the three columns are always
        present, even when no line matched.

    Raises
    ------
    ValueError
        If the extension is not one of the supported formats.
    """
    # Compare on a lower-cased copy so "APP.LOG" / "data.CSV" are accepted too.
    lowered = str(file_name).lower()

    if lowered.endswith('.csv'):
        df = pd.read_csv(file_name)
    elif lowered.endswith('.json'):
        with open(file_name, encoding='utf-8') as f:
            data = json.load(f)
        df = pd.DataFrame(data)
    elif lowered.endswith('.log'):
        line_pattern = re.compile(r'^(\S+ \S+)\s+(INFO|WARN|ERROR)\s+(.*)$')
        logs = []
        # Undecodable bytes are replaced rather than aborting the whole parse.
        with open(file_name, encoding='utf-8', errors='replace') as f:
            for line in f:
                match = line_pattern.match(line.strip())
                if match:
                    logs.append({
                        "timestamp": match.group(1),
                        "log_level": match.group(2),
                        "message": match.group(3)
                    })
        # Naming the columns explicitly keeps the schema stable when `logs` is
        # empty; otherwise the frame would have no columns at all.
        df = pd.DataFrame(logs, columns=["timestamp", "log_level", "message"])
    else:
        raise ValueError("Unsupported file format")
    return df

df = parse_log_file(file_name)
# Lower-case the headers; str() keeps this working when a header is not a
# string (e.g. integer column labels).
df.columns = [str(c).lower() for c in df.columns]
# Map alternative header names onto the canonical ones used by later cells.
# The rename is skipped when the canonical column already exists, because a
# duplicate label would make df['timestamp'] return a DataFrame.
for alias, canonical in (('time', 'timestamp'), ('label', 'log_level'), ('content', 'message')):
    if alias in df.columns and canonical not in df.columns:
        df = df.rename(columns={alias: canonical})
if 'timestamp' not in df.columns:
    raise KeyError(
        f"No 'timestamp' (or 'time') column found in {file_name}; columns are: {list(df.columns)}"
    )
# Unparseable timestamps become NaT and those rows are dropped.
df['timestamp'] = pd.to_datetime(df['timestamp'], errors='coerce')
df = df.dropna(subset=['timestamp'])
df.head()


In [None]:
# 📘 Step 5: Feature Extraction
def extract_features(df):
    """Derive the numeric model features from ``log_level`` and ``timestamp``.

    Adds three columns to ``df`` in place and returns them as a new frame:

    * ``error``  - 1 when the log level contains "ERROR" (any letter case), else 0.
    * ``warn``   - 1 when the log level contains "WARN" (any letter case), else 0.
    * ``minute`` - minute-of-hour of the row's timestamp.

    Parameters
    ----------
    df : pandas.DataFrame
        Must have a ``log_level`` column and a datetime ``timestamp`` column.
        The frame is modified: the three feature columns are written to it.

    Returns
    -------
    pandas.DataFrame
        The ``error``, ``warn`` and ``minute`` columns, in that order.
    """
    # astype(str) turns missing levels into text so every row can be searched.
    levels = df['log_level'].astype(str)
    # Literal, case-insensitive substring search, so "error" / "Warning" count.
    df['error'] = levels.str.contains('ERROR', case=False, regex=False, na=False).astype('int64')
    df['warn'] = levels.str.contains('WARN', case=False, regex=False, na=False).astype('int64')
    df['minute'] = df['timestamp'].dt.minute
    return df[['error', 'warn', 'minute']]

features = extract_features(df)


In [None]:
# 📘 Step 6: Anomaly Detection
# Fit the isolation forest on the extracted features (fit returns the estimator).
model = IsolationForest(random_state=42, contamination=0.2).fit(features)
# predict() labels outliers as -1; store them as 1 (incident) / 0 (normal).
df['incident'] = [1 if label == -1 else 0 for label in model.predict(features).tolist()]
df.head()


In [None]:
# 📘 Step 7: Alerting with Alert Fatigue Handling
from collections import defaultdict
import hashlib

# Integer counter per key (defaults to 0); not referenced by any cell visible
# in this notebook.
alert_memory = defaultdict(int)
# Minute value of the most recent alert per message hash; unseen keys read
# as -99.
last_alert_minute = defaultdict(lambda: -99)

def hash_message(msg):
    """Return the hexadecimal MD5 digest of ``msg``, used as a de-duplication key.

    ``msg`` is converted with ``str()`` before hashing, so non-string values
    (for example the float NaN pandas uses for a blank CSV cell) produce a key
    instead of raising ``AttributeError``. String input hashes exactly as it
    would via ``msg.encode()``.
    """
    return hashlib.md5(str(msg).encode("utf-8")).hexdigest()

def send_fatigue_alerts(df, cooldown=2):
    """Print an alert for each incident row, rate-limited per distinct message.

    Rows are visited in frame order. An incident row is printed only when at
    least ``cooldown`` minutes separate it from the last printed alert with
    the same message hash. The last-alert time per message is kept in the
    module-level ``last_alert_minute`` mapping, so it persists between calls.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs ``incident`` (truthy for anomalous rows), ``message`` and
        ``timestamp`` columns.
    cooldown : int, default 2
        Minimum gap, in minutes, between two alerts for the same message.
    """
    for _, row in df.iterrows():
        if not row['incident']:
            continue
        key = hash_message(row['message'])
        # Use whole minutes since the epoch rather than the minute-of-hour.
        # The 0-59 field wraps every hour, which wrongly suppressed alerts
        # across an hour boundary (e.g. 10:59 followed by 11:05).
        current_minute = int(row['timestamp'].timestamp() // 60)
        # .get() avoids inserting a default, so None means "never alerted".
        previous_minute = last_alert_minute.get(key)
        if previous_minute is None or current_minute - previous_minute >= cooldown:
            print(f"{Fore.RED}[ALERT] {row['timestamp']} | {row['message']}{Style.RESET_ALL}")
            last_alert_minute[key] = current_minute

# Print alerts for the flagged rows of `df`, using the default cooldown of 2.
send_fatigue_alerts(df)


In [None]:
# 📘 Step 8: Export Detected Alerts
# Keep only the rows flagged as incidents.
incident_mask = df['incident'].eq(1)
alerts = df.loc[incident_mask]
# Write the same rows to both formats, without the index column.
alerts.to_csv("alerts.csv", index=False)
alerts.to_excel("alerts.xlsx", index=False)
print("✅ Exported alerts to alerts.csv and alerts.xlsx")


In [None]:
# 📘 Step 9: Plot Incidents Over Time
# Draw on an explicit Axes instead of the implicit pyplot "current figure".
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(df['timestamp'], df['incident'], marker='o', linestyle='-')
ax.set_title("Incident Prediction Over Time")
ax.set_xlabel("Timestamp")
ax.set_ylabel("Incident (1=Yes, 0=No)")
# Tilt the tick labels so the timestamps do not overlap.
plt.setp(ax.get_xticklabels(), rotation=45)
ax.grid(True)
fig.tight_layout()
plt.show()


In [None]:
# 📘 Step 10: Real-Time Simulation with Fatigue Handling
def simulate_real_time(df, delay=1, cooldown=2):
    """Replay ``df`` row by row as if the log lines were arriving live.

    Starting with the first three rows, the detector is refitted on each
    growing prefix of ``df`` and the newest row of that prefix is reported:

    * ``[OK]``              - the newest row was not flagged.
    * ``[REAL-TIME ALERT]`` - flagged, and at least ``cooldown`` minutes have
      passed since the last alert for the same message.
    * ``[SUPPRESSED]``      - flagged, but still inside the cooldown window.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs ``log_level``, ``timestamp`` and ``message`` columns. It is not
        modified; every prefix is copied before features are added.
    delay : int or float, default 1
        Seconds to sleep after each reported row.
    cooldown : int, default 2
        Minimum gap, in minutes, between two alerts for the same message.
    """
    print("\n🔁 Real-time Simulation Starting...\n")
    model = IsolationForest(contamination=0.2, random_state=42)
    # Epoch minute of the last alert printed per message hash.
    fatigue_tracker = {}

    for i in range(3, len(df)+1):
        batch = df.iloc[:i].copy()
        feats = extract_features(batch)
        model.fit(feats)
        # predict() marks outliers with -1; only the newest row is reported.
        is_incident = model.predict(feats)[-1] == -1
        latest = batch.iloc[-1]
        key = hash_message(latest['message'])
        # Whole minutes since the epoch. The minute-of-hour field wraps every
        # hour and wrongly suppressed alerts across hour boundaries.
        minute = int(latest['timestamp'].timestamp() // 60)
        last_alert = fatigue_tracker.get(key)

        if not is_incident:
            print(f"[OK] {latest['timestamp']} | {latest['message']}")
        elif last_alert is None or minute - last_alert >= cooldown:
            print(f"{Fore.RED}[REAL-TIME ALERT] {latest['timestamp']} | {latest['message']}{Style.RESET_ALL}")
            fatigue_tracker[key] = minute
        else:
            # Flagged but rate-limited: do not report an anomalous row as OK.
            print(f"[SUPPRESSED] {latest['timestamp']} | {latest['message']}")
        time.sleep(delay)

# Replay the parsed log, sleeping one second after each reported row.
simulate_real_time(df, delay=1)
