# 🧠 Linux User Behavior & Session Analysis
This notebook helps incident responders analyze user login behavior, shell history, and privilege escalation events on a Linux system using forensic logs.

## Evidence location

Before running any analysis, configure where your evidence is located:

- If you have **mounted a full disk image** so that its root filesystem is visible (for example at `/mnt/evidence`), set `EVIDENCE_ROOT` in the next cell to that mount point. The notebook will then look for logs under that root (for example `var/log/auth.log`, `var/log/wtmp`, and per-user shell history files under `home/<username>/`).
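- If you only have **exported individual log or history files** (for example from a triage script), set `EVIDENCE_ROOT` to `None` in the next cell and point the individual `*_PATH` variables at the exact locations of those files. With `EVIDENCE_ROOT = None` the default paths refer to the logs of the machine running this notebook, so override every path you intend to analyze.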

The notebook **only reads** from these paths; it will not modify your evidence. Always work on a copy of the acquired data, not the original collection.


In [None]:
# 📦 Setup & Imports
import os
import re
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
from IPython.display import display

# Use seaborn's "darkgrid" style for the plots drawn in the cells below.
sns.set(style="darkgrid")

In [None]:
# 📁 Evidence configuration & file presence check

# Set this to the root of your mounted evidence image (e.g. "/mnt/evidence").
# If you only have individual log files and no full filesystem, set this to None
# and configure the *_PATH variables directly below.
EVIDENCE_ROOT = "/mnt/evidence"  # CHANGE ME: set to your evidence mount point, or None

# Account under investigation. It is only used to locate shell history inside a
# mounted image, but is defined unconditionally so it exists in both modes.
TARGET_USERNAME = "targetuser"  # CHANGE ME: the account you are investigating

if EVIDENCE_ROOT:
    # Mounted image with a full filesystem layout: build paths relative to it.
    AUDIT_LOG_PATH = os.path.join(EVIDENCE_ROOT, "var/log/audit/audit.log")
    AUTH_LOG_PATH = os.path.join(EVIDENCE_ROOT, "var/log/auth.log")  # or secure on some distros
    WTMP_PATH = os.path.join(EVIDENCE_ROOT, "var/log/wtmp")
    BTMP_PATH = os.path.join(EVIDENCE_ROOT, "var/log/btmp")

    # root's home directory is conventionally /root, not /home/root.
    if TARGET_USERNAME == "root":
        _target_home = os.path.join(EVIDENCE_ROOT, "root")
    else:
        _target_home = os.path.join(EVIDENCE_ROOT, "home", TARGET_USERNAME)
    BASH_HISTORY_PATH = os.path.join(_target_home, ".bash_history")
    ZSH_HISTORY_PATH = os.path.join(_target_home, ".zsh_history")
else:
    # Fallback: analyze the current running system instead of mounted evidence.
    # Override these below when working from individually exported files.
    AUDIT_LOG_PATH = "/var/log/audit/audit.log"
    AUTH_LOG_PATH = "/var/log/auth.log"  # or /var/log/secure on RHEL
    WTMP_PATH = "/var/log/wtmp"
    BTMP_PATH = "/var/log/btmp"
    BASH_HISTORY_PATH = os.path.expanduser("~/.bash_history")
    ZSH_HISTORY_PATH = os.path.expanduser("~/.zsh_history")

# You can also override any of these paths manually if your evidence collection
# saved files into a different folder structure, for example:
# AUDIT_LOG_PATH = "/cases/linux01/audit.log"
# AUTH_LOG_PATH = "/cases/linux01/auth.log"

# Short name -> path lookup used by every later cell.
log_files = {
    "audit": AUDIT_LOG_PATH,
    "bash_history": BASH_HISTORY_PATH,
    "zsh_history": ZSH_HISTORY_PATH,
    "wtmp": WTMP_PATH,
    "btmp": BTMP_PATH,
    "auth": AUTH_LOG_PATH,
}

# Report up front which evidence files are present so missing inputs are
# noticed before the analysis cells run.
print("Evidence file presence check:")
for name, path in log_files.items():
    print(f"{name:12s}: {path} -> {'FOUND' if os.path.exists(path) else 'MISSING'}")

In [None]:
# 🕓 Login Timeline from wtmp
def parse_last_output(file_path):
    """Run ``last -F -f <file_path>`` and parse the result into a DataFrame.

    Each login record becomes one row with the columns ``user``, ``tty``,
    ``ip`` and ``timestamp``. ``timestamp`` holds only the *login* time as
    text (e.g. ``"Mon Jan 1 10:00:00 2024"``), without the logout time or
    session duration that ``last`` prints after it, so it can be handed
    straight to ``pd.to_datetime``. ``ip`` is whatever ``last`` shows in its
    host column (for ``reboot`` rows that is the kernel version) and is
    ``None`` when the record has no host, as for console logins.

    Args:
        file_path: Path to a wtmp/btmp-style file readable by ``last``.

    Returns:
        A DataFrame that always has the four columns above, even when no
        record could be parsed.

    Raises:
        FileNotFoundError: If the ``last`` executable is not installed.
        subprocess.CalledProcessError: If ``last`` exits with an error.
    """
    from subprocess import check_output

    # With -F, `last` prints full ctime-style stamps ("Mon Jan  1 10:00:00 2024").
    # The first one on a line is the login time; everything left of it is the
    # user / tty / host columns.
    stamp_re = re.compile(
        r"[A-Z][a-z]{2}\s+[A-Z][a-z]{2}\s+\d{1,2}\s+\d{2}:\d{2}:\d{2}\s+\d{4}"
    )
    columns = ["user", "tty", "ip", "timestamp"]

    output = check_output(["last", "-F", "-f", file_path], text=True)
    records = []
    for line in output.splitlines():
        stamp = stamp_re.search(line)
        if stamp is None:
            continue  # blank line, or not a login record
        fields = line[:stamp.start()].split()
        if len(fields) < 2 or fields[1] == "begins":
            continue  # trailer line such as "wtmp begins <date>"

        if fields[1] == "system" and len(fields) >= 3:
            # Pseudo records ("reboot   system boot ...") use a two-word tty.
            tty = " ".join(fields[1:3])
            host_fields = fields[3:]
        else:
            tty = fields[1]
            host_fields = fields[2:]

        records.append({
            "user": fields[0],
            "tty": tty,
            "ip": " ".join(host_fields) or None,
            # Collapse the padding `last` uses for single-digit days.
            "timestamp": " ".join(stamp.group(0).split()),
        })
    return pd.DataFrame(records, columns=columns)

# Build the login table; fall back to an empty one when wtmp is unavailable
# instead of letting `last` fail on a missing file.
if os.path.exists(log_files["wtmp"]):
    wtmp_df = parse_last_output(log_files["wtmp"])
else:
    print(f"wtmp not found at {log_files['wtmp']}; login analysis will be empty.")
    wtmp_df = pd.DataFrame()

# Guarantee the columns later cells rely on, even when no record was parsed.
wtmp_df = wtmp_df.reindex(columns=["user", "tty", "ip", "timestamp"])
# Unparsable timestamps become NaT rather than raising.
wtmp_df["timestamp"] = pd.to_datetime(wtmp_df["timestamp"], errors="coerce")
display(wtmp_df.head())

In [None]:
# 📈 Plot login timeline
# Rows whose timestamp could not be parsed are NaT and cannot be binned.
login_times = wtmp_df["timestamp"].dropna()
if login_times.empty:
    print("No parsable login timestamps available; skipping login timeline plot.")
else:
    plt.figure(figsize=(12, 4))
    sns.histplot(login_times, bins=50, kde=False)
    plt.title("User Login Timeline")
    plt.xlabel("Time")
    plt.ylabel("Login Count")
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.show()

In [None]:
# 🌍 Source IP Frequency
# value_counts() skips missing hosts, so only records with a source are counted.
if "ip" in wtmp_df.columns:
    ip_counts = wtmp_df["ip"].value_counts().reset_index()
else:
    ip_counts = pd.DataFrame(columns=["ip", "count"])
ip_counts.columns = ["IP Address", "Login Count"]

if ip_counts.empty:
    print("No login source addresses available; skipping source IP plot.")
else:
    plt.figure(figsize=(10, 5))
    sns.barplot(data=ip_counts.head(10), x="IP Address", y="Login Count")
    plt.title("Top Source IPs for Login")
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.show()

In [None]:
# 🔐 SUDO / SU Usage from auth.log
def parse_auth_log(path):
    """Collect sudo and su related lines from an auth log.

    A line counts as a sudo event when it contains ``sudo:``. A line counts
    as an su event when it reports ``session opened for user`` *and* is
    attributable to ``su`` itself, either through the logging process tag
    (``su:`` / ``su[1234]:``) or the PAM service name (``(su:session)`` /
    ``(su-l:session)``). A bare substring test for ``su`` is not used
    because it also matches unrelated sessions, e.g. an sshd login for a
    user called ``susan``.

    Args:
        path: Path to the auth log (``auth.log`` or ``secure``).

    Returns:
        A ``(sudo_events, su_events)`` tuple of lists of stripped log lines,
        in file order.

    Raises:
        OSError: If the file cannot be opened.
    """
    su_marker = re.compile(r"(?:^|\s)su(?:\[\d+\])?:|\(su(?:-l)?:session\)")
    sudo_events = []
    su_events = []
    # Auth logs can contain arbitrary bytes (e.g. attacker-chosen usernames);
    # replace undecodable bytes instead of aborting the whole parse.
    with open(path, encoding="utf-8", errors="replace") as f:
        for line in f:
            if "sudo:" in line:
                sudo_events.append(line.strip())
            elif "session opened for user" in line and su_marker.search(line):
                su_events.append(line.strip())
    return sudo_events, su_events

# Some distributions (e.g. RHEL) write authentication events to "secure"
# instead of "auth.log"; try that sibling file before giving up.
auth_log_path = log_files["auth"]
if not os.path.exists(auth_log_path):
    secure_log_path = os.path.join(os.path.dirname(auth_log_path), "secure")
    if os.path.exists(secure_log_path):
        auth_log_path = secure_log_path

if os.path.exists(auth_log_path):
    sudo_logs, su_logs = parse_auth_log(auth_log_path)
else:
    print(f"Auth log not found at {log_files['auth']}; no sudo/su events loaded.")
    sudo_logs, su_logs = [], []

print(f"SUDO events: {len(sudo_logs)}")
print(f"SU events: {len(su_logs)}")

In [None]:
# üßë‚Äçüíª Shell History Analysis
def parse_shell_history(path):
    if not os.path.exists(path):
        return pd.DataFrame()
    with open(path) as f:
        lines = f.readlines()
    has_timestamps = any(line.startswith("#") for line in lines)
    records = []
    timestamp = None
    for line in lines:
        line = line.strip()
        if line.startswith("#"):
            try:
                timestamp = datetime.fromtimestamp(int(line[1:]))
            except:
                timestamp = None
        elif line:
            records.append({
                "timestamp": timestamp,
                "command": line
            })
    return pd.DataFrame(records)

bash_df = parse_shell_history(log_files["bash_history"])
zsh_df = parse_shell_history(log_files["zsh_history"])

# ignore_index gives each command a unique row label across both shells.
history_df = pd.concat([bash_df, zsh_df], ignore_index=True)
# Guarantee both columns exist even when neither history file was found,
# so the dropna below and the later cells do not raise KeyError.
history_df = history_df.reindex(columns=["timestamp", "command"])
history_df["timestamp"] = pd.to_datetime(history_df["timestamp"], errors="coerce")
history_df = history_df.dropna(subset=["command"])
display(history_df.head())

In [None]:
# 📊 Command Frequency
# Count by the first word of each command line (the program name).
if "command" in history_df.columns and not history_df.empty:
    cmd_counts = history_df["command"].str.split().str[0].value_counts().head(10)
else:
    cmd_counts = pd.Series(dtype="int64")

if cmd_counts.empty:
    print("No shell history commands available; skipping command frequency plot.")
else:
    cmd_counts.plot(kind="bar", figsize=(10, 4), title="Top Commands Used")
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.show()

In [None]:
# 🕒 Optional: Command Usage by Hour
if not history_df.empty and "timestamp" in history_df.columns:
    # Coerce first: a history without any timestamps can yield an object
    # column, on which the .dt accessor would raise.
    history_df["hour"] = pd.to_datetime(history_df["timestamp"], errors="coerce").dt.hour
    command_hours = history_df["hour"].dropna()
    if command_hours.empty:
        print("Shell history has no timestamps; skipping hourly usage plot.")
    else:
        # Fix the range to 0-24 so each of the 24 bins is exactly one hour,
        # whatever hours happen to occur in the data.
        sns.histplot(command_hours, bins=24, binrange=(0, 24))
        plt.title("Command Usage by Hour")
        plt.xlabel("Hour of Day")
        plt.ylabel("Command Count")
        plt.tight_layout()
        plt.show()