In [None]:
import numpy as np
import pandas as pd
from sklearn.ensemble import IsolationForest

# --- Step 1: synthetic data ------------------------------------------------
# 200 "safe" points drawn from a normal distribution centred on 100, plus
# 20 "hacker" points drawn uniformly from [200, 300). Seeded for repeatability.
np.random.seed(42)
safe_users = np.random.normal(loc=100, scale=10, size=(200, 2))
hackers = np.random.uniform(low=200, high=300, size=(20, 2))
X = np.vstack((safe_users, hackers))

# --- Step 2: unsupervised fit (no 'y' labels are passed) --------------------
# contamination=0.1 is our guess that roughly 10% of the rows are anomalous.
# fit() returns the fitted estimator, so construction and fitting can be chained.
model = IsolationForest(contamination=0.1, random_state=42).fit(X)

# --- Step 3: predict ---------------------------------------------------------
# predict() encodes its verdict as:
#    1 -> inlier  (Safe)
#   -1 -> outlier (Anomaly / Hacker)
predictions = model.predict(X)

# --- Step 4: tally the verdicts ---------------------------------------------
df = pd.DataFrame(X, columns=['Dwell', 'Flight']).assign(Verdict=predictions)

# With 220 rows and contamination=0.1, expect roughly 10% of rows flagged -1
# (close to the 20 planted hackers) and the rest labelled 1.
print(df['Verdict'].value_counts())

Verdict
 1    198
-1     22
Name: count, dtype: int64


In [4]:
import pandas as pd
import numpy as np
from sklearn.ensemble import IsolationForest

# Seed the global NumPy RNG (a different seed from the first experiment).
np.random.seed(999)

# Safe users: 475 rows of two timing features, clustered around 120 ms.
safe_data = np.random.normal(loc=120, scale=15, size=(475, 2))

# "Sleeper bots": 25 rows, unnaturally fast (~50 ms) and very consistent
# (small spread) -- bots are often "too perfect" or "too fast".
bot_data = np.random.normal(loc=50, scale=5, size=(25, 2))

# Stack both groups row-wise, then shuffle the rows in place so the
# original ordering (safe first, bots last) no longer reveals who is who.
X = np.vstack((safe_data, bot_data))
np.random.shuffle(X)

# Wrap the array in a DataFrame for easier viewing.
df = pd.DataFrame(data=X, columns=['Dwell_Time', 'Flight_Time'])

print(f'Loaded {len(df)} logs. Searching for anomalies...')

Loaded 500 logs. Searching for anomalies...


In [10]:
# Fit the forest on the unlabeled logs. contamination=0.05 tells the model
# to flag about 5% of the rows. fit() returns the fitted estimator itself.
model = IsolationForest(contamination=0.05, random_state=42).fit(X)

# predict() returns 1 for inliers and -1 for anomalies (the "hackers").
predictions = model.predict(X)

# Rebuild the frame from X and attach the verdict for each row.
df = pd.DataFrame(X, columns=['Dwell_Time', 'Flight_Time']).assign(
    Anomalies=predictions
)

# Count the rows flagged as anomalous (-1).
hackers_count = int((predictions == -1).sum())

print(df['Anomalies'].value_counts())
print(f"I caught {hackers_count} hackers")

Anomalies
 1    475
-1     25
Name: count, dtype: int64
I caught 25 hackers


In [11]:
import matplotlib.pyplot as plt

# Scatter plot of the Isolation Forest verdicts.
# NOTE: the DataFrame in scope stores the model output in the 'Anomalies'
# column (1 = inlier, -1 = anomaly). Filtering on a 'Verdict' column would
# raise KeyError because this frame has no such column.
fig, ax = plt.subplots(figsize=(10, 6))

# 1. Plot the Normal Users (Anomalies == 1)
normal = df[df['Anomalies'] == 1]
ax.scatter(
    normal['Dwell_Time'],
    normal['Flight_Time'],
    c='blue',
    label='Safe Staff',
    alpha=0.5,
)

# 2. Plot the Hackers (Anomalies == -1)
hackers = df[df['Anomalies'] == -1]
ax.scatter(
    hackers['Dwell_Time'],
    hackers['Flight_Time'],
    c='red',
    label='Anomalies',
    s=100,  # larger markers so the anomalies stand out
)

ax.set_title("The Isolation Forest Result")
ax.set_xlabel("Dwell Time (ms)")
ax.set_ylabel("Flight Time (ms)")
ax.legend()
plt.show()

ModuleNotFoundError: No module named 'matplotlib'

In [12]:
!pip i matplotlib

'pip' is not recognized as an internal or external command,
operable program or batch file.
