In [None]:
import pandas as pd
import pickle
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import  accuracy_score
import numpy as np
import hashlib


## Preprocessing: derive the hour, encode categorical columns, and save the processed data and encoders

In [None]:
# -------- CONFIG -------- #
INPUT_FILE = "absher_guardian_50000_no_event.csv"
OUTPUT_FILE = "processed_absher.csv"
ENCODER_FILE = "encoders.pkl"


def _hour_from_time(time_text):
    """Return the integer that precedes the first ':' in a time string such as '21:01'."""
    return int(time_text.split(":")[0])


def _device_hash(device_id):
    """Turn a device-id string into an int taken from the first 8 hex digits of its MD5."""
    digest = hashlib.md5(device_id.encode()).hexdigest()
    return int(digest[:8], 16)


# -----------------------------------
# STEP 1 — LOAD THE RAW CSV AND VERIFY ITS COLUMNS
# -----------------------------------
df = pd.read_csv(INPUT_FILE)

required_cols = [
    "UserID", "City", "Age", "DeviceID", "Time",
    "IP_Risk", "Location", "IsKnownDevice",
    "Risk_Score", "Risk_Level", "Action",
]

# Stop immediately when any expected column is absent from the file.
missing = set(required_cols) - set(df.columns)
if missing:
    raise ValueError(f"Columns missing from CSV: {missing}")

# -----------------------------------
# STEP 2 — DERIVE THE "Hour" COLUMN FROM "Time"
# -----------------------------------
df["Hour"] = df["Time"].apply(_hour_from_time)

# -----------------------------------
# STEP 3 — ENCODE FEATURE AND TARGET COLUMNS
# -----------------------------------
categorical_cols = ["City", "DeviceID", "IP_Risk", "Location", "IsKnownDevice"]
target_cols = ["Risk_Level", "Action"]

# Fitted LabelEncoder per column, keyed by column name (DeviceID has none).
encoders = {}

for col in categorical_cols + target_cols:
    if col == "DeviceID":
        # Device ids are hashed to integers instead of being label-encoded.
        df[col] = df[col].apply(_device_hash)
        continue
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    encoders[col] = le

# -----------------------------------
# STEP 4 — PERSIST THE PROCESSED FRAME AND THE ENCODERS
# -----------------------------------
df.to_csv(OUTPUT_FILE, index=False)

with open(ENCODER_FILE, "wb") as f:
    pickle.dump(encoders, f)

print(
    "Preprocessing completed.",
    f"Processed file saved → {OUTPUT_FILE}",
    f"Encoders saved → {ENCODER_FILE}",
    sep="\n",
)


Preprocessing completed.
Processed file saved → processed_absher.csv
Encoders saved → encoders.pkl


## Model: train and evaluate a random forest that predicts Risk_Level

In [18]:

# -------- CONFIG -------- #
DATA_FILE = "processed_absher.csv"
ENCODER_FILE = "encoders.pkl"  # defined here but not read in this cell
MODEL_LEVEL = "model_level.pkl"
RANDOM_STATE = 42  # shared seed for the split and the forest so reruns reproduce the same result

# -----------------------------------
# STEP 1 — LOAD DATA
# -----------------------------------
df = pd.read_csv(DATA_FILE)

# FEATURES (WITHOUT TARGETS)
X = df[[
    "City", "Age", "DeviceID", "Hour",
    "IP_Risk", "Location", "IsKnownDevice"
]]

# TARGETS
y_level = df["Risk_Level"]

# -----------------------------------
# STEP 2 — TRAIN/VALIDATION SPLIT
# -----------------------------------
# 80% of the rows are used for training, 20% are held out for validation.
X_train, X_val, y_level_train, y_level_val = train_test_split(
    X, y_level,
    test_size=0.2,
    random_state=RANDOM_STATE
)

# -----------------------------------
# STEP 3 — TRAIN MODEL
# -----------------------------------
print("Training level model...")
# Seeding the forest makes the fitted model (and the accuracy printed below)
# identical on every Restart & Run All; without it each run differs.
model_level = RandomForestClassifier(n_estimators=200, random_state=RANDOM_STATE)
model_level.fit(X_train, y_level_train)

# -----------------------------------
# STEP 4 — EVALUATE ON VALIDATION SET
# -----------------------------------
y_level_pred = model_level.predict(X_val)
acc_level = accuracy_score(y_level_val, y_level_pred)
print(f"✅ Accuracy Risk_Level: {acc_level:.2f}")

# -----------------------------------
# STEP 5 — SAVE MODEL
# -----------------------------------
# The context manager guarantees the file is flushed and closed after the dump.
with open(MODEL_LEVEL, "wb") as model_file:
    pickle.dump(model_level, model_file)



Training level model...
✅ Accuracy Risk_Level: 0.91


## Show data: preview the first rows of the raw CSV

In [19]:

import pandas as pd
from IPython.display import HTML, display

# Reload the raw (unprocessed) CSV for a quick visual check.
df = pd.read_csv("absher_guardian_50000_no_event.csv")

# Jupyter only auto-renders the value of a cell's LAST expression, so a bare
# HTML(...) in the middle of the cell is silently discarded. display() emits
# the <style> block explicitly so the compact table CSS is actually applied.
display(HTML("""
<style>
table.dataframe td, table.dataframe th {
    font-size: 8px !important;
    padding: 2px !important;
    white-space: nowrap !important;
}
</style>
"""))

# Last expression of the cell: rendered as a rich table of the first 10 rows.
df.head(10)



Unnamed: 0,UserID,City,Age,DeviceID,Service,Time,IP_Risk,Location,IsKnownDevice,Risk_Score,Risk_Level,Action
0,1001,Khobar,63,fp_b231c2dbf2d7,Login,21:01,Medium,Khobar,Yes,20,Low,Allow
1,1001,Khobar,63,fp_a1689753e2ca,Passport_Renew,04:18,Low,Khobar,Yes,30,Low,Allow
2,1001,Khobar,63,fp_a1689753e2ca,Renew_ID,19:41,Low,Dammam,Yes,40,Medium,OTP
3,1001,Khobar,63,fp_b231c2dbf2d7,Renew_ID,07:57,Low,Khobar,Yes,0,Low,Allow
4,1001,Khobar,63,fp_709ae7655c8e,License_Renew,06:47,Low,Khobar,No,50,Medium,OTP
5,1001,Khobar,63,fp_a1689753e2ca,License_Renew,00:45,Low,Khobar,Yes,20,Low,Allow
6,1001,Khobar,63,fp_a1689753e2ca,Transfer_Owner,12:38,Medium,Khobar,Yes,30,Low,Allow
7,1001,Khobar,63,fp_a1689753e2ca,Renew_ID,19:03,Low,Medina,Yes,30,Low,Allow
8,1002,Khobar,66,fp_fed19ebdfb8a,Login,12:33,Medium,Medina,Yes,50,Medium,OTP
9,1002,Khobar,66,fp_fed19ebdfb8a,Car_Sale,02:54,Low,Khobar,Yes,30,Low,Allow
