In [2]:
import pandas as pd

In [3]:
# Load the processed incident master table.
# The path is relative to the notebooks/ folder (the same convention as the
# "../models" path used when the model is saved), so the notebook is not tied
# to one machine's drive layout. Forward slashes work on Windows as well.
incident_df = pd.read_csv("../data/processed/ucl_incident_master.csv")

In [4]:
# Preview the first five incidents to sanity-check the columns that were loaded.
incident_df.head(n=5)

Unnamed: 0,number,opened_at,resolved_at,closed_at,made_sla,reassignment_count,reopen_count,sys_mod_count,category,subcategory,impact,urgency,priority,assignment_group,u_symptom,cmdb_ci,resolution_time_hrs
0,INC0000045,2016-02-29 01:16:00,2016-02-29 11:29:00,2016-03-05 12:00:00,True,0,0,4,Category 55,Subcategory 170,2 - Medium,2 - Medium,3 - Moderate,Group 56,Symptom 72,?,10.216667
1,INC0000047,2016-02-29 04:40:00,2016-03-01 09:52:00,2016-03-06 10:00:00,True,1,0,8,Category 40,Subcategory 215,2 - Medium,2 - Medium,3 - Moderate,Group 70,Symptom 471,?,29.2
2,INC0000057,2016-02-29 06:10:00,2016-03-01 02:55:00,2016-03-06 03:00:00,True,0,0,6,Category 20,Subcategory 125,2 - Medium,2 - Medium,3 - Moderate,Group 70,Symptom 471,?,20.75
3,INC0000060,2016-02-29 06:38:00,2016-03-02 12:06:00,2016-03-07 13:00:00,True,0,0,3,Category 9,Subcategory 97,2 - Medium,2 - Medium,3 - Moderate,Group 25,Symptom 450,?,53.466667
4,INC0000062,2016-02-29 06:58:00,2016-02-29 15:51:00,2016-03-05 16:00:00,False,1,0,7,Category 53,Subcategory 168,2 - Medium,2 - Medium,3 - Moderate,Group 70,Symptom 232,?,8.883333


In [5]:
# Define the target.
# sla_breached: 1 = SLA breached, 0 = SLA met (the logical inverse of made_sla).
incident_df["sla_breached"] = 1 - incident_df["made_sla"].astype(int)

# Class balance of the target, shown as proportions.
incident_df["sla_breached"].value_counts(normalize=True)


sla_breached
0    0.6342
1    0.3658
Name: proportion, dtype: float64

In [6]:
# Feature selection: the raw columns the model is allowed to see.
# opened_at is kept here only so time features can be derived from it later.
FEATURE_COLS = [
    "priority", "impact", "urgency",
    "category", "subcategory",
    "opened_at",
]

# Working copy holding the selected features plus the target column.
df = incident_df.loc[:, [*FEATURE_COLS, "sla_breached"]].copy()


Time feature engineering (inference-safe)

In [7]:
# Derive time-of-ticket features from opened_at, then drop the raw timestamp.
#
# The guard makes this cell safe to re-run: the original dropped opened_at
# in place, so a second execution raised KeyError on df["opened_at"].
if "opened_at" in df.columns:
    # Unparseable timestamps become NaT (errors="coerce"); hour and day_of_week
    # are then NaN for those rows, while is_weekend evaluates to 0.
    opened_at = pd.to_datetime(df["opened_at"], errors="coerce")
    day_of_week = opened_at.dt.dayofweek  # Monday=0 ... Sunday=6

    df = df.drop(columns=["opened_at"]).assign(
        hour=opened_at.dt.hour,
        day_of_week=day_of_week,
        is_weekend=day_of_week.isin([5, 6]).astype(int),
    )


In [8]:
from sklearn.model_selection import train_test_split

# Separate the label from the predictors.
y = df["sla_breached"]
X = df.drop(columns="sla_breached")

# 80/20 hold-out split, stratified on the label so both partitions keep the
# same breach rate; the fixed seed makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)


In [9]:
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression


In [10]:
# Column groups routed to the two preprocessing branches.
categorical_features = ["priority", "impact", "urgency", "category", "subcategory"]
numeric_features = ["hour", "day_of_week", "is_weekend"]

# Categorical branch: fill gaps with the most frequent value, then one-hot
# encode; categories unseen during fit are ignored at predict time.
categorical_pipeline = Pipeline([
    ("imputer", SimpleImputer(strategy="most_frequent")),
    ("onehot", OneHotEncoder(handle_unknown="ignore")),
])

# Numeric branch: median imputation only (no scaling step).
numeric_pipeline = Pipeline([
    ("imputer", SimpleImputer(strategy="median")),
])

# Apply each branch to its own column group.
preprocessor = ColumnTransformer([
    ("cat", categorical_pipeline, categorical_features),
    ("num", numeric_pipeline, numeric_features),
])

# End-to-end baseline: preprocessing followed by a class-weighted logistic
# regression.
model = Pipeline([
    ("preprocessor", preprocessor),
    (
        "classifier",
        LogisticRegression(max_iter=1000, class_weight="balanced", n_jobs=-1),
    ),
])


In [11]:
# Fit the preprocessing steps and the classifier on the training partition only.
model.fit(X=X_train, y=y_train)
print(" SLA breach baseline trained successfully")




 SLA breach baseline trained successfully


In [12]:
from sklearn.metrics import classification_report, confusion_matrix

# Hard predictions on the hold-out set (the model's default decision rule).
y_pred = model.predict(X_test)

# Compute both summaries first, then print them under their headings.
report_text = classification_report(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)

print("Classification Report:\n")
print(report_text)

print("\nConfusion Matrix:\n")
print(conf_matrix)


Classification Report:

              precision    recall  f1-score   support

           0       0.76      0.71      0.73      3161
           1       0.55      0.61      0.58      1823

    accuracy                           0.67      4984
   macro avg       0.65      0.66      0.66      4984
weighted avg       0.68      0.67      0.68      4984


Confusion Matrix:

[[2244  917]
 [ 711 1112]]


In [13]:
import numpy as np
from sklearn.metrics import classification_report

# Predicted probability of the positive class (second predict_proba column).
y_proba = model.predict_proba(X_test)[:, 1]

# Compare precision/recall at progressively lower decision thresholds.
for thresh in (0.5, 0.4, 0.3):
    # Flag an incident as a breach when its probability reaches the threshold.
    y_pred_thresh = np.where(y_proba >= thresh, 1, 0)
    print(f"\nThreshold = {thresh}")
    print(classification_report(y_test, y_pred_thresh))



Threshold = 0.5
              precision    recall  f1-score   support

           0       0.76      0.71      0.73      3161
           1       0.55      0.61      0.58      1823

    accuracy                           0.67      4984
   macro avg       0.65      0.66      0.66      4984
weighted avg       0.68      0.67      0.68      4984


Threshold = 0.4
              precision    recall  f1-score   support

           0       0.83      0.46      0.60      3161
           1       0.47      0.83      0.60      1823

    accuracy                           0.60      4984
   macro avg       0.65      0.65      0.60      4984
weighted avg       0.70      0.60      0.60      4984


Threshold = 0.3
              precision    recall  f1-score   support

           0       0.89      0.28      0.43      3161
           1       0.43      0.94      0.59      1823

    accuracy                           0.52      4984
   macro avg       0.66      0.61      0.51      4984
weighted avg       0.72



  Interpretation (at threshold 0.4)



In [None]:
# Recall = 0.83 (for the SLA-breach class)
# Out of all incidents that breached SLA, 83% were correctly identified by the model at threshold 0.4

In [None]:
# Missing a breach = angry customer + SLA penalty

# False alarm = extra attention (acceptable)

High recall is the business goal.

In [None]:
# Precision = 0.47–0.55

In [None]:
# Some tickets we flag as risky will not actually breach.

In [15]:
import os
from pathlib import Path

# joblib is used below but was not imported in any cell of this notebook, so
# a fresh "Restart & Run All" failed here with NameError.
import joblib

# Create the models folder if it is missing (parents=True also creates any
# missing intermediate directories instead of raising FileNotFoundError).
MODELS_DIR = Path("../models")
MODELS_DIR.mkdir(parents=True, exist_ok=True)

# Persist the full fitted pipeline (preprocessing + classifier) as one artifact.
MODEL_PATH = MODELS_DIR / "sla_breach_model.joblib"
joblib.dump(model, MODEL_PATH)
print(f"SLA model saved at: {MODEL_PATH.absolute()}")


SLA model saved at: d:\PROJECT\IT SERVICE\notebooks\..\models\sla_breach_model.joblib


In [19]:
import json

# Decision threshold and label metadata written alongside the model artifact.
config = {
    "sla_breach_threshold": 0.4,
    "positive_class": "SLA_BREACH",
    "note": "Threshold chosen to maximize breach recall",
}

# Serialize first, then write the text out in a single call.
config_json = json.dumps(config, indent=2)
with open("../models/sla_model_config.json", "w") as config_file:
    config_file.write(config_json)
