<a href="https://colab.research.google.com/github/Bhanugt/decisiontree-impact-prediction/blob/main/decisiontree_impact_prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
import pandas as pd
import joblib
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report

# ✅ Load the actual Incident dataset
# The path below points at the Colab upload directory; change it when running elsewhere.
df = pd.read_csv("/content/Incident_Event_Log.csv")  # Update with the correct file path

# ✅ Check dataset structure
print(df.head())

# ✅ Name the target up front and fail fast with a readable message if it is absent
target_col = "impact"  # Ensure this is the correct target column name
if target_col not in df.columns:
    raise KeyError(
        f"Target column '{target_col}' not found; available columns: {list(df.columns)}"
    )

# ✅ Encode categorical variables
# Every object-dtype column (the target included, if it is object-typed) is mapped to
# integer codes. Values are cast to str first, so missing values become the literal
# category "nan" instead of raising inside LabelEncoder.
categorical_cols = df.select_dtypes(include=['object']).columns.tolist()
label_encoders = {}
for col in categorical_cols:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col].astype(str))
    label_encoders[col] = le  # Store encoders for later use

# ✅ Define Features (X) and Target (y)
# NOTE(review): every remaining column is used as a feature, including `ID`, which looks
# like a per-incident identifier in the preview above — confirm that this is intended.
X = df.drop(columns=[target_col])
y = df[target_col]

# ✅ Split dataset (80% train / 20% test, fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# ✅ Standardize numerical features
# The scaler is fitted on the training split only and then applied to the test split,
# so test-set statistics never influence the transform.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ✅ Train Decision Tree Model
model = DecisionTreeClassifier(max_depth=5, random_state=42)
model.fit(X_train_scaled, y_train)

# ✅ Make Predictions
y_pred = model.predict(X_test_scaled)

# ✅ Calculate Model Performance Metrics
# zero_division=0: a class that is never predicted contributes 0 (not 1) to the weighted
# averages. Using 1 here would reward the model for ignoring a class and overstate the
# reported precision / F1.
zero_div = 0
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted', zero_division=zero_div)
recall = recall_score(y_test, y_pred, average='weighted', zero_division=zero_div)
f1 = f1_score(y_test, y_pred, average='weighted', zero_division=zero_div)

# ✅ Print Metrics
print("\n🎯 Model Performance Metrics:")
print(f"✅ Accuracy:  {accuracy:.4f}")
print(f"✅ Precision: {precision:.4f}")
print(f"✅ Recall:    {recall:.4f}")
print(f"✅ F1 Score:  {f1:.4f}")
print("\n🔹 Classification Report:\n", classification_report(y_test, y_pred, zero_division=zero_div))

# ✅ Save Model, Scaler, and Label Encoders
# All three artifacts are needed at inference time: the model was trained on scaled,
# label-encoded inputs, so new data must pass through the same encoders and scaler.
joblib.dump(model, "decision_tree_impact.pkl")
joblib.dump(scaler, "scaler.pkl")
joblib.dump(label_encoders, "label_encoders.pkl")

print("✅ Model, Scaler, and Encoders Saved Successfully!")


           ID           ID_status active  count_reassign  count_opening  \
0  INC0000001              Closed  False             1.0            0.0   
1  INC0000002              Closed  False             1.0            0.0   
2  INC0000003    Awaiting Problem   True             2.0            0.0   
3  INC0000004  Awaiting User Info   True             4.0            0.0   
4  INC0000005              Closed   True             3.0            0.0   

   count_updated    ID_caller       opened_by          opened_time  \
0           18.0   Caller 917  Opened by  180  2016-02-29 08:22:00   
1            3.0  Caller 4907  Opened by  131  2016-02-29 09:15:00   
2            5.0  Caller 4539  Opened by  433  2016-02-29 09:40:00   
3            7.0  Caller 3304   Opened by  24  2016-02-29 08:23:00   
4           18.0  Caller 5540  Opened by  180  2016-02-29 08:56:00   

       Created_by  ...      Category Id user_symptom Support_group  \
0   Created by 81  ...    Subcategory 9    Symptom 2      

In [4]:
from google.colab import files

# ✅ Download the trained model, the scaler, and the label encoders (in that order)
for artifact_name in ("decision_tree_impact.pkl", "scaler.pkl", "label_encoders.pkl"):
    files.download(artifact_name)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [5]:
pip install streamlit joblib pandas numpy scikit-learn


Collecting streamlit
  Downloading streamlit-1.43.2-py2.py3-none-any.whl.metadata (8.9 kB)
Collecting watchdog<7,>=2.1.5 (from streamlit)
  Downloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.3/44.3 kB[0m [31m1.9 MB/s[0m eta [36m0:00:00[0m
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading streamlit-1.43.2-py2.py3-none-any.whl (9.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.7/9.7 MB[0m [31m55.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m94.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl (79 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.1/79.1 kB[0m [31m6.3 MB/s[0m eta [36m0:00:00[0m
[