In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.metrics import mean_squared_error, accuracy_score
from sklearn.pipeline import make_pipeline

# Read the incident dataset from 'data.csv' (edit this path to point at your own file)
df = pd.read_csv('data.csv')

# The free-text incident description is the only input feature.
# Two targets are taken from the same rows:
#   - 'Severity Score' -> regression target
#   - 'Severity Level' -> classification target
X = df['Incident Description']
y_score = df['Severity Score']
y_level = df['Severity Level']

# A single train_test_split call partitions the feature and both targets together,
# so every row lands on the same side of the 80/20 split for all three series.
# random_state pins the shuffle so re-runs produce the same partition.
(
    X_train, X_test,
    y_train_score, y_test_score,
    y_train_level, y_test_level,
) = train_test_split(X, y_score, y_level, test_size=0.2, random_state=42)

# TF-IDF turns each description into a sparse numeric vector, capped at 1000 terms.
vectorizer = TfidfVectorizer(max_features=1000)

# The vocabulary and IDF weights are learned from the training text only;
# the test text is merely projected onto that already-fitted vocabulary.
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

# **Model 1: Regression Task (Predict Severity Score)**
# Build a 100-tree random forest (seeded for repeatability) and train it on the
# TF-IDF features. `fit` returns the estimator itself, so construction and
# training are chained into one expression.
regressor = RandomForestRegressor(n_estimators=100, random_state=42).fit(
    X_train_vec, y_train_score
)

# Predict scores for the held-out descriptions and report the mean squared error
y_pred_score = regressor.predict(X_test_vec)
mse = mean_squared_error(y_test_score, y_pred_score)
print(f'Mean Squared Error for Severity Score (Regression): {mse}')

# **Model 2: Classification Task (Predict Severity Level)**
# Encode severity levels into numerical values.
# The encoder is fitted on every label in the dataset rather than only on the
# training split: if a rare level ends up exclusively in the test split,
# LabelEncoder.transform raises ValueError for the unseen label. Labels are
# targets, not model inputs, so fitting on all of them leaks no information
# into the classifier.
label_encoder = LabelEncoder()
label_encoder.fit(y_level)
y_train_level_encoded = label_encoder.transform(y_train_level)
y_test_level_encoded = label_encoder.transform(y_test_level)

# Using RandomForestClassifier for predicting the severity level
# (100 trees, fixed seed so the run is repeatable)
classifier = RandomForestClassifier(n_estimators=100, random_state=42)
classifier.fit(X_train_vec, y_train_level_encoded)

# Predict severity levels (as encoded integers; use
# label_encoder.inverse_transform to recover the original level names)
y_pred_level = classifier.predict(X_test_vec)

# Evaluate the classification performance on the held-out split
accuracy = accuracy_score(y_test_level_encoded, y_pred_level)
print(f'Accuracy for Severity Level (Classification): {accuracy}')


Mean Squared Error for Severity Score (Regression): 144.44219523809525
Accuracy for Severity Level (Classification): 0.7619047619047619


In [None]:
# No installation step is needed here: `pickle` is part of the Python standard
# library, and `pip install pickle` fails because no such distribution exists.

ERROR: Could not find a version that satisfies the requirement pickle (from versions: none)
ERROR: No matching distribution found for pickle

In [None]:
import pickle

# Persist every fitted object that is needed to score new incidents later:
#   - the TF-IDF vectorizer is required (not optional): new text must be
#     transformed with the same fitted vocabulary the models were trained on;
#   - the label encoder is required to map the classifier's integer
#     predictions back to the original severity level names.
# NOTE: pickle files can execute arbitrary code when loaded, so only load
# these files from a trusted location.
artifacts = {
    'severity_score_model.pkl': regressor,    # regression model
    'severity_level_model.pkl': classifier,   # classification model
    'tfidf_vectorizer.pkl': vectorizer,       # fitted text vectorizer
    'label_encoder.pkl': label_encoder,       # fitted severity-level encoder
}

for path, fitted_object in artifacts.items():
    with open(path, 'wb') as f:
        pickle.dump(fitted_object, f)
