In [1]:
import pandas as pd


In [2]:
# Load the task dataset; the relative path is resolved against the
# notebook's current working directory.
df = pd.read_csv("cleaned_tasks.csv")

In [3]:
from sklearn.feature_extraction.text import TfidfVectorizer

# Replace missing descriptions with empty strings before vectorizing:
# pandas reads empty CSV text fields back as NaN, and TfidfVectorizer
# raises on NaN documents. This is a no-op when nothing is missing.
descriptions = df['Description'].fillna('')

# TF-IDF features over the description text, with the vocabulary capped
# at 1000 terms.
# NOTE(review): the vectorizer is fitted on every row, including rows that
# later end up in the test split, so held-out scores may be slightly
# optimistic. Fitting on the training rows only (e.g. via a Pipeline)
# would remove that leak.
vectorizer = TfidfVectorizer(max_features=1000)
X = vectorizer.fit_transform(descriptions)

# Target for task classification
y_task = df['Task_Type_Code']

# Target for priority prediction
y_priority = df['Priority_code']


In [4]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation. Both targets are split from the
# same feature matrix with the same seed.
split_options = {"test_size": 0.2, "random_state": 42}

# Split for the task-type target.
(
    X_train_task,
    X_test_task,
    y_train_task,
    y_test_task,
) = train_test_split(X, y_task, **split_options)

# Split for the priority target.
(
    X_train_priority,
    X_test_priority,
    y_train_priority,
    y_test_priority,
) = train_test_split(X, y_priority, **split_options)


In [5]:
from sklearn.svm import LinearSVC
from sklearn.metrics import classification_report, accuracy_score

# Linear SVM for task-type classification.
# The solver uses a random number generator, so the seed is pinned to make
# the fit reproducible across Restart & Run All (42 matches the split seed).
svm_model = LinearSVC(random_state=42)
svm_model.fit(X_train_task, y_train_task)

# Predict on the held-out rows and report accuracy plus per-class metrics.
y_pred_svm = svm_model.predict(X_test_task)

# NOTE(review): the recorded run scores 1.00 on every class. Confirm that
# the description text does not simply contain the task-type label.
print("SVM - Task Type Classification:")
print("Accuracy:", accuracy_score(y_test_task, y_pred_svm))
print(classification_report(y_test_task, y_pred_svm))


SVM - Task Type Classification:
Accuracy: 1.0
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        11
           1       1.00      1.00      1.00        14
           2       1.00      1.00      1.00        13
           3       1.00      1.00      1.00        10
           4       1.00      1.00      1.00         9
           5       1.00      1.00      1.00        10
           6       1.00      1.00      1.00        15
           7       1.00      1.00      1.00         9
           8       1.00      1.00      1.00         9

    accuracy                           1.00       100
   macro avg       1.00      1.00      1.00       100
weighted avg       1.00      1.00      1.00       100



In [6]:
from sklearn.ensemble import RandomForestClassifier

# Random forest for priority prediction.
# Tree construction is randomized (bootstrap samples and feature subsets),
# so the seed is pinned; without it the reported metrics and the saved model
# change on every run (42 matches the split seed).
rf_model = RandomForestClassifier(random_state=42)
rf_model.fit(X_train_priority, y_train_priority)

# Predict on the held-out rows and report accuracy plus per-class metrics.
y_pred_rf = rf_model.predict(X_test_priority)

# NOTE(review): in the recorded run class 0 makes up 67 of the 100 test rows
# and the other two classes are barely recalled, so accuracy alone
# overstates the model. Read the macro-averaged scores as well.
print("Random Forest - Priority Prediction:")
print("Accuracy:", accuracy_score(y_test_priority, y_pred_rf))
print(classification_report(y_test_priority, y_pred_rf))


Random Forest - Priority Prediction:
Accuracy: 0.57
              precision    recall  f1-score   support

           0       0.66      0.81      0.72        67
           1       0.09      0.07      0.08        14
           2       0.29      0.11      0.15        19

    accuracy                           0.57       100
   macro avg       0.35      0.33      0.32       100
weighted avg       0.51      0.57      0.53       100



In [8]:
import joblib


# Persist both fitted classifiers and the fitted vectorizer so that new
# descriptions can be transformed and scored without retraining.
# The files are written to the current working directory.
joblib.dump(value=svm_model, filename='task_model.pkl')
joblib.dump(value=rf_model, filename='priority_model.pkl')

# Kept as the final expression so the cell still displays the list of
# written filenames returned by this call.
joblib.dump(value=vectorizer, filename='tfidf_vectorizer.pkl')


['tfidf_vectorizer.pkl']