# Bug Severity Prediction Model — Week 4 Assignment

In [None]:
# Install dependencies (Colab only)
# !pip install pandas scikit-learn joblib nltk

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
import joblib

## Load Dataset

In [None]:
# Read the issues CSV into a DataFrame, then preview its first five rows
# (the trailing expression is what the notebook cell displays).
df = pd.read_csv(filepath_or_buffer='data/github_issues.csv')
df.head(n=5)

## Preprocessing & TF-IDF Vectorization

In [None]:
# Build the text (X) and label (y) series used for training.
# Rows without a severity label carry no supervision signal; casting them with
# astype(str) would turn NaN into a bogus 'nan' class, so drop them first.
labeled = df.dropna(subset=['severity'])

# Missing issue text becomes an empty document instead of the literal
# token 'nan' that astype(str) would otherwise produce.
X = labeled['issue_text'].fillna('').astype(str)
y = labeled['severity'].astype(str)

# TF-IDF features: drop English stop words, ignore terms that occur in more
# than 90% of documents (max_df) or in fewer than 2 documents (min_df).
vectorizer = TfidfVectorizer(stop_words='english', max_df=0.9, min_df=2)
# NOTE(review): the vectorizer is fitted on the full corpus here, before the
# train/test split, so vocabulary and IDF weights see the test rows too.
# Consider splitting the raw text first and fitting on the training part only.
X_vect = vectorizer.fit_transform(X)

## Train/Test Split

In [None]:
# Hold out 20% of the rows for evaluation; random_state fixes the shuffle so
# the split is reproducible.
# Stratify on the label so each severity class keeps its proportion in both
# partitions. Stratification needs at least two samples per class, so fall
# back to a plain random split when some class is rarer than that.
class_counts = y.value_counts()
stratify_labels = y if class_counts.min() >= 2 else None

X_train, X_test, y_train, y_test = train_test_split(
    X_vect, y, test_size=0.2, random_state=42, stratify=stratify_labels
)

## Train Model

In [None]:
# Fit a logistic-regression classifier on the training split, allowing up to
# 300 solver iterations. fit() returns the estimator itself, so the chained
# call binds the fitted model.
model = LogisticRegression(max_iter=300).fit(X_train, y_train)
# Trailing expression so the notebook cell still displays the fitted estimator.
model

## Evaluation

In [None]:
# Predict labels for the held-out rows and print the per-class
# precision / recall / F1 summary.
predictions = model.predict(X_test)
report = classification_report(y_true=y_test, y_pred=predictions)
print(report)

## Save Model

In [None]:
# Persist the fitted vectorizer together with the classifier in a single
# file, so inference code can reload both with one joblib.load call.
artifact = {'vectorizer': vectorizer, 'model': model}
joblib.dump(artifact, 'model.joblib')
print('Model saved as model.joblib')

## Test Model With Custom Inputs

In [None]:
# Smoke-test the trained model on a couple of hand-written issue titles.
sample_issues = [
    'App crashes on startup after update',
    'Missing comma in documentation',
]

# Reuse the already-fitted vectorizer (transform only, no refit) so the
# samples are mapped into the same feature space the model was trained on.
sample_vect = vectorizer.transform(sample_issues)
sample_predictions = model.predict(sample_vect)
print(sample_predictions)