In [1]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.model_selection import StratifiedShuffleSplit
import joblib
from sklearn.model_selection import train_test_split, GridSearchCV
import nltk
from nltk.corpus import stopwords
from sklearn.pipeline import Pipeline
from nltk.tokenize import word_tokenize
from nltk.stem import PorterStemmer
from nltk.corpus import stopwords

**Prepare tokenizer, stemmer and stop words**

In [2]:
nltk.download('punkt')
nltk.download('stopwords')
stop_words = set(stopwords.words('english'))
stemmer = PorterStemmer()

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


True

**Mount the drive**

In [4]:
# Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


**Load the claddifier, TF-IDF vectorizer and label encoder**

In [13]:
# Load the trained model and vectorizer
svm_classifier = joblib.load('/content/drive/My Drive/checkpoints/svm_classifier_model.joblib')
tfidf_vectorizer = joblib.load('/content/drive/My Drive/checkpoints/tfidf_vectorizer.joblib')
label_encoder = joblib.load('/content/drive/My Drive/checkpoints/label_encoder.joblib')

**Remove stop words from the bug report**

In [16]:
def remove_stopwords(tokens):
    return [word for word in tokens if word.lower() not in stop_words]

**Apply stemming on the bug tokens**

In [18]:
def stem_tokens(tokens):
    return [stemmer.stem(word) for word in tokens]

**Get the class using the svm_classifier**

In [22]:
def predict_summary(summary):
    # Preprocess the input summary
    summary_tfidf = tfidf_vectorizer.transform(summary)

    # Predict the class
    predicted_class = svm_classifier.predict(summary_tfidf)

    return predicted_class[0]

**Inference**

In [29]:
# Get the input bug report
bug = input("Enter the bug report")
# Apply tokenization
tokens = word_tokenize(bug)
# Remove stop words
remove_stopwords(tokens)
# Apply stemming
stem_tokens(tokens)
# Predict the class using the svm_classifier after applying TF-IDF
predicted_class = predict_summary(tokens)
print(f"The predicted class is: {predicted_class}")
# Decode the numeric labels back to the original class names
decoded_prediction = label_encoder.inverse_transform([predicted_class])
print(f"The Developer name is: {decoded_prediction}")

Enter the bug reportScrolling with some scroll mice touchpad etc scrolls down but not up Product Version see aboutversion URLs if applicable 0214927Other browsers tested Firefox IE Add OK or FAIL after other browsers where you have tested this issueSafari 3 Firefox 3 OK IE 7OK. What steps will reproduce the problem1 Open any webpage on compaq 6715s running vista2. Try scrolling with the touchpad3 Scrolling down will work  but up will not. What is the expected result. The page to scroll up. What happens instead The page doesnt move. Please provide any additional information below Attach a screenshot if possible. Only a minor bug
The predicted class is: 1339
The Developer name is: ['mdt-papyrus-inbox']


In [None]:
bug = '''Scrolling with some scroll mice touchpad etc scrolls down but not up Product Version
       see aboutversion URLs if applicable 0214927Other browsers tested Firefox
       IE Add OK or FAIL after other browsers where you have tested this issueSafari 3
       Firefox 3 OK
       IE 7OK
       What steps will reproduce the problem1 Open any webpage on compaq 6715s running vista2
       Try scrolling with the touchpad3 Scrolling down will work  but up will not
       What is the expected result
       The page to scroll up
       What happens instead The page doesnt move
       Please provide any additional information below Attach a screenshot if possible
       Only a minor bug'''