In [31]:
# Mount Google Drive at /content/drive.
# The prediction script written further down loads its joblib checkpoints from
# '/content/drive/My Drive/checkpoints/', so this cell has to run first on a
# fresh Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# NOTE(review): in the recorded run this pinned install fails while building the
# wheel (see this cell's output), so the pin is not actually in effect and the
# checkpoints are loaded with whatever scikit-learn the runtime already ships.
# Presumably 0.24.1 is the version the checkpoints were pickled with - TODO
# confirm, then either use a runtime where this version installs or re-export
# the models with the current scikit-learn.
!pip install scikit-learn==0.24.1


Collecting scikit-learn==0.24.1
  Downloading scikit-learn-0.24.1.tar.gz (7.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.4/7.4 MB[0m [31m15.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: scikit-learn
  [1;31merror[0m: [1msubprocess-exited-with-error[0m
  
  [31m×[0m [32mBuilding wheel for scikit-learn [0m[1;32m([0m[32mpyproject.toml[0m[1;32m)[0m did not run successfully.
  [31m│[0m exit code: [1;36m1[0m
  [31m╰─>[0m See above for output.
  
  [1;35mnote[0m: This error originates from a subprocess, and is likely not a problem with pip.
  Building wheel for scikit-learn (pyproject.toml) ... [?25l[?25herror
[31m  ERROR: Failed building wheel for scikit-learn[0m[31m
[0mFailed to build scikit-learn
[31mERROR: Could not build wheels for

In [48]:
%%writefile predict_developers.py

import nltk
from nltk.stem import PorterStemmer
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
import joblib
import sys
import json
import numpy as np

# Remove stop words from the bug report
def remove_stopwords(tokens, stop_words):
    """Drop every token whose lowercase form is listed in ``stop_words``.

    Args:
        tokens: Iterable of string tokens.
        stop_words: Container of stop words; membership is tested against the
            lowercased token, so its entries are expected to be lowercase.

    Returns:
        A new list with the surviving tokens in their original order and
        original casing. The input is not modified.
    """
    def _is_content_word(token):
        return token.lower() not in stop_words

    return list(filter(_is_content_word, tokens))

# Apply stemming on the bug tokens
def stem_tokens(tokens, stemmer):
    """Stem each token with the supplied stemmer.

    Args:
        tokens: Iterable of string tokens.
        stemmer: Object exposing a ``stem(word)`` method.

    Returns:
        A new list holding ``stemmer.stem(token)`` for every token, in the
        same order. The input is not modified.
    """
    stemmed = []
    for token in tokens:
        stemmed.append(stemmer.stem(token))
    return stemmed

# Get the class using the svm_classifier
def predict_summary(summary, tfidf_vectorizer, svm_classifier):
    """Predict a single class for a bug summary.

    Args:
        summary: Either one text string or an iterable of text documents.
            A bare string is wrapped in a one-element list before being
            vectorized; any other value is passed to the vectorizer unchanged.
        tfidf_vectorizer: Fitted vectorizer exposing ``transform(documents)``
            (presumably a scikit-learn ``TfidfVectorizer`` - its ``transform``
            expects an iterable of documents and rejects a bare string).
        svm_classifier: Fitted classifier exposing ``predict(features)``.

    Returns:
        The first element of the classifier's prediction, i.e. the predicted
        class of the first (or only) document.
    """
    # A plain string is one document, not an iterable of documents.
    documents = [summary] if isinstance(summary, str) else summary

    summary_tfidf = tfidf_vectorizer.transform(documents)

    predicted_class = svm_classifier.predict(summary_tfidf)

    return predicted_class[0]

# Define the prediction function to get top 5 classes
def predict_top_5_classes(summary, tfidf_vectorizer, svm_classifier, label_encoder):
    """Return the indices of the five most probable classes for a summary.

    Args:
        summary: Iterable of string tokens; they are joined with single spaces
            into one document before vectorizing.
        tfidf_vectorizer: Fitted vectorizer exposing ``transform(documents)``.
        svm_classifier: Fitted classifier exposing ``predict_proba(features)``.
        label_encoder: Not used by this function; kept in the signature for
            its callers.

    Returns:
        A set with the positions (column indices of the ``predict_proba`` row)
        of the up-to-five highest probabilities. The indices are not decoded
        into labels.
    """
    document = ' '.join(summary)
    features = tfidf_vectorizer.transform([document])
    class_scores = svm_classifier.predict_proba(features)[0]
    # Highest probability first, then keep the leading five positions.
    ranked_indices = np.argsort(class_scores)[::-1]
    return set(ranked_indices[:5])

def Inference(bug_report, stop_words, stemmer, tfidf_vectorizer, svm_classifier, label_encoder):
    """Preprocess a bug report and return its top-5 predicted class indices.

    The report is tokenized, stop words are removed, the remaining tokens are
    stemmed, and the result is handed to ``predict_top_5_classes``.

    Args:
        bug_report: Raw bug description text.
        stop_words: Container of lowercase stop words to drop.
        stemmer: Object exposing ``stem(word)``.
        tfidf_vectorizer: Fitted vectorizer forwarded to the predictor.
        svm_classifier: Fitted probabilistic classifier forwarded to the
            predictor.
        label_encoder: Forwarded unchanged to ``predict_top_5_classes``.

    Returns:
        The set of class indices returned by ``predict_top_5_classes``.
    """
    # Apply tokenization
    tokens = word_tokenize(bug_report)
    # remove_stopwords and stem_tokens return NEW lists and leave their input
    # untouched, so their results must be captured; otherwise the raw tokens
    # reach the vectorizer and neither preprocessing step has any effect.
    tokens = remove_stopwords(tokens, stop_words)
    tokens = stem_tokens(tokens, stemmer)
    # Predict the top 5 classes using the svm_classifier after applying TF-IDF
    top_5_classes = predict_top_5_classes(tokens, tfidf_vectorizer, svm_classifier, label_encoder)

    return top_5_classes

def main():
    """Recommend up to five developers for a new bug report.

    Reads one JSON document from ``sys.argv[1]`` with the keys
    ``bugDescription`` (text of the new bug) and ``developersData`` (a list of
    objects carrying ``developerID`` and ``oldBugsDescription``).

    Every developer with previous bugs is scored by how many of the new bug's
    top-5 predicted classes also appear in the top-5 classes of each of their
    old bugs (summed over the old bugs). Developers are ranked by that score,
    highest first; ties keep their input order. Developers without previous
    bugs are then mixed in so they get a chance to work on bugs:

    * if the ranked list is already full, the last entry is replaced by the
      first developer without bugs;
    * otherwise developers without bugs are appended, in input order, until
      the list is full or they run out.

    The result is printed as ``Recommended Developers: [...]``.
    """
    max_recommendations = 5

    if len(sys.argv) < 2:
        sys.exit("Usage: python predict_developers.py '<json payload>'")

    # Parse the payload first so malformed input fails before any download or
    # model loading happens.
    data = json.loads(sys.argv[1])
    # Extract bug description
    bug_description = data["bugDescription"]

    # Prepare tokenizer, stemmer and stop words
    nltk.download('punkt')
    nltk.download('stopwords')
    stop_words = set(stopwords.words('english'))
    stemmer = PorterStemmer()

    # Load the classifier, TF-IDF vectorizer and label encoder.
    # NOTE(security): joblib.load unpickles arbitrary objects and can execute
    # code while doing so; only load checkpoints you produced yourself.
    svm_classifier = joblib.load('/content/drive/My Drive/checkpoints/svm_classifier_model_with_probability.joblib')
    tfidf_vectorizer = joblib.load('/content/drive/My Drive/checkpoints/tfidf_vectorizer.joblib')
    label_encoder = joblib.load('/content/drive/My Drive/checkpoints/label_encoder.joblib')

    input_bug_top_5_classes = Inference(bug_description, stop_words, stemmer, tfidf_vectorizer, svm_classifier, label_encoder)

    # developer id -> number of classes shared with the new bug
    developers_bugs_classes = {}

    # Ordered and de-duplicated on purpose: popping from a set would pick an
    # arbitrary developer, making the recommendation differ between runs.
    developers_with_no_bugs = []

    # Extract developers' data
    for developer in data["developersData"]:
        developer_id = developer["developerID"]
        old_bugs = developer["oldBugsDescription"]
        # Developers without history cannot be scored; remember them instead.
        if not old_bugs:
            if developer_id not in developers_with_no_bugs:
                developers_with_no_bugs.append(developer_id)
            continue

        common_classes_count = 0
        for bug in old_bugs:
            predicted_top_5_classes = Inference(bug, stop_words, stemmer, tfidf_vectorizer, svm_classifier, label_encoder)
            common_classes = input_bug_top_5_classes.intersection(predicted_top_5_classes)
            common_classes_count += len(common_classes)
        developers_bugs_classes[developer_id] = common_classes_count

    # Sort developers by common class counts in descending order
    # (sorted() is stable, so equal scores keep their input order).
    sorted_developers = sorted(developers_bugs_classes.items(), key=lambda item: item[1], reverse=True)

    # Keep at most `max_recommendations` developers; slicing copes with
    # shorter lists on its own.
    recommended_developers = [developer_id for developer_id, _ in sorted_developers[:max_recommendations]]

    # If there are developers that didn't solve bugs before, give them a slot
    # so they get to work on bugs.
    if developers_with_no_bugs:
        if len(recommended_developers) == max_recommendations:
            # List is full: replace the lowest-ranked recommendation with the
            # first developer that has not solved a bug before.
            recommended_developers[-1] = developers_with_no_bugs[0]
        else:
            # List is not full: append developers without bugs until they run
            # out or the list reaches `max_recommendations`.
            free_slots = max_recommendations - len(recommended_developers)
            recommended_developers.extend(developers_with_no_bugs[:free_slots])

    # Print the results
    print("Recommended Developers:", recommended_developers)

# Run the recommendation only when this file is executed as a script (the
# notebook invokes it as `!python predict_developers.py '<json>'`), not when
# it is imported as a module.
if __name__ == "__main__":
    main()

Overwriting predict_developers.py


In [47]:
!python predict_developers.py '{"bugDescription": "Maximize on second larger monitor not working ", "developersData": [{"developerID": "667a76f471631147e0b6e0d", "jobTitle": "Backend Developer", "oldBugsDescription": []},{"developerID": "667a76f471631147e0b6e0ddd", "jobTitle": "Backend Developer", "oldBugsDescription": []},{"developerID": "667a76f471631147e0b6e0dbbd", "jobTitle": "Backend Developer", "oldBugsDescription": []},{"developerID": "667a76f471631147e0b6e0deedd", "jobTitle": "Backend Developer", "oldBugsDescription": []},{"developerID": "667a760a6da0c47fe0a327cd", "jobTitle": "Backend Developer", "oldBugsDescription": ["Maximize on second larger monitor not working", "the font size is very small"]}, {"developerID": "667a76f471631147e0b6e0d6", "jobTitle": "Backend Developer", "oldBugsDescription": ["Manual guide installation is not clear"]}]}'

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
Input Bug Top 5 Classes: {1536, 1, 1445, 726, 440}
Developers with no bugs: {'667a76f471631147e0b6e0deedd'}
Developers' Bugs Classes and Common Counts: {'667a760a6da0c47fe0a327cd': 5, '667a76f471631147e0b6e0d6': 0}
Recommended Developers: ['667a760a6da0c47fe0a327cd', '667a76f471631147e0b6e0d6', '667a76f471631147e0b6e0d', '667a76f471631147e0b6e0ddd', '667a76f471631147e0b6e0dbbd']
