In [6]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression, Perceptron
from sklearn.svm import LinearSVC
from sklearn.multiclass import OneVsRestClassifier
from sklearn.metrics import hamming_loss, f1_score
from scipy.sparse import hstack
import tensorflow as tf
from tensorflow.keras import layers, models


In [9]:
df = pd.read_csv("/content/dataset.csv")  # update path

label_cols = [
    'type_blocker',
    'type_regression',
    'type_bug',
    'type_documentation',
    'type_enhancement',
    'type_task',
    'type_dependency_upgrade'
]

X_text = df['report']
y = df[label_cols]


In [10]:
vectorizer = TfidfVectorizer(max_features=5000)
X = vectorizer.fit_transform(X_text)


In [11]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train, test_size=0.1, random_state=42
)


In [13]:
# Logistic Regression
lr = OneVsRestClassifier(LogisticRegression(max_iter=500, class_weight='balanced'))
lr.fit(X_train, y_train)




In [14]:
#SVM
svm = OneVsRestClassifier(LinearSVC())
svm.fit(X_train, y_train)




In [15]:
#Perceptron
perc = OneVsRestClassifier(Perceptron(max_iter=1, tol=None, eta0=1.0, random_state=42, warm_start=True))
for epoch in range(5):  # Online learning: multiple passes
    perc.fit(X_train, y_train)




In [16]:
#DNN
num_features = X_train.shape[1]
num_labels = y_train.shape[1]

model_dnn = models.Sequential([
    layers.Input(shape=(num_features,)),
    layers.Dense(128, activation='relu'),
    layers.Dense(64, activation='relu'),
    layers.Dense(num_labels, activation='sigmoid')
])

model_dnn.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy']
)

history = model_dnn.fit(
    X_train.toarray(), y_train,  # convert sparse to dense
    epochs=20,
    batch_size=32,
    validation_data=(X_val.toarray(), y_val)
)


Epoch 1/20
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 18ms/step - accuracy: 0.3335 - loss: 0.6476 - val_accuracy: 0.4865 - val_loss: 0.4052
Epoch 2/20
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.4485 - loss: 0.3695 - val_accuracy: 0.4865 - val_loss: 0.3202
Epoch 3/20
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.4391 - loss: 0.2814 - val_accuracy: 0.4955 - val_loss: 0.2771
Epoch 4/20
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - accuracy: 0.4076 - loss: 0.2196 - val_accuracy: 0.4955 - val_loss: 0.2374
Epoch 5/20
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step - accuracy: 0.4817 - loss: 0.1426 - val_accuracy: 0.4775 - val_loss: 0.2348
Epoch 6/20
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.4549 - loss: 0.1029 - val_accuracy: 0.4595 - val_loss: 0.2355
Epoch 7/20
[1m32/32[0m [32m━━━━

In [17]:
#Haming Loss
pred_lr = lr.predict(X_test)
print("LR Hamming Loss:", hamming_loss(y_test, pred_lr))


LR Hamming Loss: 0.08941418293936279


In [18]:
#Micro-F1 & Macro-F1
print("LR Micro F1:", f1_score(y_test, pred_lr, average='micro'))
print("LR Macro F1:", f1_score(y_test, pred_lr, average='macro'))


LR Micro F1: 0.8462897526501767
LR Macro F1: 0.6286207069132123


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [19]:
def precision_at_k(y_true, y_prob, k):
    scores = []
    for t, p in zip(y_true.values, y_prob):
        top_k = np.argsort(p)[-k:]       # highest k probabilities
        score = t[top_k].sum() / k
        scores.append(score)
    return np.mean(scores)

y_prob_lr = lr.predict_proba(X_test)
print("Precision@3:", precision_at_k(y_test, y_prob_lr, 3))
print("Precision@5:", precision_at_k(y_test, y_prob_lr, 5))


Precision@3: 0.645083932853717
Precision@5: 0.4007194244604316
