# Installations
!pip install scikit-learn pandas tensorflow


# Imports

In [9]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import SGDClassifier
from sklearn.svm import SVC
from sklearn.multioutput import MultiOutputClassifier
from sklearn.metrics import hamming_loss, f1_score
import joblib
import tensorflow as tf
from sklearn.linear_model import LogisticRegression, SGDClassifier
from sklearn.svm import SVC
from sklearn.multioutput import MultiOutputClassifier


# Load dataset

In [10]:
df = pd.read_csv(r"C:\Users\HC\OneDrive\Desktop\Semester 6\Data Science\dataset.csv")
df.fillna(0, inplace=True)

# Features and Labels (adjust column names)

In [11]:
X = df.drop(columns=["type_blocker", "type_regression", "type_bug"])  # Example labels
y = df[["type_blocker", "type_regression", "type_bug"]]

# Scale features

In [12]:
scaler = StandardScaler()
X = X.drop(columns=["report"])
X_scaled = scaler.fit_transform(X)
joblib.dump(scaler, "scaler.pkl")  # Save for Streamlit

['scaler.pkl']


# Train-test split

In [13]:
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)


# Models

In [14]:
models = {
    "Logistic Regression": MultiOutputClassifier(LogisticRegression(max_iter=1000)),
    "SVM": MultiOutputClassifier(SVC(probability=True)),
    "Online Perceptron": MultiOutputClassifier(SGDClassifier(loss='perceptron', eta0=1, learning_rate='constant'))
}

for name, model in models.items():
    model.fit(X_train, y_train)
    preds = model.predict(X_test)
    print(f"\n{name} Report:")
    print("Hamming Loss:", hamming_loss(y_test, preds))
    print("Micro-F1:", f1_score(y_test, preds, average="micro"))
    print("Macro-F1:", f1_score(y_test, preds, average="macro"))
    joblib.dump(model, f"{name.lower().replace(' ', '_')}_defect_model.pkl")



Logistic Regression Report:
Hamming Loss: 0.05755395683453238
Micro-F1: 0.8628571428571429
Macro-F1: 0.6705606342528921

SVM Report:
Hamming Loss: 0.05515587529976019
Micro-F1: 0.8707865168539326
Macro-F1: 0.7086784689513291

Online Perceptron Report:
Hamming Loss: 0.06235011990407674
Micro-F1: 0.8433734939759037
Macro-F1: 0.4795458993932276


# DNN for Multi-Label

In [15]:
model_dnn = tf.keras.Sequential([
    tf.keras.layers.Dense(128, activation='relu', input_shape=(X_scaled.shape[1],)),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(y.shape[1], activation='sigmoid')
])

model_dnn.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model_dnn.fit(X_train, y_train, epochs=20, batch_size=32, validation_split=0.2)
model_dnn.save("defect_dnn.h5")

# Evaluate DNN
dnn_preds = (model_dnn.predict(X_test) > 0.5).astype(int)
print("\nDNN Report:")
print("Hamming Loss:", hamming_loss(y_test, dnn_preds))
print("Micro-F1:", f1_score(y_test, dnn_preds, average="micro"))
print("Macro-F1:", f1_score(y_test, dnn_preds, average="macro"))

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/20
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 20ms/step - accuracy: 0.4153 - loss: 0.4154 - val_accuracy: 0.5135 - val_loss: 0.4959
Epoch 2/20
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.6253 - loss: 0.1493 - val_accuracy: 0.4505 - val_loss: 0.4036
Epoch 3/20
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - accuracy: 0.5714 - loss: 0.1225 - val_accuracy: 0.4505 - val_loss: 0.3540
Epoch 4/20
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 17ms/step - accuracy: 0.5297 - loss: 0.1320 - val_accuracy: 0.4595 - val_loss: 0.3125
Epoch 5/20
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.5195 - loss: 0.1192 - val_accuracy: 0.4505 - val_loss: 0.2735
Epoch 6/20
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.5239 - loss: 0.1078 - val_accuracy: 0.4595 - val_loss: 0.2421
Epoch 7/20
[1m28/28[0m [32m━━━━━━



[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step

DNN Report:
Hamming Loss: 0.05515587529976019
Micro-F1: 0.8707865168539326
Macro-F1: 0.7086784689513291
