In [None]:
#####  MLP CLASSIFIER WITH ONE HIDDEN LAYER OF 10 UNITS  #####


import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.neural_network import MLPClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import LabelEncoder

# Read both CSV files: the labelled training set first, then the test set to score.
train_data, test_data = (
    pd.read_csv(csv_name) for csv_name in ('bugs-train.csv', 'bugs-test.csv')
)

# The 'summary' column is the model input; 'severity' is the label to learn.
X_train, y_train = train_data['summary'], train_data['severity']
X_test = test_data['summary']

# Map the severity labels to integer codes. The fitted encoder is reused
# further down to turn predicted codes back into the original labels.
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)

# TF-IDF text features feed an MLP classifier. hidden_layer_sizes=(10,)
# configures a single hidden layer of 10 units; max_iter=10 caps training
# at 10 iterations; random_state pins the seed.
mlp_step = MLPClassifier(hidden_layer_sizes=(10,), max_iter=10, random_state=42)
pipeline = make_pipeline(TfidfVectorizer(), mlp_step)

# Fit on the training summaries (fit returns the pipeline itself), then
# predict encoded severities for the test summaries.
y_test_pred_encoded = pipeline.fit(X_train, y_train_encoded).predict(X_test)

# Decode the integer predictions back to severity labels.
y_test_pred = label_encoder.inverse_transform(y_test_pred_encoded)

# Attach the decoded predictions to the test frame as a 'severity' column.
test_data['severity'] = y_test_pred

# Write the test frame, predictions included, to CSV without the index.
# NOTE(review): the 100-unit MLP cell later in this notebook writes to the
# same path, so this file is overwritten on Run All -- confirm whether a
# distinct filename is wanted here.
test_data.to_csv(path_or_buf='bugs-test-predictions_mlp.csv', index=False)

print(test_data)



        bug_id                                            summary  severity
0      1143402  Firefox claims to be not the default browser w...    normal
1      1143405  Background of html and body element are not ap...    normal
2      1143409  Mouse input breaks after using window.showModa...    normal
3      1143411  Build failure with next freetype version/curre...    normal
4      1143417  HTML element is not treated as root inside for...    normal
...        ...                                                ...       ...
86089  1426166      Crash in bool IsAboutToBeFinalizedInternal<T>  critical
86090  1426171  Potential crash if GraphRate is greater than 4...    normal
86091  1426173  Crash in <name omitted> | decltype JS::Dispatc...    normal
86092  1426174  Crash in xul.dll@0x28145fa | xul.dll@0x3c748ff...  critical
86093  1426176  No symbols for clang_rt.asan_dynamic-x86_64.dl...    normal

[86094 rows x 3 columns]


In [None]:
######   GRADIENT BOOST  ######

import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import LabelEncoder

# Read the labelled training set and the test set that will be scored.
train_data = pd.read_csv(filepath_or_buffer='bugs-train.csv')
test_data = pd.read_csv(filepath_or_buffer='bugs-test.csv')

# Model input is the 'summary' text; the target is the 'severity' label.
X_train, X_test = train_data['summary'], test_data['summary']
y_train = train_data['severity']

# Turn the severity labels into integer codes; keep the fitted encoder so
# the predicted codes can be decoded again afterwards.
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)

# TF-IDF text features feed a gradient boosting classifier with 100
# estimators and a fixed seed.
gb_params = {'n_estimators': 100, 'random_state': 42}
pipeline = make_pipeline(TfidfVectorizer(), GradientBoostingClassifier(**gb_params))

# Fit on the training summaries (fit returns the pipeline itself), then
# predict encoded severities for the test summaries.
y_test_pred_encoded = pipeline.fit(X_train, y_train_encoded).predict(X_test)

# Decode the integer predictions back to severity labels.
y_test_pred = label_encoder.inverse_transform(y_test_pred_encoded)

# Attach the decoded predictions to the test frame as a 'severity' column.
test_data['severity'] = y_test_pred

# Write the test frame, predictions included, to CSV without the index.
test_data.to_csv(path_or_buf='bugs-test-predictions_gradientboost.csv', index=False)

print(test_data)

        bug_id                                            summary  severity
0      1143402  Firefox claims to be not the default browser w...    normal
1      1143405  Background of html and body element are not ap...    normal
2      1143409  Mouse input breaks after using window.showModa...    normal
3      1143411  Build failure with next freetype version/curre...    normal
4      1143417  HTML element is not treated as root inside for...    normal
...        ...                                                ...       ...
86089  1426166      Crash in bool IsAboutToBeFinalizedInternal<T>  critical
86090  1426171  Potential crash if GraphRate is greater than 4...  critical
86091  1426173  Crash in <name omitted> | decltype JS::Dispatc...  critical
86092  1426174  Crash in xul.dll@0x28145fa | xul.dll@0x3c748ff...  critical
86093  1426176  No symbols for clang_rt.asan_dynamic-x86_64.dl...    normal

[86094 rows x 3 columns]


In [None]:
#####  MLP CLASSIFIER WITH ONE HIDDEN LAYER OF 100 UNITS  #####

import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.neural_network import MLPClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import LabelEncoder

# Load the labelled training set and the test set to score.
train_data = pd.read_csv('bugs-train.csv')
test_data = pd.read_csv('bugs-test.csv')

# Model input is the 'summary' text; the target is the 'severity' label.
X_train = train_data['summary']
y_train = train_data['severity']
X_test = test_data['summary']

# Encode the severity labels as integers; the fitted encoder is reused
# below to decode the predictions.
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)

# TF-IDF text features feed an MLP classifier. hidden_layer_sizes=(100,)
# configures a single hidden layer of 100 units; max_iter=1000 caps the
# number of training iterations; random_state pins the seed.
pipeline = make_pipeline(
    TfidfVectorizer(),
    MLPClassifier(hidden_layer_sizes=(100,), max_iter=1000, random_state=42),
)

# Train the model
pipeline.fit(X_train, y_train_encoded)

# Predict encoded severities for the test summaries and decode them back
# to the original labels.
y_test_pred_encoded = pipeline.predict(X_test)
y_test_pred = label_encoder.inverse_transform(y_test_pred_encoded)

# Build the output frame without mutating the loaded frame in place:
# assign() and drop() each return a new DataFrame, so no inplace=True is
# needed. The result has the same columns as before (every test column
# except 'summary', plus 'severity') and is rebound to test_data so the
# name ends the cell in the same state as it did previously.
test_data = test_data.assign(severity=y_test_pred).drop(columns=['summary'])

# Save the predictions to a new CSV file (index column omitted).
# NOTE(review): the 10-unit MLP cell earlier in this notebook writes to this
# same path; on Run All this cell's output is the one that remains.
test_data.to_csv('bugs-test-predictions_mlp.csv', index=False)

print(test_data)