In [None]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import VotingClassifier

# Train a soft-voting ensemble (logistic regression + random forest) on TF-IDF
# features of the bug text, then write a severity prediction for every bug in
# the test set to 'predicted_severities.csv'.


def _build_text(frame):
    """Return the per-row model input text: '<bug_id> <summary>'.

    A missing summary is replaced by an empty string. Without this,
    ``str + NaN`` produces NaN for that row and TfidfVectorizer refuses to
    vectorize it, which aborts the whole fit/predict call.
    """
    summary = frame['summary'].fillna('').astype(str)
    # NOTE(review): bug_id is kept in the text to preserve the existing
    # behavior, but an identifier is presumably unique per row and so unlikely
    # to carry signal -- consider dropping it and using the summary alone.
    return frame['bug_id'].astype(str) + ' ' + summary


# Load the training and test data
train_data = pd.read_csv('bugs-train.csv')
test_data = pd.read_csv('bugs-test.csv')

# Preprocess the data: build the single text feature used by both models
train_data['text'] = _build_text(train_data)
test_data['text'] = _build_text(test_data)

# Encode the target variable as integers; the encoder is kept so predictions
# can be mapped back to the original severity labels afterwards
label_encoder = LabelEncoder()
train_data['severity_encoded'] = label_encoder.fit_transform(train_data['severity'])

# Define the feature and target variable
X_train = train_data['text']
y_train = train_data['severity_encoded']
X_test = test_data['text']

# Create pipelines for both classifiers; each one owns its own TF-IDF
# vectorizer so it can be fitted independently inside the ensemble
pipeline_lr = Pipeline([
    ('vectorizer', TfidfVectorizer()),
    ('classifier', LogisticRegression(max_iter=1000)),
])

pipeline_rf = Pipeline([
    ('vectorizer', TfidfVectorizer()),
    ('classifier', RandomForestClassifier(n_estimators=100, random_state=42)),
])

# Create a voting classifier with weighted soft voting: predicted class
# probabilities are averaged, with the random forest counted twice as heavily
# as the logistic regression
voting_clf = VotingClassifier(
    estimators=[('lr', pipeline_lr), ('rf', pipeline_rf)],
    voting='soft',
    weights=[1, 2],  # order matches `estimators`: lr=1, rf=2
)

# Train the voting classifier
voting_clf.fit(X_train, y_train)

# Predict the severity for the test data
predictions = voting_clf.predict(X_test)

# Decode the predicted integer labels back to severity names
predicted_severities = label_encoder.inverse_transform(predictions)

# Add predictions to the test data
test_data['severity'] = predicted_severities

# Save the predictions to a CSV file (bug_id and predicted severity only)
test_data[['bug_id', 'severity']].to_csv('predicted_severities.csv', index=False)

print("Predictions saved to 'predicted_severities.csv'")

Predictions saved to 'predicted_severities.csv'
