# In [None]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import LabelEncoder

# Train a TF-IDF + Random Forest classifier that predicts bug severity from
# the bug summary text, then write the predictions for the test set to CSV.

# Load the training and test data
train_data = pd.read_csv('bugs-train.csv')
test_data = pd.read_csv('bugs-test.csv')

# Preprocess the data.
# Rows without a severity label cannot serve as supervised training examples,
# so they are excluded via a mask (train_data itself is left untouched).
labeled_rows = train_data['severity'].notna()

# pandas reads empty CSV cells as NaN and TfidfVectorizer rejects NaN
# documents, so missing summaries are replaced with an empty string (fillna
# must come before astype(str), otherwise NaN would become the text 'nan').
X_train = train_data.loc[labeled_rows, 'summary'].fillna('').astype(str)
y_train = train_data.loc[labeled_rows, 'severity']
X_test = test_data['summary'].fillna('').astype(str)

# Encode the labels as integers; the encoder is kept so predictions can be
# mapped back to the original severity values below.
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)

# Create a pipeline to vectorize the text data and train a Random Forest classifier.
# random_state is fixed so repeated runs build the same forest.
pipeline = make_pipeline(
    TfidfVectorizer(),
    RandomForestClassifier(n_estimators=100, random_state=42),
)

# Train the model
pipeline.fit(X_train, y_train_encoded)

# Predict the severity of the bugs in the test data and decode the integer
# predictions back to the original severity labels.
y_test_pred_encoded = pipeline.predict(X_test)
y_test_pred = label_encoder.inverse_transform(y_test_pred_encoded)

# Add the predictions to the test data; the raw summary text is dropped so the
# output file holds only the remaining test columns plus the predicted severity.
test_data['severity'] = y_test_pred
test_data.drop(columns=['summary'], inplace=True)

# Save the predictions to a new CSV file
test_data.to_csv('bugs-test-predictions_rndforest.csv', index=False)

print(test_data)