# In [None]:
import pandas as pd
import joblib
from sklearn.ensemble import VotingClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

# Read the pre-split train/test CSVs from the processed-data folder.
processed_data_path = '/content/drive/MyDrive/Project-Name/text_classification/data/processed/'

# Feature tables are kept as 2-D arrays, exactly as read from the CSVs.
X_train, X_test = (
    pd.read_csv(f'{processed_data_path}{split}.csv').values
    for split in ('X_train', 'X_test')
)

# Label files are flattened to 1-D vectors, the shape passed to fit() and
# to the metric functions later in the script.
y_train, y_test = (
    pd.read_csv(f'{processed_data_path}{split}.csv').values.ravel()
    for split in ('y_train', 'y_test')
)

# Base learners combined by the voting ensemble.
nb = MultinomialNB()
# probability=True turns on predict_proba for the SVM, which soft voting
# needs in order to average class probabilities across estimators.
svm = SVC(kernel='linear', probability=True)
dt = DecisionTreeClassifier()

# (name, estimator) pairs in the form VotingClassifier expects.
base_estimators = [
    ('nb', nb),
    ('svm', svm),
    ('dt', dt),
]

# Soft-voting ensemble over the three base learners, fitted on the
# training split.
ensemble_model = VotingClassifier(
    estimators=base_estimators,
    voting='soft',
)
ensemble_model.fit(X_train, y_train)

# Persist the fitted ensemble to disk with joblib so it can be reloaded
# without retraining.
model_path = '/content/drive/MyDrive/Project-Name/text_classification/models/ensemble_model.pkl'
joblib.dump(value=ensemble_model, filename=model_path)

# Predict labels for the held-out test split.
y_pred = ensemble_model.predict(X_test)

# Precision, recall and F1 all use the same 'weighted' averaging, so the
# keyword is defined once and shared.
weighted_avg = {'average': 'weighted'}

accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, **weighted_avg)
recall = recall_score(y_test, y_pred, **weighted_avg)
f1 = f1_score(y_test, y_pred, **weighted_avg)

# Report each metric to four decimal places.
for metric_label, metric_value in (
    ('Accuracy', accuracy),
    ('Precision', precision),
    ('Recall', recall),
    ('F1 Score', f1),
):
    print(f"{metric_label}: {metric_value:.4f}")

# Confusion matrix of true vs. predicted labels, drawn as an annotated heatmap.
cm = confusion_matrix(y_test, y_pred)

plt.figure(figsize=(10, 7))
# heatmap() draws into the current figure and returns the Axes it used;
# labels are set on that Axes rather than through pyplot's implicit state.
heatmap_ax = sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
heatmap_ax.set_xlabel('Predicted')
heatmap_ax.set_ylabel('Actual')
heatmap_ax.set_title('Confusion Matrix')
plt.show()

# Save the results.
# Writes the four summary metrics and the raw confusion matrix to a text
# report, one "Name: value" line per metric followed by the matrix.
results_path = '/content/drive/MyDrive/Project-Name/results/text_classification_ensemble_results.md'

# The handle is deliberately not named `f`: the original formatted the file
# object itself (`{f:.4f}`) instead of the `f1` score, which raises
# TypeError and leaves a truncated report behind.
with open(results_path, 'w', encoding='utf-8') as results_file:
    results_file.write(f"Accuracy: {accuracy:.4f}\n")
    results_file.write(f"Precision: {precision:.4f}\n")
    results_file.write(f"Recall: {recall:.4f}\n")
    results_file.write(f"F1 Score: {f1:.4f}\n")
    results_file.write(f"\nConfusion Matrix:\n{cm}\n")

print("Evaluation completed and results saved.")
