<a href="https://colab.research.google.com/github/MohammedZuhairAhmed/Salary-prediction-using-skillset/blob/main/Untitled4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

# Read the dataset from disk; the path is relative to the working directory.
csv_file_path = "realspambase.csv"
spambase = pd.read_csv(csv_file_path)

# Every column except the last is used as a feature; the last column is the
# label.  NOTE(review): this layout is assumed, not checked -- confirm that
# the CSV really stores the target in its final column.
X, y = spambase.iloc[:, :-1], spambase.iloc[:, -1]

# Hold out 20% of the rows for evaluation.  The fixed seed keeps the split
# identical from one run to the next.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# K-Nearest Neighbors: fit a default-configured classifier on the training
# split, then predict labels for the held-out split.
knn_model = KNeighborsClassifier().fit(X_train, y_train)
knn_predictions = knn_model.predict(X_test)

# Evaluation results on the held-out split, kept as module-level names.
knn_conf_matrix = confusion_matrix(y_test, knn_predictions)
knn_accuracy = accuracy_score(y_test, knn_predictions)

# Side-by-side table of the true labels and the model's predictions.
knn_classified_data = pd.DataFrame(
    {'Actual': y_test, 'Predicted': knn_predictions}
)
print("K-Nearest Neighbors Classified Data:", knn_classified_data, sep="\n")

# Artificial Neural Network (multi-layer perceptron with default settings).
# The model is seeded with the same value as the train/test split: an
# unseeded MLP draws different initial weights and batch orderings on every
# run, so its accuracy and confusion matrix would not be reproducible.
ann_model = MLPClassifier(random_state=42)
ann_model.fit(X_train, y_train)

# Predict labels for the held-out split and evaluate against the true labels.
ann_predictions = ann_model.predict(X_test)
ann_classified_data = pd.DataFrame({'Actual': y_test, 'Predicted': ann_predictions})
ann_accuracy = accuracy_score(y_test, ann_predictions)
ann_conf_matrix = confusion_matrix(y_test, ann_predictions)

# Show the true label next to the predicted label for each test row.
print("\nArtificial Neural Network Classified Data:")
print(ann_classified_data)

# Naive Bayes: fit a Gaussian naive Bayes classifier on the training split,
# then predict labels for the held-out split.
nb_model = GaussianNB().fit(X_train, y_train)
nb_predictions = nb_model.predict(X_test)

# Evaluation results on the held-out split, kept as module-level names.
nb_conf_matrix = confusion_matrix(y_test, nb_predictions)
nb_accuracy = accuracy_score(y_test, nb_predictions)

# Side-by-side table of the true labels and the model's predictions.
nb_classified_data = pd.DataFrame(
    {'Actual': y_test, 'Predicted': nb_predictions}
)
print("\nNaive Bayes Classified Data:", nb_classified_data, sep="\n")

# Random Forest (default settings).
# The model is seeded with the same value as the train/test split: an
# unseeded forest draws different bootstrap samples and feature subsets on
# every run, so its accuracy and confusion matrix would not be reproducible.
rf_model = RandomForestClassifier(random_state=42)
rf_model.fit(X_train, y_train)

# Predict labels for the held-out split and evaluate against the true labels.
rf_predictions = rf_model.predict(X_test)
rf_classified_data = pd.DataFrame({'Actual': y_test, 'Predicted': rf_predictions})
rf_accuracy = accuracy_score(y_test, rf_predictions)
rf_conf_matrix = confusion_matrix(y_test, rf_predictions)

# Show the true label next to the predicted label for each test row.
print("\nRandom Forest Classified Data:")
print(rf_classified_data)

# Report the held-out accuracy of each classifier, one per line.
print(
    f"K-Nearest Neighbors Accuracy: {knn_accuracy}",
    f"Artificial Neural Network Accuracy: {ann_accuracy}",
    f"Naive Bayes Accuracy: {nb_accuracy}",
    f"Random Forest Accuracy: {rf_accuracy}",
    sep="\n",
)

# The same scores gathered in a fixed order: KNN, ANN, Naive Bayes, Random Forest.
accuracies = [knn_accuracy, ann_accuracy, nb_accuracy, rf_accuracy]

# Plotting confusion matrices
# One figure: a confusion-matrix heatmap per classifier down the left column
# of an (n_rows x 2) grid, plus an accuracy bar chart in the bottom-right cell.
classifiers = ['K-Nearest Neighbors', 'Artificial Neural Network', 'Naive Bayes', 'Random Forest']
predictions = [knn_predictions, ann_predictions, nb_predictions, rf_predictions]

# Reuse the confusion matrices computed when each model was evaluated instead
# of recomputing them from the predictions.
conf_matrices = [knn_conf_matrix, ann_conf_matrix, nb_conf_matrix, rf_conf_matrix]

# NOTE(review): the tick labels assume a binary target whose sorted label
# order is (not spam, spam), e.g. 0/1 -- confirm against the dataset.
class_names = ['Not Spam', 'Spam']

# Derive the grid height from the classifier list so the loop and the grid
# cannot drift apart if a classifier is added or removed.
n_rows = len(classifiers)

plt.figure(figsize=(15, 15))
for i, (classifier_name, cm) in enumerate(zip(classifiers, conf_matrices)):
    # Odd cell numbers (1, 3, 5, ...) are the left column of a two-column grid.
    plt.subplot(n_rows, 2, i * 2 + 1)
    sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", cbar=False,
                xticklabels=class_names, yticklabels=class_names)
    plt.title(classifier_name + " Confusion Matrix")
    plt.xlabel("Predicted")
    plt.ylabel("Actual")

# Plotting accuracies
# The last cell of the grid is the bottom-right corner.
plt.subplot(n_rows, 2, n_rows * 2)
plt.bar(classifiers, accuracies, color=['blue', 'orange', 'green', 'red'])
plt.title('Classifier Accuracies')
plt.xlabel('Classifier')
plt.ylabel('Accuracy')
plt.ylim(0, 1)  # Accuracy is a fraction, so pin the y-axis to the 0-1 range
plt.tight_layout()
plt.show()
