In [1]:
from google.colab import drive
# Attach Google Drive to the Colab runtime so files under the mount point
# (the CSVs read in the next cell) become readable.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score

# --- Load -------------------------------------------------------------------
# Read both CSVs from the mounted Drive and remove exact duplicate rows
# before any modelling.
heart_data = pd.read_csv('/content/drive/MyDrive/Machine Learning data sets/heart.csv').drop_duplicates()
music_data = pd.read_csv('/content/drive/MyDrive/Machine Learning data sets/music.csv').drop_duplicates()

# --- Features / targets -----------------------------------------------------
# Heart: every column except the last is a feature; the last column is the target.
X = heart_data.iloc[:, :-1]
y = heart_data.iloc[:, -1]
# Music: column 0 is the target; features are columns 1 .. n-2.
# NOTE(review): the last column is excluded from the features — confirm this is intended.
p = music_data.iloc[:, 1:-1]
q = music_data.iloc[:, 0]

# --- Encode targets ---------------------------------------------------------
# One encoder per dataset so each label mapping stays available for decoding
# predictions later (a single shared encoder would be overwritten by the
# second fit).
heart_label_encoder = LabelEncoder()
music_label_encoder = LabelEncoder()
y = heart_label_encoder.fit_transform(y)
q = music_label_encoder.fit_transform(q)

# --- Split first, then scale ------------------------------------------------
# Splitting BEFORE scaling avoids data leakage: the scaler's mean/std must be
# estimated from the training rows only and then applied unchanged to the
# held-out rows. The split itself is identical to splitting scaled data,
# because the shuffle depends only on the sample count and random_state.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
p_train, p_test, q_train, q_test = train_test_split(p, q, test_size=0.2, random_state=42)

heart_scaler = StandardScaler()
X_train = heart_scaler.fit_transform(X_train)  # fit on training rows only
X_test = heart_scaler.transform(X_test)        # reuse training statistics

music_scaler = StandardScaler()
p_train = music_scaler.fit_transform(p_train)
p_test = music_scaler.transform(p_test)

# Backward-compatible aliases: earlier versions of this cell left `scaler` and
# `le` fitted on the music data. Note that `X` and `p` now remain the unscaled
# feature frames; only the train/test splits are standardised.
scaler = music_scaler
le = music_label_encoder


In [3]:
from sklearn.naive_bayes import GaussianNB

# Gaussian Naive Bayes on the heart-disease split: fit on the training rows,
# then score the predictions for the held-out rows.
nb_classifier = GaussianNB().fit(X_train, y_train)
nb_y_pred = nb_classifier.predict(X_test)

print("Naive Bayes Results:\n")
print("Heart disease data set\n")
print("confusion_matrix:")
print(confusion_matrix(y_test, nb_y_pred))
# Binary target, so precision/recall use scikit-learn's default averaging.
for metric_label, metric_fn in (
    ("Accuracy:", accuracy_score),
    ("Precision:", precision_score),
    ("Recall:", recall_score),
):
    print(metric_label, metric_fn(y_test, nb_y_pred))


Naive Bayes Results:

Heart disease data set

confusion_matrix:
[[27  5]
 [ 4 25]]
Accuracy: 0.8524590163934426
Precision: 0.8333333333333334
Recall: 0.8620689655172413


In [4]:
# Reuse the Naive Bayes estimator from the previous cell on the music split.
# Refitting replaces the parameters learned on the heart-disease data.
nb_q_pred = nb_classifier.fit(p_train, q_train).predict(p_test)

print("Naive Bayes Results:\n")
print("Music data set\n")
print("confusion_matrix:")
print(confusion_matrix(q_test, nb_q_pred))
print("Accuracy:", accuracy_score(q_test, nb_q_pred))
# NOTE: with average='micro' on single-label multiclass targets, precision and
# recall both reduce to accuracy, so the three printed values are identical.
for metric_label, metric_fn in (("Precision:", precision_score), ("Recall:", recall_score)):
    print(metric_label, metric_fn(q_test, nb_q_pred, average='micro'))

Naive Bayes Results:

Music data set

confusion_matrix:
[[16  2  0  0]
 [ 1 18  1  0]
 [ 0  1 21  6]
 [ 1  2  3  6]]
Accuracy: 0.782051282051282
Precision: 0.782051282051282
Recall: 0.782051282051282


In [5]:
from sklearn.tree import DecisionTreeClassifier

# Entropy-criterion decision tree on the heart-disease split. random_state is
# pinned so that re-running the cell rebuilds the same tree.
dt_classifier = DecisionTreeClassifier(random_state=42, criterion='entropy')
dt_classifier.fit(X_train, y_train)
dt_y_pred = dt_classifier.predict(X_test)

# Each tuple is one print() call; building the report as data keeps the
# evaluation lines together.
for report_args in (
    ("Decision Tree Results:\n",),
    ("Heart disease data set\n",),
    ("confusion_matrix:",),
    (confusion_matrix(y_test, dt_y_pred),),
    ("Accuracy:", accuracy_score(y_test, dt_y_pred)),
    ("Precision:", precision_score(y_test, dt_y_pred)),
    ("Recall:", recall_score(y_test, dt_y_pred)),
):
    print(*report_args)

Decision Tree Results:

Heart disease data set

confusion_matrix:
[[22 10]
 [ 7 22]]
Accuracy: 0.7213114754098361
Precision: 0.6875
Recall: 0.7586206896551724


In [6]:
# Refit the decision tree from the previous cell on the music split; the
# heart-disease tree is discarded by this fit.
dt_q_pred = dt_classifier.fit(p_train, q_train).predict(p_test)

# NOTE: average='micro' on single-label multiclass targets makes precision and
# recall equal to accuracy, so the last three lines print the same number.
for report_args in (
    ("Decision Tree Results:\n",),
    ("Music data set:\n ",),
    ("confusion matrix: ",),
    (confusion_matrix(q_test, dt_q_pred),),
    ("Accuracy:", accuracy_score(q_test, dt_q_pred)),
    ("Precision:", precision_score(q_test, dt_q_pred, average='micro')),
    ("Recall:", recall_score(q_test, dt_q_pred, average='micro')),
):
    print(*report_args)


Decision Tree Results:

Music data set:
 
confusion matrix: 
[[14  1  0  3]
 [ 3 17  0  0]
 [ 5  1 10 12]
 [ 3  2  1  6]]
Accuracy: 0.6025641025641025
Precision: 0.6025641025641025
Recall: 0.6025641025641025


In [7]:
from sklearn.linear_model import LogisticRegression

# Logistic regression on the heart-disease split, seeded for repeatability.
lr_classifier = LogisticRegression(random_state=42).fit(X_train, y_train)
lr_y_pred = lr_classifier.predict(X_test)

# Compute the scores first, then print the report.
lr_heart_confusion = confusion_matrix(y_test, lr_y_pred)
lr_heart_accuracy = accuracy_score(y_test, lr_y_pred)
lr_heart_precision = precision_score(y_test, lr_y_pred)
lr_heart_recall = recall_score(y_test, lr_y_pred)

print("Logistic Regression Results:")
print("Heart disease data set\n")
print("confusion_matrix:")
print(lr_heart_confusion)
print("Accuracy:", lr_heart_accuracy)
print("Precision:", lr_heart_precision)
print("Recall:", lr_heart_recall)


Logistic Regression Results:
Heart disease data set

confusion_matrix:
[[21 11]
 [ 3 26]]
Accuracy: 0.7704918032786885
Precision: 0.7027027027027027
Recall: 0.896551724137931


In [8]:
# Refit the logistic-regression estimator from the previous cell on the music
# split (this replaces the coefficients learned on the heart-disease data).
lr_q_pred = lr_classifier.fit(p_train, q_train).predict(p_test)

# Compute the scores first, then print the report.
# NOTE: average='micro' on single-label multiclass targets yields precision and
# recall equal to accuracy.
lr_music_confusion = confusion_matrix(q_test, lr_q_pred)
lr_music_accuracy = accuracy_score(q_test, lr_q_pred)
lr_music_precision = precision_score(q_test, lr_q_pred, average='micro')
lr_music_recall = recall_score(q_test, lr_q_pred, average='micro')

print("Logistic regression Results:\n")
print("Music data set:\n ")
print("confusion matrix: ")
print(lr_music_confusion)
print("Accuracy:", lr_music_accuracy)
print("Precision:", lr_music_precision)
print("Recall:", lr_music_recall)

Logistic regression Results:

Music data set:
 
confusion matrix: 
[[16  0  0  2]
 [ 1 15  1  3]
 [ 1  0 18  9]
 [ 2  1  1  8]]
Accuracy: 0.7307692307692307
Precision: 0.7307692307692307
Recall: 0.7307692307692307


In [9]:
from sklearn.neighbors import KNeighborsClassifier

# 5-nearest-neighbours classifier on the heart-disease split.
knn_classifier = KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train)
knn_y_pred = knn_classifier.predict(X_test)

print("K-Nearest Neighbors Results:\n")
print("Heart disease data set\n")
print("confusion_matrix:")
print(confusion_matrix(y_test, knn_y_pred))
# Binary target, so precision/recall use scikit-learn's default averaging.
knn_heart_scores = {
    "Accuracy:": accuracy_score(y_test, knn_y_pred),
    "Precision:": precision_score(y_test, knn_y_pred),
    "Recall:": recall_score(y_test, knn_y_pred),
}
for metric_label, metric_value in knn_heart_scores.items():
    print(metric_label, metric_value)


K-Nearest Neighbors Results:

Heart disease data set

confusion_matrix:
[[21 11]
 [ 5 24]]
Accuracy: 0.7377049180327869
Precision: 0.6857142857142857
Recall: 0.8275862068965517


In [10]:
# Refit the k-NN estimator from the previous cell on the music split; the
# stored heart-disease training set is replaced by this fit.
knn_q_pred = knn_classifier.fit(p_train, q_train).predict(p_test)

print("K-Nearest Neighbors Results:\n")
print("Music data set\n")
print("confusion_matrix:")
print(confusion_matrix(q_test, knn_q_pred))
# NOTE: average='micro' on single-label multiclass targets makes precision and
# recall equal to accuracy, so all three values below coincide.
knn_music_scores = {
    "Accuracy:": accuracy_score(q_test, knn_q_pred),
    "Precision:": precision_score(q_test, knn_q_pred, average='micro'),
    "Recall:": recall_score(q_test, knn_q_pred, average='micro'),
}
for metric_label, metric_value in knn_music_scores.items():
    print(metric_label, metric_value)

K-Nearest Neighbors Results:

Music data set

confusion_matrix:
[[16  2  0  0]
 [ 1 19  0  0]
 [ 4  0 16  8]
 [ 3  3  3  3]]
Accuracy: 0.6923076923076923
Precision: 0.6923076923076923
Recall: 0.6923076923076923


After evaluating the performance of the four algorithms on a single 80/20 train/test split of each dataset, we infer the following:

1. In the Heart Disease dataset, the Naive Bayes algorithm achieved the highest accuracy (85.25%), followed by Logistic Regression (77.05%). Naive Bayes also achieved the highest precision (83.33%), but not the highest recall: Logistic Regression reached 89.66% recall, ahead of Naive Bayes at 86.21%.

2. In the Music Emotion Recognition dataset, the Naive Bayes algorithm achieved the highest accuracy (78.21%), followed by Logistic Regression (73.08%). Precision and recall are micro-averaged here, which for a single-label multiclass problem makes them equal to accuracy, so Naive Bayes likewise has the highest values (78.21% for both).

Therefore, Naive Bayes and Logistic Regression are expected to give more accurate results on these datasets than the other algorithms evaluated (Decision Tree and K-Nearest Neighbors).