##libraries

In [40]:
import pandas as pd

from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression #for logistic regression
from sklearn.neighbors import KNeighborsClassifier #for knn
from sklearn.naive_bayes import GaussianNB #for naive bayes
from sklearn.svm import SVC #for svm
from sklearn.tree import DecisionTreeClassifier #for decision tree
from sklearn.ensemble import RandomForestClassifier #for random forest

from sklearn.metrics import accuracy_score,precision_score,recall_score,confusion_matrix

##loading the dataset from google drive

In [41]:
#mounting the drive
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [42]:
filepath = '/content/drive/MyDrive/ICT /Dataset/iris.csv'
df_iris = pd.read_csv(filepath)
df_iris.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


###preprocessing

In [43]:
encoder = LabelEncoder()
df_iris['species'] = encoder.fit_transform(df_iris['species'])
df_iris.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


### Model building

In [44]:
#split data into test and train
from sklearn.model_selection import train_test_split

x = df_iris.drop('species', axis=1)
y = df_iris['species']

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)

In [45]:
#building logistic regression model
log_reg_model = LogisticRegression()
log_reg_model.fit(x_train, y_train) #traing the model

y_pred = log_reg_model.predict(x_test) #testing the model

#evaluate the model
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')

print("Accuracy", accuracy)
print("Precision", precision)
print("Recall", recall)

Accuracy 1.0
Precision 1.0
Recall 1.0


In [46]:
#prediction using threshold
x_threshold = log_reg_model.predict_proba(x_test)[:,1]
#for class 1
y_pred_threshold = (x_threshold >= 0.33).astype(int)
#evaluate the model
accuracy_thresh = accuracy_score(y_test, y_pred_threshold)
precision_thresh = precision_score(y_test, y_pred_threshold, average='weighted')
recall_thresh = recall_score(y_test, y_pred_threshold, average='weighted')

print("threshold value used = 0.33")
print("Accuracy", accuracy_thresh)
print("Precision", precision_thresh)
print("Recall", recall_thresh)

threshold value used = 0.33
Accuracy 0.7666666666666667
Precision 0.6422222222222221
Recall 0.7666666666666667


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


#k-NN(k-nearest neighbours)

In [55]:
knn_model = KNeighborsClassifier(n_neighbors=3)
knn_model.fit(x_train, y_train) #training the model
y_pred = knn_model.predict(x_test) #prediction
#evaluate the model
accuracy_knn = accuracy_score(y_test, y_pred)
precision_knn = precision_score(y_test, y_pred, average='weighted')
recall_knn = recall_score(y_test, y_pred, average='weighted')

print("Accuracy", accuracy_knn)
print("Precision", precision_knn)
print("Recall", recall_knn)

Accuracy 0.9666666666666667
Precision 0.9708333333333333
Recall 0.9666666666666667


#Naive Bayes Classification

In [48]:
nb_model = GaussianNB()
nb_model.fit(x_train, y_train) #training the model
y_pred_nb = nb_model.predict(x_test) #prediction
#evaluation
accuracy_nb = accuracy_score(y_test, y_pred_nb)
precision_nb = precision_score(y_test, y_pred_nb, average='weighted')
recall_nb = recall_score(y_test, y_pred_nb, average='weighted')

print("Accuracy", accuracy_nb)
print("Precision", precision_nb)
print("Recall", recall_nb)

Accuracy 1.0
Precision 1.0
Recall 1.0


##SVM classifier

In [49]:
svm_model = SVC() #defining the model
svm_model.fit(x_train, y_train) #training the model
y_pred_svm = svm_model.predict(x_test) #prediction
#evaluation
accuracy_svm = accuracy_score(y_test, y_pred_svm)
precision_svm = precision_score(y_test, y_pred_svm, average='weighted')
recall_svm = recall_score(y_test, y_pred_svm, average='weighted')
confusion_matrix_svm = confusion_matrix(y_test, y_pred_svm)

print("Accuracy", accuracy_svm)
print("Precision", precision_svm)
print("Recall", recall_svm)
print("Confusion Matrix",confusion_matrix_svm)

Accuracy 1.0
Precision 1.0
Recall 1.0
Confusion Matrix [[ 8  0  0]
 [ 0 15  0]
 [ 0  0  7]]


#Decision tree

In [50]:
decision_tree_model = DecisionTreeClassifier(criterion='gini')
decision_tree_model.fit(x_train, y_train) #training the model
y_pred_dt = decision_tree_model.predict(x_test) #prediction
#evaluation
accuracy_dt = accuracy_score(y_test, y_pred_dt)
precision_dt = precision_score(y_test, y_pred_dt, average='weighted')
recall_dt = recall_score(y_test, y_pred_dt, average='weighted')
confusion_matrix_dt = confusion_matrix(y_test, y_pred_dt)

print("Accuracy", accuracy_dt)
print("Precision", precision_dt)
print("Recall", recall_dt)
print("Confusion Matrix",confusion_matrix_dt)

Accuracy 0.9666666666666667
Precision 0.96875
Recall 0.9666666666666667
Confusion Matrix [[ 8  0  0]
 [ 0 15  0]
 [ 0  1  6]]


##comparison of models

In [56]:
#building a dataframe
model_comparison = pd.DataFrame({
    'Model': ['Logistic Regression', 'K-NN', 'Naive Bayes', 'SVM', 'Decision Tree'],
    'Accuracy': [accuracy, accuracy_knn, accuracy_nb, accuracy_svm, accuracy_dt],
    'Precision': [precision, precision_knn, precision_nb, precision_svm, precision_dt],
    'Recall': [recall, recall_knn, recall_nb, recall_svm, recall_dt]
})
model_comparison

Unnamed: 0,Model,Accuracy,Precision,Recall
0,Logistic Regression,0.966667,0.970833,0.966667
1,K-NN,0.966667,0.970833,0.966667
2,Naive Bayes,1.0,1.0,1.0
3,SVM,1.0,1.0,1.0
4,Decision Tree,0.966667,0.96875,0.966667
