<a href="https://colab.research.google.com/github/AsmiSanthosh/Classification_on_iris_dataset/blob/main/Classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

####Libraries

In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier # for knn
from sklearn.naive_bayes import GaussianNB # for naive_bayes
from sklearn.svm import SVC # for svm
from sklearn.tree import DecisionTreeClassifier  # for decision tree
from sklearn.ensemble import RandomForestClassifier # for random forest

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

####Loading dataset from google drive

In [2]:
#mounting the drive
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
filepath = '/content/drive/MyDrive/DSA/Data/iris.csv'

df_iris = pd.read_csv(filepath)
df_iris.head(3)

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa


####Preprocessing

In [4]:
# iris dataset is considered to be already preprocessed.
#so, only encoding is done

encoder = LabelEncoder()
df_iris['species'] = encoder.fit_transform(df_iris['species'])
df_iris.head(3)

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0


####Model Building

In [5]:
X = df_iris.drop(columns = ['species'])
y = df_iris['species']

X_train, X_test, y_train, y_test = train_test_split(X , y, test_size = 0.2, random_state = 42)

print("Shape of X_train:", X_train.shape)
print("Shape of X_test:", X_test.shape)
print("Shape of Y_train:", y_train.shape)
print("Shape of Y_test:", y_test.shape)

Shape of X_train: (120, 4)
Shape of X_test: (30, 4)
Shape of Y_train: (120,)
Shape of Y_test: (30,)


In [6]:
##Building Logistic Regression Model
log_reg_model = LogisticRegression()
log_reg_model.fit(X_train, y_train)  # training the model

y_pred = log_reg_model.predict(X_test)  # predicting the test data

# evaluate the model
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')

print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)

Accuracy: 1.0
Precision: 1.0
Recall: 1.0


In [7]:
# prediction using threshold
y_pred_threshold = log_reg_model.predict_proba(X_test)[:, 1]
# for class 1
y_pred_threshold = (y_pred_threshold > 0.33).astype(int)
#evaluate the model
accuracy_thresh = accuracy_score(y_test, y_pred_threshold)
precision_thresh = precision_score(y_test, y_pred_threshold, average='weighted')
recall_thresh = recall_score(y_test, y_pred_threshold, average='weighted')

print("Threshold value used = 0.33")
print("Accuracy:", accuracy_thresh)
print("Precision:", precision_thresh)
print("Recall:", recall_thresh)

Threshold value used = 0.33
Accuracy: 0.6333333333333333
Precision: 0.43666666666666665
Recall: 0.6333333333333333


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


####k-NN (k-nearest neighbours)

In [8]:
knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(X_train, y_train) # training the model
y_pred = knn.predict(X_test) # predicting the test data

# evaluate the model
accuracy_knn = accuracy_score(y_test, y_pred)
precision_knn = precision_score(y_test, y_pred, average='weighted')
recall_knn = recall_score(y_test, y_pred, average='weighted')

print("Accuracy:", accuracy_knn)
print("Precision:", precision_knn)
print("Recall:", recall_knn)


Accuracy: 1.0
Precision: 1.0
Recall: 1.0


####Naive-Bayes classification

In [9]:
nb_model = GaussianNB()
nb_model.fit(X_train, y_train) # training the model
y_pred_nb = nb_model.predict(X_test) # predicting the test data

# evaluate the model
accuracy_nb = accuracy_score(y_test, y_pred_nb)
precision_nb = precision_score(y_test, y_pred_nb, average='weighted')
recall_nb = recall_score(y_test, y_pred_nb, average='weighted')

print("Accuracy:", accuracy_nb)
print("Precision:", precision_nb)
print("Recall:", recall_nb)

Accuracy: 1.0
Precision: 1.0
Recall: 1.0


####SVM classifier

In [10]:
svm_model = SVC()
svm_model.fit(X_train, y_train) # training the model
y_pred_svm = svm_model.predict(X_test) # predicting the test data

# evaluate the model
accuracy_svm = accuracy_score(y_test, y_pred_svm)
precision_svm = precision_score(y_test, y_pred_svm, average='weighted')
recall_svm = recall_score(y_test, y_pred_svm, average='weighted')
confusion_matrix_svm = confusion_matrix(y_test, y_pred_svm)

print("Accuracy:", accuracy_svm)
print("Precision:", precision_svm)
print("Recall:", recall_svm)
print("Confusion Matrix:\n", confusion_matrix_svm)

Accuracy: 1.0
Precision: 1.0
Recall: 1.0
Confusion Matrix:
 [[10  0  0]
 [ 0  9  0]
 [ 0  0 11]]


####Decision Tree

In [11]:
dec_tree_model = DecisionTreeClassifier()
dec_tree_model.fit(X_train, y_train) # training the model
y_pred_dec_tree = dec_tree_model.predict(X_test) # predicting the test data

# evaluate the model
accuracy_dec_tree = accuracy_score(y_test, y_pred_dec_tree)
precision_dec_tree = precision_score(y_test, y_pred_dec_tree, average='weighted')
recall_dec_tree = recall_score(y_test, y_pred_dec_tree, average='weighted')

print("Accuracy:", accuracy_dec_tree)
print("Precision:", precision_dec_tree)
print("Recall:", recall_dec_tree)

Accuracy: 1.0
Precision: 1.0
Recall: 1.0


####Random Forest

In [12]:
rand_forest_model = RandomForestClassifier()
rand_forest_model.fit(X_train, y_train) # training the model
y_pred_rand_forest = rand_forest_model.predict(X_test) # predicting the test data

# evaluate the model
accuracy_rand_forest = accuracy_score(y_test, y_pred_rand_forest)
precision_rand_forest = precision_score(y_test, y_pred_rand_forest, average='weighted')
recall_rand_forest = recall_score(y_test, y_pred_rand_forest, average='weighted')

print("Accuracy:", accuracy_rand_forest)
print("Precision:", precision_rand_forest)
print("Recall:", recall_rand_forest)

Accuracy: 1.0
Precision: 1.0
Recall: 1.0


####Model table

In [14]:
model_table= {
    'model':['Logistic_regression','KNN','Naive_bayes','SVM','Decision_tree','Random_forest'],
    'accuracy':[accuracy,accuracy_knn,accuracy_nb,accuracy_svm,accuracy_dec_tree,accuracy_rand_forest],
    'precision':[precision,precision_knn,precision_nb,precision_svm,precision_dec_tree,precision_rand_forest],
    'recall':[recall,recall_knn,recall_nb,recall_svm,recall_dec_tree,recall_rand_forest]
}
df_model_table = pd.DataFrame(model_table)
df_model_table

Unnamed: 0,model,accuracy,precision,recall
0,Logistic_regression,1.0,1.0,1.0
1,KNN,1.0,1.0,1.0
2,Naive_bayes,1.0,1.0,1.0
3,SVM,1.0,1.0,1.0
4,Decision_tree,1.0,1.0,1.0
5,Random_forest,1.0,1.0,1.0
