<a href="https://colab.research.google.com/github/Thahira8/assesmnt/blob/main/Classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Libraries

In [25]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score # for logic regression
from sklearn.neighbors import KNeighborsClassifier # for KNN
from sklearn.naive_bayes import GaussianNB #for naive bayes
from sklearn.svm import SVC # for SVM
from sklearn.tree import DecisionTreeClassifier # for Decision Tree
from sklearn.ensemble import RandomForestClassifier #for Random Forest


# Loading the dataset from google drive

In [3]:
# mounting the drive
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
filepath = '/content/drive/MyDrive/ICTAK/DATA /iris.csv'

In [5]:
df_iris = pd.read_csv(filepath)
df_iris.head(3)

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa


#Prepocessing

In [6]:
# iris dataset is considered to be already preprocessed.
# so, only enoding is done

encoder = LabelEncoder()
df_iris['species'] = encoder.fit_transform(df_iris['species'])
df_iris.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


# Model Building

In [7]:
# split data into train and test

y = df_iris['species']
x = df_iris.drop('species', axis = 1)

In [8]:
x_train, x_test, y_train, y_test = train_test_split(x,y, test_size=0.2, random_state=101)

In [9]:
# building logistics regression model

log_reg_model = LogisticRegression()
log_reg_model.fit(x_train, y_train) #training the model
y_pred = log_reg_model.predict(x_test) #predicting the test data
#evaluate the model
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')

print('Accuracy:', accuracy)
print('Precision:', precision)
print('Recall:', recall)

Accuracy: 1.0
Precision: 1.0
Recall: 1.0


In [10]:
# prediction using threshold
y_pred_threshold = log_reg_model.predict_proba(x_test)[:,1]
#for class 1
y_pred_threshold = (y_pred_threshold > 0.33).astype(int)
# evaluate the model
accuracy_thresh = accuracy_score(y_test, y_pred_threshold)
precision_thresh = precision_score(y_test, y_pred_threshold, average='weighted')
recall_thresh = recall_score(y_test, y_pred_threshold, average='weighted')

print('Tgreshold value used = 0.33')
print('Accuracy:', accuracy_thresh)
print('Precision:', precision_thresh)
print('Recall:', recall_thresh)

Tgreshold value used = 0.33
Accuracy: 0.7333333333333333
Precision: 0.5511904761904761
Recall: 0.7333333333333333


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


K-NN (K-nearest neighbours)

In [12]:
knm = KNeighborsClassifier(n_neighbors=3)
knm.fit(x_train, y_train) # training the model
y_pred = knm.predict(x_test) #prediction
# evaluate the model
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')

print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)

Accuracy: 1.0
Precision: 1.0
Recall: 1.0


# Navie-Bayes Classification

### Uses navie bayes theorem

In [14]:
nb_model = GaussianNB()
nb_model.fit(x_train, y_train) # training the model
y_pred_nb = nb_model.predict(x_test) #prediction
# evaluate the model
accuracy_nb = accuracy_score(y_test, y_pred_nb)
precision_nb = precision_score(y_test, y_pred_nb, average='weighted')
recall_nb = recall_score(y_test, y_pred_nb, average='weighted')

print("Accuracy:", accuracy_nb)
print("Precision:", precision_nb)
print("Recall:", recall_nb)

Accuracy: 0.9666666666666667
Precision: 0.9692307692307692
Recall: 0.9666666666666667


SVM classifier

In [16]:
svm_model = SVC()
svm_model.fit(x_train, y_train) # training the model
y_pred_svm = svm_model.predict(x_test) #prediction
# evaluate the model
accuracy_svm = accuracy_score(y_test, y_pred_svm)
precision_svm = precision_score(y_test, y_pred_svm, average='weighted')
recall_svm = recall_score(y_test, y_pred_svm, average='weighted')
confusion_matrix_svm = confusion_matrix(y_test, y_pred_svm)

print("Accuracy:", accuracy_svm)
print("Precision:", precision_svm)
print("Recall:", recall_svm)
print("Confusion matrix:", confusion_matrix_svm)

Accuracy: 0.9666666666666667
Precision: 0.9692307692307692
Recall: 0.9666666666666667
Confusion matrix: [[10  0  0]
 [ 0 12  0]
 [ 0  1  7]]


In [17]:
print(df_iris['species'].unique())

[0 1 2]


# Decision Tree and Random Forest

Decision Tree

In [24]:
dsn_tree_model=DecisionTreeClassifier(criterion='gini') #criterion can be gini,entropy
dsn_tree_model.fit(x_train,y_train)#training the model
y_pred_dsn_tree=dsn_tree_model.predict(x_test)#prediction
#Evaluation
accuracy_dsn_tree=accuracy_score(y_test,y_pred_dsn_tree)
precision_dsn_tree=precision_score(y_test,y_pred_dsn_tree,average='weighted')
recall_dsn_tree=recall_score(y_test,y_pred_dsn_tree,average='weighted')

print("Accuracy : ",accuracy_dsn_tree)
print("Precision : ",precision_dsn_tree)
print("Recall : ",recall_dsn_tree)
print(confusion_matrix_svm)

Accuracy :  0.9666666666666667
Precision :  0.9692307692307692
Recall :  0.9666666666666667
[[10  0  0]
 [ 0 12  0]
 [ 0  1  7]]


Random Forest

In [26]:
rf_model = RandomForestClassifier(random_state=42)
rf_model.fit(x_train,y_train)
y_pred_rf=rf_model.predict(x_test)
#evaluation
accuracy_rf=accuracy_score(y_test,y_pred_rf)
precision_rf=precision_score(y_test,y_pred_rf,average='weighted')
recall_rf=recall_score(y_test,y_pred_rf,average='weighted')

print("Accuracy:", accuracy_rf)
print("Precision:", precision_rf)
print("Recall:", recall_rf)

Accuracy: 0.9666666666666667
Precision: 0.9692307692307692
Recall: 0.9666666666666667


# Comparision Models



In [29]:
#create a dictionary to store the evaluation matrices
metrics = {
    'Logistic Regression': [accuracy, precision, recall],
    'k-NN': [accuracy, precision, recall],
    'Naive Bayes': [accuracy_nb, precision_nb, recall_nb],
    'SVM': [accuracy_svm, precision_svm, recall_svm],
    'Decision Tree': [accuracy_dsn_tree, precision_dsn_tree, recall_dsn_tree],
    'Random Forest': [accuracy_rf, precision_rf, recall_rf]
}

#create a pandas dataframe from the evaluation matrices
df_metrics = pd.DataFrame(metrics, index=['Accuracy', 'Precision', 'Recall'])

#print the dataframe
df_metrics

Unnamed: 0,Logistic Regression,k-NN,Naive Bayes,SVM,Decision Tree,Random Forest
Accuracy,1.0,1.0,0.966667,0.966667,0.966667,0.966667
Precision,1.0,1.0,0.969231,0.969231,0.969231,0.969231
Recall,1.0,1.0,0.966667,0.966667,0.966667,0.966667
