# <center><b> flower_classification

In [53]:
import pandas as pd
import numpy as np

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score, ConfusionMatrixDisplay, confusion_matrix,precision_recall_fscore_support

from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.neighbors import KNeighborsClassifier

import warnings
warnings.filterwarnings("ignore")

In [6]:
iris=load_iris()
print(iris.DESCR)

.. _iris_dataset:

Iris plants dataset
--------------------

**Data Set Characteristics:**

    :Number of Instances: 150 (50 in each of three classes)
    :Number of Attributes: 4 numeric, predictive attributes and the class
    :Attribute Information:
        - sepal length in cm
        - sepal width in cm
        - petal length in cm
        - petal width in cm
        - class:
                - Iris-Setosa
                - Iris-Versicolour
                - Iris-Virginica
                
    :Summary Statistics:

                    Min  Max   Mean    SD   Class Correlation
    sepal length:   4.3  7.9   5.84   0.83    0.7826
    sepal width:    2.0  4.4   3.05   0.43   -0.4194
    petal length:   1.0  6.9   3.76   1.76    0.9490  (high!)
    petal width:    0.1  2.5   1.20   0.76    0.9565  (high!)

    :Missing Attribute Values: None
    :Class Distribution: 33.3% for each of 3 classes.
    :Creator: R.A. Fisher
    :Donor: Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov)
    :

In [12]:
data=pd.DataFrame(data=iris.data,columns=iris.feature_names)
data["target"]=iris.target

In [13]:
data

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,2
146,6.3,2.5,5.0,1.9,2
147,6.5,3.0,5.2,2.0,2
148,6.2,3.4,5.4,2.3,2


### EDA

In [14]:
df=data.copy()

In [15]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   sepal length (cm)  150 non-null    float64
 1   sepal width (cm)   150 non-null    float64
 2   petal length (cm)  150 non-null    float64
 3   petal width (cm)   150 non-null    float64
 4   target             150 non-null    int32  
dtypes: float64(4), int32(1)
memory usage: 5.4 KB


##### Feature Engineering and Feature Selection not required as data is clean

### Train Test Split

In [41]:
x=df.drop("target",axis=1)
y=df["target"]
x_train, x_test, y_train, y_test=train_test_split(x,y,test_size=0.25,random_state=42,stratify=y)

### Model Training

In [42]:
lg_model=LogisticRegression()
rf_model=RandomForestClassifier()
dt_model=DecisionTreeClassifier()
knn_model=KNeighborsClassifier()
adb_model=AdaBoostClassifier()

In [110]:
performance_report=pd.DataFrame({"model_name":[],"accuracy_score_test":[],"accuracy_score_train":[],"precision_test":[],
                                     "precision_train":[],"recall_test":[],"recall_train":[],"fscore_test":[],"fscore_train":[]})

In [111]:
performance_report=pd.DataFrame({"model_name":[],"accuracy_score_test":[],"accuracy_score_train":[],"precision_test":[],
                                     "precision_train":[],"recall_test":[],"recall_train":[],"fscore_test":[],"fscore_train":[]})
def model_performance(model_abbr,model_name):
    print("On Test Data")
    y_pred=model_abbr.predict(x_test)
    acc_score_test=accuracy_score(y_test,y_pred)
    clf_report=classification_report(y_test,y_pred)
    print(clf_report)
    precision_test,recall_test,fscore_test,_test=precision_recall_fscore_support(y_test,y_pred)

    print("*"*40)
    print("On Train Data")
    y_pred_train=model_abbr.predict(x_train)
    acc_score_train=accuracy_score(y_train,y_pred_train)
    clf_report=classification_report(y_train,y_pred_train)
    print(clf_report)
    print("\n")
    precision_train,recall_train,fscore_train,_train=precision_recall_fscore_support(y_test,y_pred)
    model_evaluation=pd.DataFrame({"model_name":[model_name],"accuracy_score_test":[acc_score_test],"accuracy_score_train":[acc_score_train],
                                   "precision_test":[precision_test],"precision_train":[precision_train],"recall_test":[recall_test],
                                   "recall_train":[recall_train],"fscore_test":[fscore_test],"fscore_train":[fscore_train]})
    performance_report=pd.concat(performance_report,model_evaluation)
    print("Performance report updated succesfully")

##### 1) Logistic Regression

##### Training

In [112]:
lg_model.fit(x_train,y_train)

##### Evaluation

In [114]:
model_performance(lg_model,"logistic Regression")

On Test Data
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        12
           1       0.92      0.92      0.92        13
           2       0.92      0.92      0.92        13

    accuracy                           0.95        38
   macro avg       0.95      0.95      0.95        38
weighted avg       0.95      0.95      0.95        38

****************************************
On Train Data
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        38
           1       0.97      0.95      0.96        37
           2       0.95      0.97      0.96        37

    accuracy                           0.97       112
   macro avg       0.97      0.97      0.97       112
weighted avg       0.97      0.97      0.97       112





UnboundLocalError: local variable 'performance_report' referenced before assignment

#### 2) Random Forest

##### Training

In [45]:
rf_model.fit(x_train,y_train)

In [46]:
print("On Test Data")
y_pred=rf_model.predict(x_test)
acc_score=accuracy_score(y_test,y_pred)
print(acc_score)
clf_report=classification_report(y_test,y_pred)
print(clf_report)
cnf_matrix=confusion_matrix(y_test,y_pred)
print(cnf_matrix)
print("*"*40)
print("On Train Data")
y_pred_train=rf_model.predict(x_train)
acc_score=accuracy_score(y_train,y_pred_train)
print(acc_score)
clf_report=classification_report(y_train,y_pred_train)
print(clf_report)
cnf_matrix=confusion_matrix(y_train,y_pred_train)
print(cnf_matrix)

On Test Data
0.9473684210526315
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        12
           1       0.92      0.92      0.92        13
           2       0.92      0.92      0.92        13

    accuracy                           0.95        38
   macro avg       0.95      0.95      0.95        38
weighted avg       0.95      0.95      0.95        38

[[12  0  0]
 [ 0 12  1]
 [ 0  1 12]]
****************************************
On Train Data
1.0
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        38
           1       1.00      1.00      1.00        37
           2       1.00      1.00      1.00        37

    accuracy                           1.00       112
   macro avg       1.00      1.00      1.00       112
weighted avg       1.00      1.00      1.00       112

[[38  0  0]
 [ 0 37  0]
 [ 0  0 37]]


#### 3) KNN Model

##### Training

In [47]:
knn_model.fit(x_train,y_train)

In [48]:
print("On Test Data")
y_pred=knn_model.predict(x_test)
acc_score=accuracy_score(y_test,y_pred)
print(acc_score)
clf_report=classification_report(y_test,y_pred)
print(clf_report)
cnf_matrix=confusion_matrix(y_test,y_pred)
print(cnf_matrix)
print("*"*40)
print("On Train Data")
y_pred_train=knn_model.predict(x_train)
acc_score=accuracy_score(y_train,y_pred_train)
print(acc_score)
clf_report=classification_report(y_train,y_pred_train)
print(clf_report)
cnf_matrix=confusion_matrix(y_train,y_pred_train)
print(cnf_matrix)

On Test Data
0.9736842105263158
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        12
           1       0.93      1.00      0.96        13
           2       1.00      0.92      0.96        13

    accuracy                           0.97        38
   macro avg       0.98      0.97      0.97        38
weighted avg       0.98      0.97      0.97        38

[[12  0  0]
 [ 0 13  0]
 [ 0  1 12]]
****************************************
On Train Data
0.9732142857142857
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        38
           1       0.97      0.95      0.96        37
           2       0.95      0.97      0.96        37

    accuracy                           0.97       112
   macro avg       0.97      0.97      0.97       112
weighted avg       0.97      0.97      0.97       112

[[38  0  0]
 [ 0 35  2]
 [ 0  1 36]]


#### 4) Decision Tree

##### Training

In [49]:
dt_model.fit(x_train,y_train)

In [50]:
print("On Test Data")
y_pred=dt_model.predict(x_test)
acc_score=accuracy_score(y_test,y_pred)
print(acc_score)
clf_report=classification_report(y_test,y_pred)
print(clf_report)
cnf_matrix=confusion_matrix(y_test,y_pred)
print(cnf_matrix)
print("*"*40)
print("On Train Data")
y_pred_train=dt_model.predict(x_train)
acc_score=accuracy_score(y_train,y_pred_train)
print(acc_score)
clf_report=classification_report(y_train,y_pred_train)
print(clf_report)
cnf_matrix=confusion_matrix(y_train,y_pred_train)
print(cnf_matrix)

On Test Data
0.9210526315789473
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        12
           1       0.86      0.92      0.89        13
           2       0.92      0.85      0.88        13

    accuracy                           0.92        38
   macro avg       0.92      0.92      0.92        38
weighted avg       0.92      0.92      0.92        38

[[12  0  0]
 [ 0 12  1]
 [ 0  2 11]]
****************************************
On Train Data
1.0
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        38
           1       1.00      1.00      1.00        37
           2       1.00      1.00      1.00        37

    accuracy                           1.00       112
   macro avg       1.00      1.00      1.00       112
weighted avg       1.00      1.00      1.00       112

[[38  0  0]
 [ 0 37  0]
 [ 0  0 37]]


#### 5) AdaBoost Model

##### Training

In [51]:
adb_model.fit(x_train,y_train)

In [60]:
print("On Test Data")
y_pred=adb_model.predict(x_test)
acc_score=accuracy_score(y_test,y_pred)
print(acc_score)
clf_report=classification_report(y_test,y_pred)
print(clf_report)
cnf_matrix=confusion_matrix(y_test,y_pred)
print(cnf_matrix)
print("*"*40)
print("On Train Data")
y_pred_train=adb_model.predict(x_train)
acc_score=accuracy_score(y_train,y_pred_train)
print(acc_score)
clf_report=classification_report(y_train,y_pred_train)
print(clf_report)
cnf_matrix=confusion_matrix(y_train,y_pred_train)
print(cnf_matrix)

On Test Data
0.9473684210526315
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        12
           1       0.92      0.92      0.92        13
           2       0.92      0.92      0.92        13

    accuracy                           0.95        38
   macro avg       0.95      0.95      0.95        38
weighted avg       0.95      0.95      0.95        38

[[12  0  0]
 [ 0 12  1]
 [ 0  1 12]]
****************************************
On Train Data
0.9642857142857143
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        38
           1       0.92      0.97      0.95        37
           2       0.97      0.92      0.94        37

    accuracy                           0.96       112
   macro avg       0.96      0.96      0.96       112
weighted avg       0.97      0.96      0.96       112

[[38  0  0]
 [ 0 36  1]
 [ 0  3 34]]


In [66]:
precision,recall,fscore,_=precision_recall_fscore_support(y_test,y_pred,average="macro")

In [67]:
precision

0.9487179487179488

In [68]:
recall

0.9487179487179488

In [69]:
fscore

0.9487179487179488

In [108]:
performance_report=pd.DataFrame({"model_name":[],"accuracy_score_test":[],"accuracy_score_train":[],"precision_test":[],
                                     "precision_train":[],"recall_test":[],"recall_train":[],"fscore_test":[],"fscore_train":[]})
def model_performance(model_abbr,model_name):
    print("On Test Data")
    y_pred=model_abbr.predict(x_test)
    acc_score_test=accuracy_score(y_test,y_pred)
    clf_report=classification_report(y_test,y_pred)
    print(clf_report)
    precision_test,recall_test,fscore_test,_test=precision_recall_fscore_support(y_test,y_pred)

    print("*"*40)
    print("On Train Data")
    y_pred_train=model_abbr.predict(x_train)
    acc_score_train=accuracy_score(y_train,y_pred_train)
    clf_report=classification_report(y_train,y_pred_train)
    print(clf_report)
    print("\n")
    precision_train,recall_train,fscore_train,_train=precision_recall_fscore_support(y_test,y_pred)
    model_evaluation=pd.DataFrame({"model_name":[model_name],"accuracy_score_test":[acc_score_test],"accuracy_score_train":[acc_score_train],
                                   "precision_test":[precision_test],"precision_train":[precision_train],"recall_test":[recall_test],
                                   "recall_train":[recall_train],"fscore_test":[fscore_test],"fscore_train":[fscore_train]})
    performance_report=pd.concat(performance_report,model_evaluation)
    pritnt("Performance report updated succesfully")

In [98]:
model_performance(adb_model,"AdaBoost Classifier")

On Test Data
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        12
           1       0.92      0.92      0.92        13
           2       0.92      0.92      0.92        13

    accuracy                           0.95        38
   macro avg       0.95      0.95      0.95        38
weighted avg       0.95      0.95      0.95        38

****************************************
On Train Data
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        38
           1       0.92      0.97      0.95        37
           2       0.97      0.92      0.94        37

    accuracy                           0.96       112
   macro avg       0.96      0.96      0.96       112
weighted avg       0.97      0.96      0.96       112

Model Evaluation :
             model_name  accuracy_score_test  accuracy_score_train  \
0  AdaBoost Classifier             0.947368              0.964286   

           

In [109]:
model_evaluation

Unnamed: 0,model_name,accuracy_score_test,accuracy_score_train,precision_test,precision_train,recall_test,recall_train,fscore_test,fscore_train


In [83]:
test_df=pd.DataFrame({"a":[2,3], "b":[5,6],"c":[3,9]})
test_df

Unnamed: 0,a,b,c
0,2,5,3
1,3,6,9


In [87]:
test_df=pd.concat([test_df,pd.DataFrame({"a":[888],"b":[777],"c":[999]})],ignore_index=True)

In [88]:
test_df

Unnamed: 0,a,b,c
0,2,5,3
1,3,6,9
2,888,777,999


In [92]:
model_name='lr_model'
model_evaluation=pd.DataFrame({"model_name":[],"accuracy_score_test":[],"accuracy_score_train":[],"precision_test":[],
                                     "precision_train":[],"recall_test":[],"recall_train":[],"fscore_test":[],"fscore_train":[]})

In [93]:
model_evaluation

Unnamed: 0,model_name,accuracy_score_test,accuracy_score_train,precision_test,precision_train,recall_test,recall_train,fscore_test,fscore_train


In [104]:
model_evaluation

Unnamed: 0,model_name,accuracy_score_test,accuracy_score_train,precision_test,precision_train,recall_test,recall_train,fscore_test,fscore_train
