## Importing the libraries

In [72]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import math

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn import metrics
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression

import os

import warnings
warnings.filterwarnings('ignore')

In [2]:
os.listdir()

['.ipynb_checkpoints',
 'EDA.pdf',
 'Exploratory_Data_Analysis.ipynb',
 'IRIS.csv',
 'Prediction.ipynb']

## Importing the dataset

In [3]:
df = pd.read_csv("IRIS.csv")

In [4]:
df

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,Iris-virginica
146,6.3,2.5,5.0,1.9,Iris-virginica
147,6.5,3.0,5.2,2.0,Iris-virginica
148,6.2,3.4,5.4,2.3,Iris-virginica


## Data preparation

In [5]:
# drop the duplicate records
df = df.drop_duplicates(keep = 'first')

In [6]:
# enocode the target varible
df["species"] = df["species"].map({"Iris-setosa":0,"Iris-versicolor":1,"Iris-virginica":2})

In [8]:
# separate features and target variable
X = df.drop(labels="species", axis=1)
y = df["species"]

In [77]:
# splitting the data into training and test/validation set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1, stratify = y)

In [66]:
# With the X values split between training and test, now we can standardize the values.
# This puts the numbers on a consistent scale while keeping the proportional relationship between them
sc = StandardScaler()
X_train = pd.DataFrame(sc.fit_transform(X_train), columns=X_train.columns)
X_test = pd.DataFrame(sc.transform(X_test), columns=X_test.columns)

## Model training and evaluation

In [67]:
model = LogisticRegression()
model.fit(X_train, y_train)
y_pred=model.predict(X_test)

In [79]:
print("Model accuracy = {}".format(round(accuracy_score(y_pred,y_test),2)))

Model accuracy = 0.98


In [83]:
pd.DataFrame(confusion_matrix(y_test, y_pred,labels=list(df["species"].unique())),index=list(df["species"].unique()), columns=list(df["species"].unique()))

Unnamed: 0,0,1,2
0,15,0,0
1,0,15,0
2,0,1,14


In [88]:
report = metrics.classification_report(y_test, y_pred, output_dict=True)
df_classification_report = pd.DataFrame(report).transpose()
df_classification_report

Unnamed: 0,precision,recall,f1-score,support
0,1.0,1.0,1.0,15.0
1,0.9375,1.0,0.967742,15.0
2,1.0,0.933333,0.965517,15.0
accuracy,0.977778,0.977778,0.977778,0.977778
macro avg,0.979167,0.977778,0.977753,45.0
weighted avg,0.979167,0.977778,0.977753,45.0


## Making Prediction

In [89]:
# predict species based on list of inputs
predcition = model.predict([[5.0,3.5,1.4,0.4]])

if predcition[0] == 0:
    print("Predicted Class: Iris-setosa")
elif predcition[0] == 1:
    print("Predicted Class: Iris-versicolor")
else:
    print("Predicted Class: Iris-virginica")

Predicted Class: Iris-virginica
