# Logistic Regression on iris dataset (with standardization)

## Library

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

## Dataset

In [2]:
df = pd.read_csv('iris.csv')
df

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,virginica
146,6.3,2.5,5.0,1.9,virginica
147,6.5,3.0,5.2,2.0,virginica
148,6.2,3.4,5.4,2.3,virginica


In [3]:
df[df.columns[:-1]]

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2
...,...,...,...,...
145,6.7,3.0,5.2,2.3
146,6.3,2.5,5.0,1.9
147,6.5,3.0,5.2,2.0
148,6.2,3.4,5.4,2.3


In [4]:
x = df[df.columns[:-1]]
x

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2
...,...,...,...,...
145,6.7,3.0,5.2,2.3
146,6.3,2.5,5.0,1.9
147,6.5,3.0,5.2,2.0
148,6.2,3.4,5.4,2.3


In [5]:
y = df[df.columns[-1:]]
y

Unnamed: 0,species
0,setosa
1,setosa
2,setosa
3,setosa
4,setosa
...,...
145,virginica
146,virginica
147,virginica
148,virginica


## Standardization

In [6]:
from sklearn.preprocessing import StandardScaler

In [7]:
scaler = StandardScaler()

In [8]:
scaler.fit(x)

In [9]:
x = scaler.transform(x)

In [10]:
x = pd.DataFrame(x, columns=df.columns[:-1])
x

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
0,-0.900681,1.032057,-1.341272,-1.312977
1,-1.143017,-0.124958,-1.341272,-1.312977
2,-1.385353,0.337848,-1.398138,-1.312977
3,-1.506521,0.106445,-1.284407,-1.312977
4,-1.021849,1.263460,-1.341272,-1.312977
...,...,...,...,...
145,1.038005,-0.124958,0.819624,1.447956
146,0.553333,-1.281972,0.705893,0.922064
147,0.795669,-0.124958,0.819624,1.053537
148,0.432165,0.800654,0.933356,1.447956


## Train Test Split

In [11]:
from sklearn.model_selection import train_test_split
x_train , x_test , y_train , y_test = train_test_split(x,y,test_size=0.25,random_state=101)

In [12]:
x_train

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
94,-0.294842,-0.819166,0.250967,0.133226
135,2.249683,-0.124958,1.331416,1.447956
21,-0.900681,1.494863,-1.284407,-1.050031
22,-1.506521,1.263460,-1.568735,-1.312977
29,-1.385353,0.337848,-1.227541,-1.312977
...,...,...,...,...
63,0.310998,-0.356361,0.535296,0.264699
70,0.068662,0.337848,0.592162,0.790591
81,-0.416010,-1.513375,-0.033361,-0.261193
11,-1.264185,0.800654,-1.227541,-1.312977


In [13]:
y_train

Unnamed: 0,species
94,versicolor
135,virginica
21,setosa
22,setosa
29,setosa
...,...
63,versicolor
70,versicolor
81,versicolor
11,setosa


## Logistic Regression

In [14]:
from sklearn.linear_model import LogisticRegression

In [15]:
model = LogisticRegression(solver='saga',multi_class="ovr",max_iter=5000)

In [16]:
model.fit(x_train, y_train)

  y = column_or_1d(y, warn=True)


## Test Accuracy

In [19]:
model.score(x_test,y_test)

0.9736842105263158

In [17]:
y_pred = model.predict(x_test)

In [18]:
from sklearn.metrics import accuracy_score
accuracy = accuracy_score(y_test, y_pred)
accuracy

0.9736842105263158