In [1]:
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

In [2]:
data = pd.read_csv("iris.csv")

In [3]:
print(data.head())

    SL   SW   PL   PW      Species
0  5.1  3.5  1.4  0.2  Iris-setosa
1  4.9  3.0  1.4  0.2  Iris-setosa
2  4.7  3.2  1.3  0.2  Iris-setosa
3  4.6  3.1  1.5  0.2  Iris-setosa
4  5.0  3.6  1.4  0.2  Iris-setosa


In [4]:
print("\n column names:-")
print(data.columns)


 column names:-
Index(['SL', 'SW', 'PL', 'PW', 'Species'], dtype='object')


In [5]:
encode = LabelEncoder()
data.Species = encode.fit_transform(data.Species)

In [6]:
print(data.head())

    SL   SW   PL   PW  Species
0  5.1  3.5  1.4  0.2        0
1  4.9  3.0  1.4  0.2        0
2  4.7  3.2  1.3  0.2        0
3  4.6  3.1  1.5  0.2        0
4  5.0  3.6  1.4  0.2        0


In [7]:
train, test = train_test_split(data, test_size=0.2, random_state=42)

In [8]:
print("\n Shape of training data:- ", train.shape)
print("\n Shape of test data:- ", test.shape)


 Shape of training data:-  (120, 5)

 Shape of test data:-  (30, 5)


In [9]:
train_x = train.drop(columns=['Species'], axis=1)
train_y = train['Species']

In [10]:
test_x = test.drop(columns=['Species'], axis=1)
test_y = test['Species']

In [None]:
#Logistic regression measures the relationship between the categorical dependent variable and
#one or more independent variables by estimating probabilities using a logistic function.
#Logistic regression is a special case of the generalized linear regression.

In [11]:
model = LogisticRegression(multi_class='auto')
model.fit(train_x, train_y)


LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

In [12]:
predict = model.predict(test_x)

In [13]:
print("\n Predicted values on test data", encode.inverse_transform(predict))


 Predicted values on test data ['Iris-versicolor' 'Iris-setosa' 'Iris-virginica' 'Iris-versicolor'
 'Iris-versicolor' 'Iris-setosa' 'Iris-versicolor' 'Iris-virginica'
 'Iris-versicolor' 'Iris-versicolor' 'Iris-virginica' 'Iris-setosa'
 'Iris-setosa' 'Iris-setosa' 'Iris-setosa' 'Iris-versicolor'
 'Iris-virginica' 'Iris-versicolor' 'Iris-versicolor' 'Iris-virginica'
 'Iris-setosa' 'Iris-virginica' 'Iris-setosa' 'Iris-virginica'
 'Iris-virginica' 'Iris-virginica' 'Iris-virginica' 'Iris-virginica'
 'Iris-setosa' 'Iris-setosa']


In [14]:
print("\n\nAccuracy score on test data: ")
print(accuracy_score(test_y, predict))



Accuracy score on test data: 
1.0


In [15]:
report = classification_report(test_y, predict)
print("\nClassification Report: ", report)



Classification Report:                precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      1.00      1.00         9
           2       1.00      1.00      1.00        11

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30

