In [4]:
# Importing all necessary Libraries

import numpy as np # numpy used for mathematical operation on array
import pandas as pd  # pandas used for data manipulation on dataframe
import seaborn as sns 
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings( "ignore" )

In [5]:
# Read the data with pandas

df = pd.read_csv("iris.csv", header=0)
df

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,virginica
146,6.3,2.5,5.0,1.9,virginica
147,6.5,3.0,5.2,2.0,virginica
148,6.2,3.4,5.4,2.3,virginica


# EDA Process 

In [6]:
# Checking the shape of the data
df.shape

(150, 5)

In [7]:
# Reading random Rows of the data

df.sample()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
14,5.8,4.0,1.2,0.2,setosa


In [8]:
#Reading the name of the columns

df.columns

Index(['sepal_length', 'sepal_width', 'petal_length', 'petal_width',
       'species'],
      dtype='object')

In [10]:
#finding the datatype of each feature

df.dtypes

sepal_length    float64
sepal_width     float64
petal_length    float64
petal_width     float64
species          object
dtype: object

In [11]:
 # checking the information of the dataset
    
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   sepal_length  150 non-null    float64
 1   sepal_width   150 non-null    float64
 2   petal_length  150 non-null    float64
 3   petal_width   150 non-null    float64
 4   species       150 non-null    object 
dtypes: float64(4), object(1)
memory usage: 6.0+ KB


In [12]:
#for the description 

df.describe()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
count,150.0,150.0,150.0,150.0
mean,5.843333,3.054,3.758667,1.198667
std,0.828066,0.433594,1.76442,0.763161
min,4.3,2.0,1.0,0.1
25%,5.1,2.8,1.6,0.3
50%,5.8,3.0,4.35,1.3
75%,6.4,3.3,5.1,1.8
max,7.9,4.4,6.9,2.5


In [13]:
#total number of missing value in a table of each column

df.isnull().sum() 

sepal_length    0
sepal_width     0
petal_length    0
petal_width     0
species         0
dtype: int64

In [14]:
dataset = df

In [15]:
# Converting non numerical column into numerical

from sklearn.preprocessing import LabelEncoder

Label = LabelEncoder() 
dataset['species'] = Label.fit_transform(df[ 'species'])


In [16]:
dataset

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,2
146,6.3,2.5,5.0,1.9,2
147,6.5,3.0,5.2,2.0,2
148,6.2,3.4,5.4,2.3,2


In [17]:
# Separating the output from the dataset

X = dataset.loc[:,dataset.columns!="species"]


In [18]:
# Creating output column

y = dataset["species"]


In [21]:
# Checking the five rows of the input columns

X.sample(5)

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
113,5.7,2.5,5.0,2.0
28,5.2,3.4,1.4,0.2
103,6.3,2.9,5.6,1.8
132,6.4,2.8,5.6,2.2
138,6.0,3.0,4.8,1.8


In [22]:
# Checking the five rows of the output columns

y.sample(3)

15    0
97    1
44    0
Name: species, dtype: int32

# train - test split

In [26]:
# Importing the train test split 

from sklearn.model_selection import train_test_split

In [27]:
# Separating the Training and testing Data

X_train, X_test, y_train, y_test = train_test_split(X, y,test_size=0.30,random_state=0)

# Model without sklearn

In [28]:
y_train = np.array(y_train).reshape((-1, 1))

In [29]:
class LogisticRegression():
    def __init__(self, learning_rate=0.001, num_iterations=1000, y_pred = None):
        self.learning_rate = learning_rate
        self.num_iterations = num_iterations
        self.bias = None
        self.weights = None
    
    
    def softmax(self, z):
        exp_scores = np.exp(z)
        return exp_scores / np.sum(exp_scores, axis=1, keepdims=True)
    
            
    def fit(self, X, y,learning_rate=0.0001, num_iterations=100000):
        n_samples, n_features = X.shape
        n_classes = len(np.unique(y))
        self.weights = np.zeros((n_features, n_classes))
        self.bias = np.zeros(n_classes)
              
        for i in range(num_iterations):
            Z = np.dot(X, self.weights) + self.bias
            LR = self.softmax(Z)
            loss = -np.mean(np.sum(y * np.log(LR), axis=1))
            dZ = LR - y
            
            dW = 1 / n_samples * np.dot(X.T, dZ)
            db = 1 / n_samples * np.sum(dZ, axis=0)
            
            self.weights -= learning_rate * dW
            self.bias -= learning_rate * db
            if (i+1) % 100 == 0:
                print("Iteration {} - loss: {:.4f}".format(i+1, loss))   
                
                
    
            

    def predict(self, X, threshold = 3.3):
        Z = np.dot(X, self.weights) + self.bias
        LR = self.softmax(Z)
        class_pred = []
        for i in range(LR.shape[0]):
            above_threshold = np.where(LR[i] >= threshold)[0]
            if len(above_threshold) > 0:
                class_pred.append(above_threshold[np.argmax(LR[i, above_threshold])])
            else:
                class_pred.append(np.argmax(LR[i]))
        return np.array(class_pred)

    


In [30]:

model = LogisticRegression(learning_rate=0.001, num_iterations=1000, y_pred = None)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

Iteration 100 - loss: 3.4528
Iteration 200 - loss: 3.4528
Iteration 300 - loss: 3.4528
Iteration 400 - loss: 3.4528
Iteration 500 - loss: 3.4528
Iteration 600 - loss: 3.4528
Iteration 700 - loss: 3.4528
Iteration 800 - loss: 3.4528
Iteration 900 - loss: 3.4528
Iteration 1000 - loss: 3.4528
Iteration 1100 - loss: 3.4528
Iteration 1200 - loss: 3.4528
Iteration 1300 - loss: 3.4528
Iteration 1400 - loss: 3.4528
Iteration 1500 - loss: 3.4528
Iteration 1600 - loss: 3.4528
Iteration 1700 - loss: 3.4528
Iteration 1800 - loss: 3.4528
Iteration 1900 - loss: 3.4528
Iteration 2000 - loss: 3.4528
Iteration 2100 - loss: 3.4528
Iteration 2200 - loss: 3.4528
Iteration 2300 - loss: 3.4528
Iteration 2400 - loss: 3.4528
Iteration 2500 - loss: 3.4528
Iteration 2600 - loss: 3.4528
Iteration 2700 - loss: 3.4528
Iteration 2800 - loss: 3.4528
Iteration 2900 - loss: 3.4528
Iteration 3000 - loss: 3.4528
Iteration 3100 - loss: 3.4528
Iteration 3200 - loss: 3.4528
Iteration 3300 - loss: 3.4528
Iteration 3400 - lo

Iteration 27200 - loss: 3.4528
Iteration 27300 - loss: 3.4528
Iteration 27400 - loss: 3.4528
Iteration 27500 - loss: 3.4528
Iteration 27600 - loss: 3.4528
Iteration 27700 - loss: 3.4528
Iteration 27800 - loss: 3.4528
Iteration 27900 - loss: 3.4528
Iteration 28000 - loss: 3.4528
Iteration 28100 - loss: 3.4528
Iteration 28200 - loss: 3.4528
Iteration 28300 - loss: 3.4528
Iteration 28400 - loss: 3.4528
Iteration 28500 - loss: 3.4528
Iteration 28600 - loss: 3.4528
Iteration 28700 - loss: 3.4528
Iteration 28800 - loss: 3.4528
Iteration 28900 - loss: 3.4528
Iteration 29000 - loss: 3.4528
Iteration 29100 - loss: 3.4528
Iteration 29200 - loss: 3.4528
Iteration 29300 - loss: 3.4528
Iteration 29400 - loss: 3.4528
Iteration 29500 - loss: 3.4528
Iteration 29600 - loss: 3.4528
Iteration 29700 - loss: 3.4528
Iteration 29800 - loss: 3.4528
Iteration 29900 - loss: 3.4528
Iteration 30000 - loss: 3.4528
Iteration 30100 - loss: 3.4528
Iteration 30200 - loss: 3.4528
Iteration 30300 - loss: 3.4528
Iteratio

Iteration 54000 - loss: 3.4528
Iteration 54100 - loss: 3.4528
Iteration 54200 - loss: 3.4528
Iteration 54300 - loss: 3.4528
Iteration 54400 - loss: 3.4528
Iteration 54500 - loss: 3.4528
Iteration 54600 - loss: 3.4528
Iteration 54700 - loss: 3.4528
Iteration 54800 - loss: 3.4528
Iteration 54900 - loss: 3.4528
Iteration 55000 - loss: 3.4528
Iteration 55100 - loss: 3.4528
Iteration 55200 - loss: 3.4528
Iteration 55300 - loss: 3.4528
Iteration 55400 - loss: 3.4528
Iteration 55500 - loss: 3.4528
Iteration 55600 - loss: 3.4528
Iteration 55700 - loss: 3.4528
Iteration 55800 - loss: 3.4528
Iteration 55900 - loss: 3.4528
Iteration 56000 - loss: 3.4528
Iteration 56100 - loss: 3.4528
Iteration 56200 - loss: 3.4528
Iteration 56300 - loss: 3.4528
Iteration 56400 - loss: 3.4528
Iteration 56500 - loss: 3.4528
Iteration 56600 - loss: 3.4528
Iteration 56700 - loss: 3.4528
Iteration 56800 - loss: 3.4528
Iteration 56900 - loss: 3.4528
Iteration 57000 - loss: 3.4528
Iteration 57100 - loss: 3.4528
Iteratio

Iteration 81200 - loss: 3.4528
Iteration 81300 - loss: 3.4528
Iteration 81400 - loss: 3.4528
Iteration 81500 - loss: 3.4528
Iteration 81600 - loss: 3.4528
Iteration 81700 - loss: 3.4528
Iteration 81800 - loss: 3.4528
Iteration 81900 - loss: 3.4528
Iteration 82000 - loss: 3.4528
Iteration 82100 - loss: 3.4528
Iteration 82200 - loss: 3.4528
Iteration 82300 - loss: 3.4528
Iteration 82400 - loss: 3.4528
Iteration 82500 - loss: 3.4528
Iteration 82600 - loss: 3.4528
Iteration 82700 - loss: 3.4528
Iteration 82800 - loss: 3.4528
Iteration 82900 - loss: 3.4528
Iteration 83000 - loss: 3.4528
Iteration 83100 - loss: 3.4528
Iteration 83200 - loss: 3.4528
Iteration 83300 - loss: 3.4528
Iteration 83400 - loss: 3.4528
Iteration 83500 - loss: 3.4528
Iteration 83600 - loss: 3.4528
Iteration 83700 - loss: 3.4528
Iteration 83800 - loss: 3.4528
Iteration 83900 - loss: 3.4528
Iteration 84000 - loss: 3.4528
Iteration 84100 - loss: 3.4528
Iteration 84200 - loss: 3.4528
Iteration 84300 - loss: 3.4528
Iteratio

In [31]:
# Importing accuracy-score and confusion_matrix package

from sklearn.metrics import accuracy_score

# Accuracy score of Logistic Regression model without using sklearn package

In [32]:
# Checking the accuracy Score

accuracy_score(y_test, y_pred)

0.35555555555555557

# Model with sklearn

In [33]:
#importing LogisticRegression package

from sklearn.linear_model import LogisticRegression
model1 = LogisticRegression()
model1.fit(X_train, y_train)
y_pred = model1.predict(X_test)

In [34]:
# Importing accuracy-score and confusion_matrix package

from sklearn.metrics import accuracy_score

In [None]:
# Accuracy score 

In [26]:
# Checking the accuracy Score

accuracy_score(y_test, y_pred)

0.9777777777777777