In [28]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
import numpy as np

In [26]:
iris = load_iris()

In [27]:
print(iris.DESCR)  # Prints the description of the dataset

.. _iris_dataset:

Iris plants dataset
--------------------

**Data Set Characteristics:**

:Number of Instances: 150 (50 in each of three classes)
:Number of Attributes: 4 numeric, predictive attributes and the class
:Attribute Information:
    - sepal length in cm
    - sepal width in cm
    - petal length in cm
    - petal width in cm
    - class:
            - Iris-Setosa
            - Iris-Versicolour
            - Iris-Virginica

:Summary Statistics:

                Min  Max   Mean    SD   Class Correlation
sepal length:   4.3  7.9   5.84   0.83    0.7826
sepal width:    2.0  4.4   3.05   0.43   -0.4194
petal length:   1.0  6.9   3.76   1.76    0.9490  (high!)
petal width:    0.1  2.5   1.20   0.76    0.9565  (high!)

:Missing Attribute Values: None
:Class Distribution: 33.3% for each of 3 classes.
:Creator: R.A. Fisher
:Donor: Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov)
:Date: July, 1988

The famous Iris database, first used by Sir R.A. Fisher. The dataset is taken
from Fis

In [14]:
print(iris.data[:5])  # Prints the first five rows of the data
print(iris.target[:5])  # Prints the first five target values
print(iris.feature_names)  # Prints the names of the features

[[5.1 3.5 1.4 0.2]
 [4.9 3.  1.4 0.2]
 [4.7 3.2 1.3 0.2]
 [4.6 3.1 1.5 0.2]
 [5.  3.6 1.4 0.2]]
[0 0 0 0 0]
['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']


In [15]:
def sigmoid(z):
    return 1 / (1 + np.exp(-z))

In [36]:
def compute_cost(y, y_predicted):
    m = len(y)
    cost = -(1/m) * np.sum(y * np.log(y_predicted) + (1 - y) * np.log(1 - y_predicted))
    return cost

In [49]:
def model(X, y, learning_rate, iterations):
    m, n = X.shape
    weights = np.zeros(n)
    bias = 0
    
    for i in range(iterations):
        projections = np.dot(X, weights) + bias
        predictions = sigmoid(projections)
        
        dw = (1 / m) * np.dot(X.T, (predictions - y))
        db = (1 / m) * np.sum(predictions - y)
        
        weights -= learning_rate * dw
        bias -= learning_rate * db
        
        if i % 100 == 0:
            cost = compute_cost(y, predictions)
            print(f"Iteration {_}, Cost: {cost}")
    return weights, bias


In [50]:
X = iris.data
y = iris.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

weights, bias = model(X_train, y_train, 0.1, 1000)
print(weights, bias)

TypeError: not all arguments converted during string formatting

In [51]:
test_predictions = sigmoid(np.dot(X_test, weights) + bias)
test_predictions = [1 if i > 0.5 else 0 for i in test_predictions]

# Print the accuracy of the model
print("Test Accuracy: ", np.mean(test_predictions == y_test))

Test Accuracy:  0.3
