In [3]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [37]:
class logisticRegression:
    """Binary logistic regression trained with batch gradient descent.

    Two data layouts are supported, one per training entry point:

    * ``fit`` / ``predict`` take sample-major data: ``X`` has shape
      ``(num_samples, num_features)`` and ``y`` has ``num_samples`` labels.
    * ``propogate`` / ``optimize`` take feature-major data: ``X`` has shape
      ``(num_features, num_samples)`` and ``y`` holds one label per column.

    Attributes:
        learning_rate: Step size used by ``fit``.
        num_iterations: Number of gradient steps taken by ``fit``.
        weights: Learned coefficients; ``None`` until the model is trained.
        bias: Learned intercept; ``None`` until the model is trained.
        costs: Cross-entropy cost sampled every 100 iterations.
        m: Number of training samples seen by the most recent ``fit`` call.
    """

    def __init__(self, learning_rate=0.01, num_iterations=1000):
        self.learning_rate = learning_rate
        self.num_iterations = num_iterations
        self.weights = None
        self.bias = None
        self.costs = []
        self.m = None

    def sigmoid(self, z):
        """Return the logistic function ``1 / (1 + exp(-z))`` element-wise.

        ``z`` is clipped to ``[-500, 500]`` first so that ``np.exp`` cannot
        overflow; inside that range the result is unchanged.
        """
        z = np.clip(z, -500, 500)
        return 1 / (1 + np.exp(-z))

    def _cross_entropy(self, y, activations):
        """Return the mean binary cross-entropy of ``activations`` against ``y``."""
        # Keep the activations strictly inside (0, 1) so log() stays finite
        # even when the sigmoid saturates.
        eps = 1e-15
        a = np.clip(activations, eps, 1 - eps)
        return -np.mean(y * np.log(a) + (1 - y) * np.log(1 - a))

    def propogate(self, X, y):
        """Run one forward/backward pass on feature-major data.

        Args:
            X: Array of shape ``(num_features, num_samples)``.
            y: Binary labels, one per column of ``X`` (any shape that
                flattens to ``num_samples`` values).

        Returns:
            ``(grads, cost)`` where ``grads`` is ``{"dw": ..., "db": ...}``
            (``dw`` has the same shape as ``self.weights``) and ``cost`` is
            the mean cross-entropy as a 0-d array.

        Raises:
            ValueError: If the parameters have not been initialised yet.
        """
        if self.weights is None or self.bias is None:
            raise ValueError(
                "weights/bias are not initialised; call fit() or optimize() first"
            )

        X = np.asarray(X)
        y = np.asarray(y).reshape(1, -1)
        # Work with a column vector internally so both 1-D weights (as created
        # by fit) and (num_features, 1) weights are accepted.
        w_shape = np.shape(self.weights)
        w = np.asarray(self.weights).reshape(-1, 1)
        m = X.shape[1]

        A = self.sigmoid(np.dot(w.T, X) + self.bias)  # shape (1, m)
        cost = np.squeeze(np.array(self._cross_entropy(y, A)))

        dz = A - y
        dw = ((1 / m) * np.dot(X, dz.T)).reshape(w_shape)
        db = (1 / m) * np.sum(dz)

        grads = {"dw": dw,
                 "db": db}
        return grads, cost

    def optimize(self, X, Y, num_iterations=100, learning_rate=0.009, print_cost=False):
        """Run gradient descent on feature-major data, updating the model in place.

        Parameters that are still ``None`` are initialised to zeros first.
        The cost is appended to ``self.costs`` every 100 iterations.

        Args:
            X: Array of shape ``(num_features, num_samples)``.
            Y: Binary labels, one per column of ``X``.
            num_iterations: Number of gradient steps to take.
            learning_rate: Step size for each update.
            print_cost: If true, print the cost every 100 iterations.

        Returns:
            ``(params, grads, costs)``: the final ``{"w", "b"}`` parameters,
            the gradients from the last iteration (``None`` values when
            ``num_iterations`` is 0) and ``self.costs``.
        """
        X = np.asarray(X)
        if self.weights is None:
            self.weights = np.zeros(X.shape[0])
        if self.bias is None:
            self.bias = 0.0

        dw = db = None
        for i in range(num_iterations):
            grads, cost = self.propogate(X, Y)
            dw = grads["dw"]
            db = grads["db"]

            # Rebind instead of updating in place so an array supplied by the
            # caller as initial weights is never mutated.
            self.weights = self.weights - learning_rate * dw
            self.bias = self.bias - learning_rate * db

            # Record the costs
            if i % 100 == 0:
                self.costs.append(cost)

                # Print the cost every 100 training iterations
                if print_cost:
                    print ("Cost after iteration %i: %f" %(i, cost))

        params = {"w": self.weights,
                  "b": self.bias}
        grads = {"dw": dw,
                 "db": db}
        return params, grads, self.costs

    def fit(self, X, y):
        """Train on sample-major data using the instance's hyper-parameters.

        Resets the weights, bias and cost history, then runs
        ``self.num_iterations`` gradient steps of size ``self.learning_rate``.

        Args:
            X: Array of shape ``(num_samples, num_features)``.
            y: Binary labels; flattened to ``(num_samples,)``.

        Returns:
            The fitted model (``self``).

        Raises:
            ValueError: If ``y`` does not hold one label per row of ``X``.
        """
        X = np.asarray(X)
        y = np.asarray(y).reshape(-1)  # (num_samples,)
        num_samples, num_features = X.shape
        if y.shape[0] != num_samples:
            raise ValueError(
                "X has %d samples but y has %d labels" % (num_samples, y.shape[0])
            )

        self.m = num_samples
        self.weights = np.zeros(num_features)
        self.bias = 0.0
        self.costs = []

        for i in range(self.num_iterations):
            linear_model = np.dot(X, self.weights) + self.bias
            y_predicted = self.sigmoid(linear_model)  # shape (num_samples,)

            if i % 100 == 0:
                self.costs.append(self._cross_entropy(y, y_predicted))

            error = y_predicted - y
            dw = (1 / num_samples) * np.dot(X.T, error)  # shape (num_features,)
            db = (1 / num_samples) * np.sum(error)

            self.weights -= self.learning_rate * dw
            self.bias -= self.learning_rate * db

        return self

    def predict(self, X):
        """Return 0/1 class predictions for sample-major ``X``.

        A sample is labelled 1 when its predicted probability exceeds 0.5.

        Raises:
            ValueError: If the model has not been trained yet.
        """
        if self.weights is None or self.bias is None:
            raise ValueError("model is not fitted; call fit() or optimize() first")
        linear_model = np.dot(X, self.weights) + self.bias
        y_predicted = self.sigmoid(linear_model)
        return np.where(y_predicted > 0.5, 1, 0).flatten()

In [38]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Breast-cancer benchmark for the hand-written logistic regression
data = load_breast_cancer()
X, y = data.data, data.target

# Hold out 20% of the samples for testing (fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

# Standardise both splits with statistics learned from the training split only
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Train the custom model
model = logisticRegression(num_iterations=1000, learning_rate=0.01)
model.fit(X_train, y_train)

# Score the held-out split
y_pred = model.predict(X_test)

print("Accuracy:", accuracy_score(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))


Accuracy: 0.9824561403508771
Confusion Matrix:
 [[42  1]
 [ 1 70]]
Classification Report:
               precision    recall  f1-score   support

           0       0.98      0.98      0.98        43
           1       0.99      0.99      0.99        71

    accuracy                           0.98       114
   macro avg       0.98      0.98      0.98       114
weighted avg       0.98      0.98      0.98       114



Testing whether the model can tell if an image shows a cat or not


In [44]:
import h5py
import numpy as np

def load_cat_dataset(train_path="train_catvnoncat.h5", test_path="test_catvnoncat.h5"):
    """Load the cat / non-cat arrays from their HDF5 files.

    Each file is opened read-only inside a ``with`` block so its handle is
    closed as soon as the datasets have been copied into memory.

    Args:
        train_path: Path to the HDF5 file holding the ``train_set_x`` and
            ``train_set_y`` datasets.
        test_path: Path to the HDF5 file holding the ``test_set_x`` and
            ``test_set_y`` datasets.

    Returns:
        A tuple ``(train_set_x_orig, train_set_y_orig, test_set_x_orig,
        test_set_y_orig)`` of NumPy arrays.
    """
    with h5py.File(train_path, "r") as train_dataset:
        # np.array(...) copies the data, so the arrays outlive the file handle.
        train_set_x_orig = np.array(train_dataset["train_set_x"][:])
        train_set_y_orig = np.array(train_dataset["train_set_y"][:])

    with h5py.File(test_path, "r") as test_dataset:
        test_set_x_orig = np.array(test_dataset["test_set_x"][:])
        test_set_y_orig = np.array(test_dataset["test_set_y"][:])

    return train_set_x_orig, train_set_y_orig, test_set_x_orig, test_set_y_orig
(train_set_x_orig, train_set_y_orig,
 test_set_x_orig, test_set_y_orig) = load_cat_dataset()

# One column per image: flatten every image, transpose, then divide by 255
X_train = train_set_x_orig.reshape(len(train_set_x_orig), -1).T / 255.
X_test = test_set_x_orig.reshape(len(test_set_x_orig), -1).T / 255.

# Labels as single-row matrices
Y_train = np.reshape(train_set_y_orig, (1, -1))
Y_test = np.reshape(test_set_y_orig, (1, -1))

# Train the custom model; fit() expects one row per sample, hence the transpose
model = logisticRegression(num_iterations=2000, learning_rate=0.005)
model.fit(X_train.T, Y_train.ravel())

# Predict on the held-out images
Y_pred = model.predict(X_test.T)

# Accuracy
from sklearn.metrics import accuracy_score
print("Accuracy:", accuracy_score(Y_test.ravel(), Y_pred))

Accuracy: 0.7


NameError: name 'X_train_flat' is not defined