<a href="https://colab.research.google.com/github/Nithya07shree/colab-notes-aiml/blob/main/Logistic_Regression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np

In [8]:
class logisticRegression:
  def __init__(self,lr=0.01,iter=1000):
    self.lr = lr
    self.iter = iter
    self.weights = None
    self.bias = None

  def sigmoid(self,z):
    #calculate sigmoid of z
    return np.where(z>=0,
                    1/(1+np.exp(-z)),
                    np.exp( z)/(1+np.exp(z)))

  def fit(self,X,y):
    n_samples, n_features = X.shape
    self.weights = np.zeros(n_features)
    self.bias = 0

    for _ in range(self.iter):
      # prediction
      z=np.dot(X,self.weights)+self.bias
      y_pred=self.sigmoid(z)
      # calculate gradient
      dw=(1/n_samples)*np.dot(X.T,(y_pred-y))
      db=(1/n_samples)*np.sum(y_pred-y)
      #update weights and bias
      self.weights-=self.lr*dw
      self.bias-=self.lr*db

  def predict(self,X):
    z=np.dot(X,self.weights)+self.bias
    y_pred=self.sigmoid(z)
    prediction = (y_pred>0.5).astype(int)
    return prediction

In [4]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
#load data
X,y = load_breast_cancer(return_X_y=True)
#shuffle data
X_sub, y_sub = shuffle(X,y,n_samples=200, random_state=42)
#split data
X_train,X_test,y_train,y_test = train_test_split(
    X_sub,y_sub,test_size=0.20, random_state=42, stratify=y_sub)
# stratify -  the number of malignant vs. benign cases might be imbalanced. By setting stratify=y_subset, you ensure that both your 160 training samples and 40 testing samples have the same proportion of cancer cases as the original 200-sample selection.

In [9]:
#initialize model
model = logisticRegression(lr=0.01,iter=1000)
#train model
model.fit(X_train,y_train)

  return 1/(1+np.exp(-z))


In [10]:
y_predict = model.predict(X_sub)

  return 1/(1+np.exp(-z))


In [11]:
print(y_predict)

[0 0 0 1 1 0 0 0 1 1 1 0 1 0 1 0 1 1 1 0 0 1 0 1 1 1 1 1 1 0 1 1 1 0 1 1 0
 1 0 1 1 0 1 1 1 1 1 1 1 1 0 0 1 1 1 1 1 0 0 1 1 0 0 1 1 1 0 0 1 1 0 0 1 1
 1 1 1 0 1 1 0 1 0 0 0 0 0 0 1 1 1 1 0 1 1 1 0 0 1 0 0 1 0 0 1 1 1 0 1 0 0
 1 1 0 1 0 1 1 1 0 1 1 1 0 1 0 0 1 1 0 0 1 0 1 0 0 1 1 1 0 1 0 1 1 0 1 0 0
 0 1 0 1 1 1 1 0 0 1 1 1 1 1 1 1 0 1 1 1 1 0 1 1 1 1 1 0 0 1 1 1 1 1 1 0 0
 0 1 1 0 1 0 1 1 1 1 0 1 1 0 1]


In [14]:
from sklearn.metrics import accuracy_score
acc = accuracy_score(y_sub,y_predict)
print(acc)

0.955


FOR MULTINOMIAL LOGISTIC REGRESSION

In [41]:
class logisticRegressionMultinomial:
  def __init__(self,lr=0.01,iter=1000):
    self.lr = lr
    self.iter = iter
    self.weights = None
    self.bias = None

  def softmax(self,z): #use softmax function instead of sigmoid
    #calculate sigmoid of z
    exp_z = np.exp(z-np.max(z,axis=1,keepdims=True))
    return exp_z/ np.sum(exp_z, axis=1, keepdims=True)

  def fit(self,X,y):
    n_samples, n_features = X.shape
    k = y.shape[1] # no. of categories
    self.weights = np.zeros((n_features,k))
    self.bias = np.zeros((1,k))

    for _ in range(self.iter):
      # prediction
      z=np.dot(X,self.weights)+self.bias
      y_pred=self.softmax(z)
      # calculate gradient
      dw=(1/n_samples)*np.dot(X.T,(y_pred-y))
      db=(1/n_samples)*np.sum(y_pred-y, axis=0, keepdims=True)
      #update weights and bias
      self.weights-=self.lr*dw
      self.bias-=self.lr*db

  def predict(self,X):
    z=np.dot(X,self.weights)+self.bias
    y_pred=self.softmax(z)
    return np.argmax(y_pred,axis=1)

In [22]:
#load data
from sklearn.datasets import load_digits
X,y = load_digits(return_X_y=True)


In [23]:
# one hot encode the labels
from sklearn.preprocessing import LabelBinarizer

lb = LabelBinarizer()
y_one_hot = lb.fit_transform(y)

In [19]:
print(f"original: {y[0]}")
print(f"one hot: {y_one_hot[0]}")

original: 0
one hot: [1 0 0 0 0 0 0 0 0 0]


In [42]:
# Split into Train/Test
X_train, X_test, y_train, y_test = train_test_split(
    X, y_one_hot, test_size=0.2, random_state=42
)

In [43]:
#initialize model
mlor = logisticRegressionMultinomial(lr=0.01,iter=1000)
#train model
mlor.fit(X_train,y_train)

In [44]:
# prediction
y_pred = mlor.predict(X)

In [45]:
# check accuracy
from sklearn.metrics import accuracy_score
acc = accuracy_score(y,y_pred)
print(acc)

0.9877573734001113
