### Linear Regression

In [40]:
import numpy as np
from sklearn.model_selection import train_test_split

In [41]:
class LinearRegression():
    def __init__(self,learning_rate=0.01,epochs=100,reg=0.0):
        self.lr = learning_rate
        self.iters =epochs
        self.weights = None
        self.bias = None
        self.reg = reg

    def predict(self,X):
        return X.dot(self.weights) + self.bias

    def _update_weights(self,X,y):
        m = X.shape[0]
        y_pred = self.predict(X)
        error = y - y_pred
        dw = -(2*X.T.dot(error))/m
        reg = 2*(self.reg/m)*self.weights
        dw += reg
        db = -(2*np.sum(error))/m
        self.weights = self.weights - self.lr*dw
        self.bias = self.bias - self.lr*db
        return self

    def fit(self,X,y):
        _ , n = X.shape
        X = np.array(X)
        y = np.array(y).reshape(-1)
        self.weights = np.zeros(n)
        self.bias = 0
        self.error_list = []

        for _ in range(self.iters):
            self._update_weights(X,y)
            self.error_list.append(self.mse(X,y))

        return self


    def r2_score(self,X,y):
        y = np.array(y).reshape(-1)
        y_pred = self.predict(X)
        ss_res = np.sum((y-y_pred)**2)
        ss_tot = np.sum((y-y.mean())**2)
        return 1 - ss_res/ss_tot

    def mse(self,X,y):
        m = X.shape[0]
        y = np.array(y).reshape(-1)
        y_pred = self.predict(X)
        return np.sum((y-y_pred)**2)/m



In [42]:
# Model parameters (true weights/biases for testing)
true_w = np.array([2.0, -1.5])  # 2 features
true_b = 0.5
n_samples = 1000
noise_scale = 0.01

# Generate features (n_samples x n_features)
X = np.random.randn(n_samples, len(true_w))

# Generate targets: y = X@w + b + noise
noise = np.random.randn(n_samples, 1) * noise_scale
y = X @ true_w.reshape(-1, 1) + true_b + noise

# Train/test split (80/20)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)


In [43]:
lr = LinearRegression(epochs=300,learning_rate=0.1,reg=0.1)
lr.fit(X_train,y_train)

<__main__.LinearRegression at 0x1643c3830>

In [44]:
y_pred = lr.predict(X_test)
lr.r2_score(X_test,y_test)

0.9999783995472641

In [45]:
np.round(lr.mse(X_test,y_test),4)

0.0001

In [46]:
lr.weights,lr.bias

(array([ 2.00006332, -1.49976914]), 0.5000720278378139)

### Logistic Regression

In [47]:
class LogisticRegression():
    def __init__(self,learning_rate=0.01,epochs=100,reg=0.0):
        self.lr = learning_rate
        self.iters =epochs
        self.weights = None
        self.bias = None
        self.reg = reg

    def predict(self,X,threshold=0.5):
        z = X.dot(self.weights) + self.bias
        y_predicited = self._sigmoid(z)

        return (y_predicited > threshold).astype(int)

    def predict_proba(self,X):
        return self._sigmoid(X.dot(self.weights)+self.bias)

    def _update_weights(self,X,y):
        m = X.shape[0]
        z = np.dot(X,self.weights) + self.bias
        y_pred = self._sigmoid(z)
        dw = np.dot(X.T,(y_pred-y))/m
        reg = 2*(self.reg/m)*self.weights
        dw += reg
        db = np.sum(y_pred-y)/m
        self.weights = self.weights - self.lr*dw
        self.bias = self.bias - self.lr*db
        return self
    
    def _sigmoid(self,z):
        return 1/(1+np.exp(-z))

    def fit(self,X,y):
        _ , n = X.shape
        X = np.array(X)
        y = np.array(y).reshape(-1)
        self.weights = np.zeros(n)
        self.bias = 0
        self.error_list = []

        for _ in range(self.iters):
            self._update_weights(X,y)

        return self

    def accuracy(self,X,y):
        y = np.array(y).reshape(-1)
        y_pred = self.predict(X)
        return np.mean(y_pred==y)

In [48]:
# Model parameters (true weights/biases)
true_w = np.array([2.0, -1.5])  # 2 features
true_b = 0.5
n_samples = 1000
noise_scale = 0.2

# Generate features (n_samples x n_features)
X = np.random.randn(n_samples, len(true_w))

# Generate logits: z = X@w + b
z = X @ true_w + true_b

# Apply sigmoid to get probabilities
probs = 1 / (1 + np.exp(-z))

# Sample binary labels (Bernoulli with p=probs) + noise
y = np.random.binomial(1, probs, n_samples)

# Train/test split (80/20)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

print(f"X_train shape: {X_train.shape}")
print(f"y_train shape: {y_train.shape}")
print(f"Class balance train: {np.bincount(y_train)}")

X_train shape: (800, 2)
y_train shape: (800,)
Class balance train: [331 469]


In [49]:
model = LogisticRegression(epochs=200,learning_rate=0.2)
model.fit(X_train,y_train)

<__main__.LogisticRegression at 0x1643c1ca0>

In [50]:
from sklearn.metrics import classification_report

In [51]:
y_pred = model.predict(X_test)
print(classification_report(y_test,y_pred))


              precision    recall  f1-score   support

           0       0.72      0.70      0.71        83
           1       0.79      0.80      0.80       117

    accuracy                           0.76       200
   macro avg       0.75      0.75      0.75       200
weighted avg       0.76      0.76      0.76       200



In [52]:
model.weights,model.bias

(array([ 1.97021542, -1.46754855]), 0.5768325384979366)

### K Nearest Neighbors

In [53]:
class KNearestClassifier():
    def __init__(self,n_nearest=5):
        self.k=n_nearest

    def fit(self,X,y):
        self.X = np.asarray(X,dtype=float)
        self.y = np.asarray(y,dtype=float).reshape(-1)

    def predict(self,Xqs):
        prediction=[]
        for xq in Xqs:
            prediction.append(self._predict_one_query_point(xq))
        return np.array(prediction)

    def _predict_one_query_point(self,xq):
        distance = np.sqrt(np.sum((self.X - xq)**2,axis=1))
        distance = np.array([[distance[i],self.y[i]] for i in range(len(distance))])
        distance = distance[np.argsort(distance[:,0])]
        distance = distance[:self.k]
        classes,counts = np.unique(distance[:,1],return_counts=True)
        return int(classes[counts.argmax()])

    def accuracy(self,X,y):
        y_pred = self.predict(X)
        return np.mean(y_pred==y)

In [54]:
knn = KNearestClassifier(n_nearest=5)
knn.fit(X_train,y_train)

In [55]:
y_pred = knn.predict(X_test)
print(classification_report(y_test,y_pred))

              precision    recall  f1-score   support

           0       0.81      0.71      0.76        83
           1       0.81      0.88      0.84       117

    accuracy                           0.81       200
   macro avg       0.81      0.80      0.80       200
weighted avg       0.81      0.81      0.81       200



### Weighted KNN

In [56]:
class KNearestClassifier():
    def __init__(self,n_nearest=5,weights=None):
        self.k=n_nearest
        self.weights = weights

    def fit(self,X,y):
        self.X = np.asarray(X,dtype=float)
        self.y = np.asarray(y,dtype=float).reshape(-1)

    def predict(self,Xqs):
        prediction=[]
        for xq in Xqs:
            prediction.append(self._predict_one_query_point(xq))
        return np.array(prediction)

    def _predict_one_query_point(self,xq):
        distance = np.sqrt(np.sum((self.X - xq)**2,axis=1))
        distance = np.array([[distance[i],self.y[i]] for i in range(len(distance))])
        distance = distance[np.argsort(distance[:,0])]
        k_nearest = distance[:self.k]
        if self.weights == None:
            classes,counts = np.unique(k_nearest[:,1],return_counts=True)
            return int(classes[counts.argmax()])
        else:
            epsilon = 1e-4
            class_scores = {}
            for dist,label in k_nearest:
                weight = 1/(dist + epsilon)
                if label in class_scores:
                    class_scores[label] += weight
                else:
                    class_scores[label] = weight
            return int(max(class_scores,key=class_scores.get))
            

    def accuracy(self,X,y):
        y_pred = self.predict(X)
        return np.mean(y_pred==y)

In [57]:
knn = KNearestClassifier(n_nearest=5,weights=True)
knn.fit(X_train,y_train)

In [58]:
y_pred = knn.predict(X_test)
print(classification_report(y_test,y_pred))

              precision    recall  f1-score   support

           0       0.77      0.72      0.75        83
           1       0.81      0.85      0.83       117

    accuracy                           0.80       200
   macro avg       0.79      0.78      0.79       200
weighted avg       0.79      0.80      0.79       200

