### Linear Regression

In [135]:
import numpy as np
from sklearn.model_selection import train_test_split

In [136]:
class LinearRegression():
    def __init__(self,learning_rate=0.01,epochs=100,reg=0.0):
        self.lr = learning_rate
        self.iters =epochs
        self.weights = None
        self.bias = None
        self.reg = reg

    def predict(self,X):
        return X.dot(self.weights) + self.bias

    def _update_weights(self,X,y):
        m = X.shape[0]
        y_pred = self.predict(X)
        error = y - y_pred
        dw = -(2*X.T.dot(error))/m
        reg = 2*(self.reg/m)*self.weights
        dw += reg
        db = -(2*np.sum(error))/m
        self.weights = self.weights - self.lr*dw
        self.bias = self.bias - self.lr*db
        return self

    def fit(self,X,y):
        _ , n = X.shape
        X = np.array(X)
        y = np.array(y).reshape(-1)
        self.weights = np.zeros(n)
        self.bias = 0
        self.error_list = []

        for _ in range(self.iters):
            self._update_weights(X,y)
            self.error_list.append(self.mse(X,y))

        return self


    def r2_score(self,X,y):
        y = np.array(y).reshape(-1)
        y_pred = self.predict(X)
        ss_res = np.sum((y-y_pred)**2)
        ss_tot = np.sum((y-y.mean())**2)
        return 1 - ss_res/ss_tot

    def mse(self,X,y):
        m = X.shape[0]
        y = np.array(y).reshape(-1)
        y_pred = self.predict(X)
        return np.sum((y-y_pred)**2)/m



In [137]:
# Model parameters (true weights/biases for testing)
true_w = np.array([2.0, -1.5])  # 2 features
true_b = 0.5
n_samples = 1000
noise_scale = 0.01

# Generate features (n_samples x n_features)
X = np.random.randn(n_samples, len(true_w))

# Generate targets: y = X@w + b + noise
noise = np.random.randn(n_samples, 1) * noise_scale
y = X @ true_w.reshape(-1, 1) + true_b + noise

# Train/test split (80/20)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)


In [138]:
lr = LinearRegression(epochs=300,learning_rate=0.1,reg=0.1)
lr.fit(X_train,y_train)

<__main__.LinearRegression at 0x17f0fb410>

In [139]:
y_pred = lr.predict(X_test)
lr.r2_score(X_test,y_test)

0.9999871221438803

In [140]:
np.round(lr.mse(X_test,y_test),4)

0.0001

In [141]:
lr.weights,lr.bias

(array([ 1.99983506, -1.49987469]), 0.5001338823209551)

### Logistic Regression

In [142]:
class LogisticRegression():
    def __init__(self,learning_rate=0.01,epochs=100,reg=0.0):
        self.lr = learning_rate
        self.iters =epochs
        self.weights = None
        self.bias = None
        self.reg = reg

    def predict(self,X,threshold=0.5):
        z = X.dot(self.weights) + self.bias
        y_predicited = self._sigmoid(z)

        return (y_predicited > threshold).astype(int)

    def predict_proba(self,X):
        return self._sigmoid(X.dot(self.weights)+self.bias)

    def _update_weights(self,X,y):
        m = X.shape[0]
        z = np.dot(X,self.weights) + self.bias
        y_pred = self._sigmoid(z)
        dw = np.dot(X.T,(y_pred-y))/m
        reg = 2*(self.reg/m)*self.weights
        dw += reg
        db = np.sum(y_pred-y)/m
        self.weights = self.weights - self.lr*dw
        self.bias = self.bias - self.lr*db
        return self
    
    def _sigmoid(self,z):
        return 1/(1+np.exp(-z))

    def fit(self,X,y):
        _ , n = X.shape
        X = np.array(X)
        y = np.array(y).reshape(-1)
        self.weights = np.zeros(n)
        self.bias = 0
        self.error_list = []

        for _ in range(self.iters):
            self._update_weights(X,y)

        return self

    def accuracy(self,X,y):
        y = np.array(y).reshape(-1)
        y_pred = self.predict(X)
        return np.mean(y_pred==y)

In [143]:
# Model parameters (true weights/biases)
true_w = np.array([2.0, -1.5])  # 2 features
true_b = 0.5
n_samples = 1000
noise_scale = 0.2

# Generate features (n_samples x n_features)
X = np.random.randn(n_samples, len(true_w))

# Generate logits: z = X@w + b
z = X @ true_w + true_b

# Apply sigmoid to get probabilities
probs = 1 / (1 + np.exp(-z))

# Sample binary labels (Bernoulli with p=probs) + noise
y = np.random.binomial(1, probs, n_samples)

# Train/test split (80/20)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

print(f"X_train shape: {X_train.shape}")
print(f"y_train shape: {y_train.shape}")
print(f"Class balance train: {np.bincount(y_train)}")

X_train shape: (800, 2)
y_train shape: (800,)
Class balance train: [355 445]


In [144]:
model = LogisticRegression(epochs=200,learning_rate=0.2)
model.fit(X_train,y_train)

<__main__.LogisticRegression at 0x17f0fb590>

In [145]:
from sklearn.metrics import classification_report

In [146]:
y_pred = model.predict(X_test)
print(classification_report(y_test,y_pred))


              precision    recall  f1-score   support

           0       0.82      0.84      0.83        89
           1       0.87      0.86      0.86       111

    accuracy                           0.85       200
   macro avg       0.85      0.85      0.85       200
weighted avg       0.85      0.85      0.85       200



In [147]:
model.weights,model.bias

(array([ 1.77524599, -1.3094776 ]), 0.37508544126101717)

### K Nearest Neighbors

In [148]:
class KNearestClassifier():
    def __init__(self,n_nearest=5):
        self.k=n_nearest

    def fit(self,X,y):
        self.X = np.asarray(X,dtype=float)
        self.y = np.asarray(y,dtype=float).reshape(-1)

    def predict(self,Xqs):
        prediction=[]
        for xq in Xqs:
            prediction.append(self._predict_one_query_point(xq))
        return np.array(prediction)

    def _predict_one_query_point(self,xq):
        distance = np.sqrt(np.sum((self.X - xq)**2,axis=1))
        distance = np.array([[distance[i],self.y[i]] for i in range(len(distance))])
        distance = distance[np.argsort(distance[:,0])]
        distance = distance[:self.k]
        classes,counts = np.unique(distance[:,1],return_counts=True)
        return int(classes[counts.argmax()])

    def accuracy(self,X,y):
        y_pred = self.predict(X)
        return np.mean(y_pred==y)

In [149]:
knn = KNearestClassifier(n_nearest=5)
knn.fit(X_train,y_train)

In [150]:
y_pred = knn.predict(X_test)
print(classification_report(y_test,y_pred))

              precision    recall  f1-score   support

           0       0.80      0.76      0.78        89
           1       0.82      0.85      0.83       111

    accuracy                           0.81       200
   macro avg       0.81      0.81      0.81       200
weighted avg       0.81      0.81      0.81       200



### Weighted KNN

In [151]:
class KNearestClassifier():
    def __init__(self,n_nearest=5,weights=None):
        self.k=n_nearest
        self.weights = weights

    def fit(self,X,y):
        self.X = np.asarray(X,dtype=float)
        self.y = np.asarray(y,dtype=float).reshape(-1)

    def predict(self,Xqs):
        prediction=[]
        for xq in Xqs:
            prediction.append(self._predict_one_query_point(xq))
        return np.array(prediction)

    def _predict_one_query_point(self,xq):
        distance = np.sqrt(np.sum((self.X - xq)**2,axis=1))
        distance = np.array([[distance[i],self.y[i]] for i in range(len(distance))])
        distance = distance[np.argsort(distance[:,0])]
        k_nearest = distance[:self.k]
        if self.weights == None:
            classes,counts = np.unique(k_nearest[:,1],return_counts=True)
            return int(classes[counts.argmax()])
        else:
            epsilon = 1e-4
            class_scores = {}
            for dist,label in k_nearest:
                weight = 1/(dist + epsilon)
                if label in class_scores:
                    class_scores[label] += weight
                else:
                    class_scores[label] = weight
            return int(max(class_scores,key=class_scores.get))
            

    def accuracy(self,X,y):
        y_pred = self.predict(X)
        return np.mean(y_pred==y)

In [152]:
knn = KNearestClassifier(n_nearest=5,weights=True)
knn.fit(X_train,y_train)

In [153]:
y_pred = knn.predict(X_test)
print(classification_report(y_test,y_pred))

              precision    recall  f1-score   support

           0       0.74      0.79      0.77        89
           1       0.82      0.78      0.80       111

    accuracy                           0.79       200
   macro avg       0.78      0.79      0.78       200
weighted avg       0.79      0.79      0.79       200



In [8]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

### Decision Tree

In [9]:
class Node():
    def __init__(self,feature=None,threshold=None,value=None,left=None,right=None):
        self.feature=feature
        self.threshold=threshold
        self.value=value
        self.left=left
        self.right=right

    def is_leaf_node(self):
        return self.value is not None

In [10]:
class DecisionTree():
    def __init__(self,max_depth=10):
        self.root=None
        self.max_depth = max_depth

    def _gini_impurity(self,y):
        return 1 - np.sum((np.bincount(y)/len(y))**2)

    def _information_gain(self,parent,left_child,right_child):
        parent_gini = self._gini_impurity(parent)
        left_child_gini = self._gini_impurity(left_child)
        right_child_gini = self._gini_impurity(right_child)
        n1, n2 = len(left_child), len(right_child)
        n = len(parent)
        weighted_children_gini = (n1/n)*left_child_gini + (n2/n)*right_child_gini

        return parent_gini - weighted_children_gini

    def fit(self,X,y):
        print(np.unique(y))
        self.root = self._grow_tree(X,y,0)
    
    def _grow_tree(self,X,y,depth):
        n_labels = len(np.unique(y))

        if depth >= self.max_depth or n_labels == 1:
            most_common = np.bincount(y).argmax()
            return Node(value=most_common)

        split_feature_idx, split_threshold = self._best_split(X,y)

        left_child_idx = np.where(X[:,split_feature_idx] <= split_threshold)
        left = self._grow_tree(X[left_child_idx],y[left_child_idx], depth = depth+1)

        right_child_idx = np.where(X[:,split_feature_idx] > split_threshold)
        right = self._grow_tree(X[right_child_idx],y[right_child_idx], depth = depth+1)

        return Node(feature=split_feature_idx,threshold=split_threshold, left=left, right=right)

    def _best_split(self,X,y):
        n_features = X.shape[1]
        split_threshold,best_info_gain = None, -1
        split_feature_idx = None
        for feature_idx in range(n_features):
            X_column = X[:,feature_idx]
            thresholds = np.unique(X_column)
            for threshold in thresholds:
                left_child_idx = np.where(X_column <= threshold) 
                right_child_idx = np.where(X_column > threshold) 

                info_gain = self._information_gain(y,y[left_child_idx],y[right_child_idx])
                if info_gain > best_info_gain:
                    best_info_gain = info_gain
                    split_threshold = threshold
                    split_feature_idx = feature_idx

        return split_feature_idx,split_threshold

    def predict(self,X):
        return np.array([self._traverse_tree(x,self.root) for x in X])

    def _traverse_tree(self,x, node):
        if node.is_leaf_node():
            return node.value

        split_idx = node.feature
        split_threshold = node.threshold
        if x[split_idx] <= split_threshold:
            return self._traverse_tree(x,node.left)
        else:
            return self._traverse_tree(x,node.right)


In [11]:
from sklearn.datasets import make_classification
X, y = make_classification(
    n_samples=1000,
    n_features=6,         # all numeric float features
    n_informative=5,
    n_redundant=1,
    n_classes=2,
    n_clusters_per_class=1,
    class_sep=1.5,
    random_state=42
)

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

In [13]:
dt = DecisionTree()
dt.fit(X_train,y_train)

[0 1]


In [14]:
y_pred = dt.predict(X_test)
print(classification_report(y_test,y_pred))

              precision    recall  f1-score   support

           0       0.96      0.98      0.97       100
           1       0.98      0.96      0.97       100

    accuracy                           0.97       200
   macro avg       0.97      0.97      0.97       200
weighted avg       0.97      0.97      0.97       200

