In [6]:
import pandas as pd
import numpy as np

In [4]:
# Load the insurance dataset from the relative path ../data/insurance.csv into a DataFrame.
df = pd.read_csv(r'../data/insurance.csv')

In [5]:
# Bare last-line expression: the notebook renders the DataFrame as the cell output.
df

Unnamed: 0,age,sex,bmi,children,smoker,region,charges
0,19,female,27.900,0,yes,southwest,16884.92400
1,18,male,33.770,1,no,southeast,1725.55230
2,28,male,33.000,3,no,southeast,4449.46200
3,33,male,22.705,0,no,northwest,21984.47061
4,32,male,28.880,0,no,northwest,3866.85520
...,...,...,...,...,...,...,...
1333,50,male,30.970,3,no,northwest,10600.54830
1334,18,female,31.920,0,no,northeast,2205.98080
1335,18,female,36.850,0,no,southeast,1629.83350
1336,21,female,25.800,0,no,southwest,2007.94500


Linear regression is used to model the relationship between a dependent variable and one or more independent variables by fitting a linear equation to the observed data.

Line of Best Fit
- The line of best fit, or the regression line, is the line that minimizes the differences between the predicted values and the actual values of the dependent variable. The differences are measured using a loss function, commonly the Mean Squared Error.

Mean Squared Error (MSE)
- MSE is the average of the squared differences between the observed actual outcomes and the outcomes predicted by the linear model.

Gradient Descent
- To find the line of best fit, the linear regression algorithm can use an optimization technique called Gradient Descent. Gradient descent iteratively adjusts the coefficients (slope and intercept in a simple linear regression) of the linear equation to minimize MSE.

- During each iteration, the algorithm computes the gradient (partial derivatives) of the MSE with respect to each coefficient. The coefficients are then updated in the opposite direction of the gradient to reduce the error.

- The process continues until the algorithm converges to a minimum MSE, which corresponds to the optimal line of best fit.

In [None]:
class LinearRegression:
    """Linear regression trained with batch gradient descent.

    Parameters
    ----------
    lr : float, default 0.001
        Learning rate, i.e. the step size applied to each gradient update.
    n_iters : int, default 1000
        Number of gradient-descent iterations performed by ``fit``.

    Attributes
    ----------
    weights : numpy.ndarray or None
        One coefficient per feature; ``None`` until ``fit`` has been called.
    bias : float or None
        Intercept term; ``None`` until ``fit`` has been called.
    """

    def __init__(self, lr = 0.001, n_iters=1000):
        self.lr = lr
        self.n_iters = n_iters
        self.weights = None
        self.bias = None

    def fit(self, X, y):
        """Learn ``weights`` and ``bias`` from the training data.

        Parameters
        ----------
        X : array-like with a two-element ``shape`` (n_samples, n_features)
            Training features.
        y : array-like of length n_samples
            Target values.
        """
        # One weight per feature of X; both weights and bias start at 0 and
        # are moved away from that starting point by gradient descent.
        n_samples, n_features = X.shape
        self.weights = np.zeros(n_features)
        self.bias = 0

        for _ in range(self.n_iters):
            # Prediction is the dot product of each row of X with the weights
            # (np.dot performs the summation), plus the bias.
            y_pred = np.dot(X, self.weights) + self.bias
            error = y_pred - y

            # Gradients of the squared-error loss with respect to the weights
            # and the bias. The constant factor 2 from differentiating the
            # square is omitted; it only rescales the effective learning rate.
            dw = (1 / n_samples) * np.dot(X.T, error)
            db = (1 / n_samples) * np.sum(error)

            # Step against the gradient. The update must be stored in
            # ``self.weights`` (the attribute used for prediction); writing to
            # any other name would leave the weights stuck at zero.
            self.weights = self.weights - self.lr * dw
            self.bias = self.bias - self.lr * db

    def predict(self, X):
        """Return the predictions ``X @ weights + bias`` for the rows of X."""
        y_pred = np.dot(X, self.weights) + self.bias
        return y_pred

def mse(y_test, predictions):
    """Mean squared error between the true values and the predictions."""
    residuals = y_test - predictions
    squared_residuals = residuals ** 2
    return np.mean(squared_residuals)


In [8]:
import numpy as np
from collections import Counter

def euclidean_distance(x1, x2):
    """Euclidean (L2) distance between two points given as numeric arrays."""
    delta = x1 - x2
    sum_of_squares = np.sum(delta ** 2)
    return np.sqrt(sum_of_squares)

class KNN:
    """k-nearest-neighbours classifier using Euclidean distance and majority vote.

    Parameters
    ----------
    k : int, default 3
        Number of nearest training samples that vote on each prediction.
    """

    def __init__(self, k=3):
        self.k = k

    def fit(self, X, y):
        """Store the training data; KNN does no computation at fit time.

        Parameters
        ----------
        X : array-like, one row per training sample
            Training features.
        y : array-like, one label per training sample
            Training labels.
        """
        # Convert to arrays so that rows are iterated/indexed by position
        # (a DataFrame would otherwise iterate over column names, and a Series
        # would be indexed by label).
        self.X_train = np.asarray(X)
        self.y_train = np.asarray(y)

    def predict(self, X):
        """Return a list with the predicted label for every row of X."""
        predictions = [self._predict(x) for x in np.asarray(X)]
        return predictions

    def _predict(self, x):
        """Predict the label of a single sample ``x`` by majority vote."""
        # Compute the Euclidean distance from x to every training sample at
        # once: sqrt of the sum of squared differences over all non-sample axes.
        diffs = self.X_train - x
        feature_axes = tuple(range(1, diffs.ndim))
        distances = np.sqrt(np.sum(diffs ** 2, axis=feature_axes))

        # Get the labels of the k closest training samples.
        k_indices = np.argsort(distances)[:self.k]
        k_nearest_labels = [self.y_train[i] for i in k_indices]

        # Majority vote: build the Counter from the labels first, then ask it
        # for the most common one (most_common is a Counter method, not a
        # list method).
        most_common = Counter(k_nearest_labels).most_common(1)
        return most_common[0][0]