In [4]:
import numpy as np
import pandas as pd

In [43]:
# creating LR class
class LinearRegression:
    def __init__(self, lr = 0.0001, iter = 1000):
        self.lr = lr
        self.iter = iter
        self.slope = None
        self.intercept = None
        
    # creating fit methond   
    def fit(self, X, y):
        sample, features = X.shape
        self.slope = np.zeros(features)
        self.intercept = 0
        
        # compute gradients
        for i in range(self.iter):
            y_pred = self.predict(X)
            
            d_slope = (1 / sample) * np.dot(X.T, (y_pred - y))
            d_intercept = (1 / sample) * np.sum(y_pred - y)
            
            # update parameters
            self.slope -= self.lr * d_slope
            self.intercept -= self.lr * d_intercept
            
    # predicting
    def predict(self, X):
        y_pred =  self.intercept + np.dot(X, self.slope)
        return y_pred
        

In [12]:
# loading dataframe

data = pd.read_csv('../data/car.csv')
data.head()

Unnamed: 0,Age,Mileage,Horsepower,Price
0,7,84298,345,22235.1
1,9,96252,333,19517.4
2,3,6559,211,28782.05
3,6,69206,290,23439.7
4,6,51176,143,22871.2


In [16]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 4 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   Age         1000 non-null   int64  
 1   Mileage     1000 non-null   int64  
 2   Horsepower  1000 non-null   int64  
 3   Price       1000 non-null   float64
dtypes: float64(1), int64(3)
memory usage: 31.4 KB


In [18]:
# checking duplicated values
data.duplicated().sum()

0

In [19]:
# checking null values
data.isna().sum()

Age           0
Mileage       0
Horsepower    0
Price         0
dtype: int64

In [20]:
# deviding into train and test split without using any additional libraries

split = int(0.8 * len(data))
print(split)

800


In [24]:
# deviding into train and test split using 80% data for train and 20% for test
X_train = data[['Age', 'Mileage', 'Horsepower']].iloc[:split].values
X_test = data[['Age', 'Mileage', 'Horsepower']].iloc[split:].values
y_train = data['Price'].iloc[:split].values
y_test = data['Price'].iloc[split:].values

In [26]:
# confirming shape
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((800, 3), (200, 3), (800,), (200,))

In [31]:
# let's standarize our data 

def standarized(X):
    mean = np.mean(X, axis=0)
    std = np.std(X, axis=0)
    
    return (X - mean) / std
    

In [32]:
# let's apply our function to X_train & X_test
X_train_standarized = standarized(X_train)
X_test_standarized = standarized(X_test)

In [41]:
# printing standarized X_test
X_test_standarized[:10]

array([[ 1.50974574,  1.36149844, -0.96915731],
       [ 0.34391891, -1.18898172, -0.4570997 ],
       [ 0.34391891, -1.07147046,  0.39044393],
       [-1.21051686,  0.1288964 ,  1.16735892],
       [ 0.73252786, -0.45140525,  1.37924482],
       [-0.82190791,  1.29251105,  1.37041624],
       [-1.21051686, -1.0952191 , -1.50770065],
       [ 0.34391891,  1.46816197,  0.67295847],
       [ 1.50974574, -0.41674046,  0.46107256],
       [-1.21051686, -0.43142081,  0.85835864]])

In [42]:
# printing standarized X_train
X_train_standarized[:10]

array([[ 0.7610666 ,  1.15026518,  0.4621742 ],
       [ 1.51599937,  1.57809001,  0.35573355],
       [-0.74879894, -1.63195625, -0.72641314],
       [ 0.38360021,  0.61013364, -0.02567881],
       [ 0.38360021, -0.03514676, -1.32957686],
       [-1.50373171,  1.54409022,  0.32912338],
       [-0.74879894,  1.44176872, -1.48036779],
       [ 0.38360021,  1.14010103,  0.92341705],
       [ 0.00613383, -0.75919931,  0.15172229],
       [ 0.38360021, -1.56785769,  0.69279563]])

In [44]:
# instantiating model
model = LinearRegression()

In [46]:
# fitting our training set into the model
model.fit(X_train_standarized, y_train)

In [47]:
model.slope

array([-243.91548366, -121.44534126,   95.96667426])

In [48]:
model.intercept

2411.2773000171683

In [49]:
# checking prediction
y_pred = model.predict(X_test_standarized)
y_pred

array([1784.67249152, 2427.92010399, 2494.98485262, 2802.91479013,
       2419.78501683, 2586.29821009, 2694.8613437 , 2213.67030618,
       2137.88572453, 2841.30897519, 2638.54107299, 2767.25389951,
       2251.73903877, 1946.82737593, 1959.10165353, 2284.70153118,
       2417.70632635, 2569.54770891, 2009.21198549, 2423.54557007,
       2262.03142117, 2611.88336061, 2839.17102709, 2489.01905521,
       2117.8809679 , 2683.67679996, 2844.83243207, 2722.35926592,
       2499.08595878, 2971.46424608, 2594.08305023, 2277.24875657,
       2449.25697184, 2154.27011332, 2362.61802809, 2150.68590633,
       2078.39860856, 1991.20193518, 2509.11133947, 2553.13901806,
       2769.54257927, 2478.49921452, 2310.40065783, 2003.81032017,
       2200.75538179, 2566.73534327, 2305.54788773, 2555.81473519,
       2445.14522441, 2898.49642241, 1849.05227495, 2110.75075878,
       2087.66827768, 2120.80157572, 2986.85837262, 2617.27020301,
       2468.28789834, 2185.25979552, 2350.80709364, 2679.21766