In [441]:
import numpy as np
import pandas as pd

In [442]:
class SimpleLinearRegression:
    def __init__(self):
        self.slope=None
        self.intercept=None
        self.n_features_in=None
    
    def fit(self,X_train,y_train):
        X_train=X_train
        y_train = y_train
        mean_x=X_train.mean()
        mean_y=y_train.mean()
        self.n_features_in=X_train.shape[1]
        num=0
        den=0
        for  i in range(X_train.shape[0]):
            num+=((X_train[i]-mean_x)*(y_train[i]-mean_y))
            den+=((X_train[i]-mean_x)*(X_train[i]-mean_x))

        if den == 0:
            raise ValueError("Cannot fit line: all X values are the same.")

        self.slope=num/den
        self.intercept=mean_y-(self.slope)*(mean_x)

    def predict(self,X_test):
        X_test=X_test.flatten()
        prediction=self.slope*X_test+self.intercept
        return prediction.flatten()
    
    def r2_score(self,y_test,y_pred):
        res=0
        tot=0
        y_mean=y_test.mean()
        if y_test.shape[0]!=y_pred.shape[0]:
            raise ValueError("Given values do not have same dimensions")

        for i in range(y_test.shape[0]):
            res+=(y_test[i]-y_pred[i])**2
            tot+=(y_test[i]-y_mean)**2
        if tot==0:
            raise ValueError("r2 not defined")
        return 1-(res/tot)
    
    def adjusted_r2_score(self,y_test,y_pred):
        if y_test.shape[0]!=y_pred.shape[0]:
            raise ValueError("Given values do not have same dimensions")
        r=self.r2_score(y_test,y_pred)
        p=self.n_features_in
        n=y_test.shape[0]
        return 1-((1-r)*(n-1)/(n-p-1))
    
    def mse(self,y_test,y_pred):
        n=y_test.shape[0]
        mse=0
        if y_test.shape[0]!=y_pred.shape[0]:
            raise ValueError("Given values do not have same dimensions")
        for i in range(n):
            mse+=(y_test[i]-y_pred[i])**2
        return mse/n    

    def rmse(self,y_test,y_pred):
        return np.sqrt(self.mse(y_test,y_pred))

    def mae(self,y_test,y_pred):
        n=y_test.shape[0]
        mae=0
        if y_test.shape[0]!=y_pred.shape[0]:
            raise ValueError("Given values do not have same dimensions")
        for i in range(n):
            mae+=abs(y_test[i]-y_pred[i])
        return mae/n  
        



In [443]:
df=pd.read_csv('data/Placement.csv')

# Doing experimentation with sklearn


In [444]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

In [445]:
X=df['cgpa']
y=df['package']
X=np.array(X).reshape(-1,1)
y=np.array(y)

In [446]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=42)

In [447]:
X_train.dtype

dtype('float64')

In [448]:
print("X_train shape:", X_train.shape)
print("y_train shape:", y_train.shape)
print("X_test shape:", X_test.shape)
print("y_test shape:", y_test.shape)


X_train shape: (12, 1)
y_train shape: (12,)
X_test shape: (7, 1)
y_test shape: (7,)


In [449]:
lr=LinearRegression()
lr.fit(X_train,y_train)

In [450]:
print(lr.coef_)
print(lr.intercept_)

[0.50124065]
-0.5912920555026511


In [451]:
y_train

array([2.89, 3.23, 2.09, 3.25, 3.51, 2.98, 3.57, 3.65, 2.48, 1.86, 3.42,
       2.6 ])

In [452]:
lr.predict(X_test)

array([2.86225601, 3.36349665, 2.72190862, 1.97506006, 2.46126349,
       2.83719397, 3.12791355])

# Doing Experimentation with my Class

In [453]:
slr=SimpleLinearRegression()
slr.fit(X_train,y_train)

In [454]:
y_pred=slr.predict(X_test)

## getting same results as sklearn

In [455]:
y_pred

array([2.86225601, 3.36349665, 2.72190862, 1.97506006, 2.46126349,
       2.83719397, 3.12791355])

In [456]:
print(slr.slope)
print(slr.intercept)

[0.50124065]
[-0.59129206]


In [457]:
slr.n_features_in

1

## Getting accurate r2_score

In [458]:
slr.r2_score(y_test,y_pred)

0.5934776212622068

In [459]:
from sklearn.metrics import r2_score
r2_score(y_test,y_pred)

0.5934776212622068

In [460]:
slr.adjusted_r2_score(y_test,y_pred)

0.5121731455146482

In [461]:
slr.rmse(y_test,y_pred)

0.3518689856869947

In [462]:
slr.mae(y_test,y_pred)

0.29694758924927606

In [463]:
slr.mse(y_test,y_pred)

0.12381178308839448