In [9]:
# Linear regression implemented from scratch, evaluated with k-fold cross-validation
import pandas as pd
import numpy as np
from sklearn.model_selection import KFold
# Load the house-price table from the working directory and preview its first five rows.
df = pd.read_csv("Synthetic_House_Price_Dataset.csv")
print(df.head(5))

   Area  Bedrooms  Location     Price
0  1360         2     Urban  10998707
1  4272         3     Urban  34199426
2  3592         1     Urban  28695658
3   966         6     Urban   8444717
4  4926         1  Suburban  24865588


In [17]:
# dropna() returns a new DataFrame and leaves the original untouched, so the
# result has to be assigned back for the incomplete rows to actually be removed.
df = df.dropna()
# One-hot encode Location. drop_first=True omits the first category level, so
# the remaining indicator columns are not collinear with a constant (bias) column.
df = pd.get_dummies(df, columns=['Location'], drop_first=True)

In [49]:
# Feature matrix: one row per house, columns in the order listed below.
X = df.loc[:, ["Area", "Bedrooms", "Location_Suburban", "Location_Urban"]].to_numpy()
# Target as an (n, 1) column vector so it lines up with the matrix products used later.
y = df["Price"].to_numpy().reshape(-1, 1)

In [51]:
# Prepend a column of ones so the first weight acts as the intercept, then
# force both arrays to float for the linear-algebra routines.
# NOTE: this reassigns X from itself, so re-running the cell prepends another
# ones column; re-run the cell that builds X from df first.
X = np.concatenate([np.ones((len(X), 1)), X], axis=1).astype(float)
y = y.astype(float)


In [63]:
class LinearRegression():
    """Ordinary least-squares linear regression fitted in closed form.

    The model is ``y_hat = X @ weights``. No intercept is added internally:
    include a column of ones in ``X`` if a bias term is wanted.
    """

    def __init__(self):
        # Set by fit(); None means the model has not been fitted yet.
        self.weights=None

    def fit(self,X,y):
        """Estimate the weights that minimise ``||X @ w - y||^2``.

        Parameters:
            X: 2-D numeric array of shape (n_samples, n_features).
            y: numeric array of shape (n_samples,) or (n_samples, n_targets).

        The result is stored in ``self.weights``; nothing is returned.
        """
        # Solve the least-squares problem directly instead of forming and
        # inverting X.T @ X: this is numerically more stable and still returns
        # a (minimum-norm) solution when the columns of X are linearly dependent.
        self.weights=np.linalg.lstsq(X,y,rcond=None)[0]

    def predict(self,X):
        """Return ``X @ weights`` for the fitted model.

        Raises:
            RuntimeError: if fit() has not been called yet.
        """
        if self.weights is None:
            raise RuntimeError("LinearRegression.predict() called before fit()")
        return X @ self.weights

    def mse(self,y_true,y_pred):
        """Return the mean of the squared differences between the two arrays.

        Both arrays should have the same shape; mismatched shapes such as
        (n, 1) against (n,) would broadcast instead of pairing element-wise.
        """
        return np.mean((y_true-y_pred)**2)

    def r2_score(self,y_true,y_pred):
        """Return the coefficient of determination, ``1 - SS_res / SS_tot``.

        SS_tot is zero when ``y_true`` is constant, in which case the score
        is undefined (the division is by zero).
        """
        ss_res=np.sum((y_true-y_pred)**2)
        ss_tot=np.sum((y_true-np.mean(y_true))**2)
        return 1- (ss_res/ss_tot)
    
        

In [67]:
# 5-fold cross-validation; shuffling with a fixed seed keeps the splits reproducible.
kf = KFold(n_splits=5, shuffle=True, random_state=42)
mse_scores, r2_scores = [], []

for train_idx, test_idx in kf.split(X):
    # Slice out this fold's training rows and held-out rows.
    X_train, y_train = X[train_idx], y[train_idx]
    X_test, y_test = X[test_idx], y[test_idx]

    # A fresh model per fold, so no weights carry over between folds.
    model = LinearRegression()
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    # Score on the held-out rows only and record the results.
    mse = model.mse(y_test, y_pred)
    r2 = model.r2_score(y_test, y_pred)
    mse_scores.append(mse)
    r2_scores.append(r2)
    
    

In [75]:
# Report each fold's metrics. Iterating over the collected scores (rather than
# a hard-coded range(5)) keeps this cell correct if the number of folds changes.
for fold, (fold_mse, fold_r2) in enumerate(zip(mse_scores, r2_scores), start=1):
    print(f"Fold {fold}: MSE= {fold_mse},R2 = {fold_r2}")

# Summary across folds: unweighted mean of the per-fold metrics.
print("\nAvergae MSE:",np.mean(mse_scores))
print("\nAvergae R2:",np.mean(r2_scores))

Fold 1: MSE= 6146625657237.866,R2 = 0.9397103826581621
Fold 2: MSE= 6538450873117.69,R2 = 0.9262752549560571
Fold 3: MSE= 6637870262702.048,R2 = 0.9344220881171971
Fold 4: MSE= 6030674972965.662,R2 = 0.9430692407483944
Fold 5: MSE= 7311221149794.676,R2 = 0.9271322037943917

Avergae MSE: 6532968583163.588

Avergae R2: 0.9341218340548405
