In [47]:
from sklearn.datasets import make_regression
import pandas as pd
import numpy as np

import plotly.express as px
import plotly.graph_objects as go

from sklearn.metrics import mean_absolute_error,mean_squared_error,r2_score

In [48]:
X,y = make_regression(n_samples=100, n_features=2, n_informative=2, n_targets=1, noise=50)

In [49]:
df = pd.DataFrame({'feature1':X[:,0],'feature2':X[:,1],'target':y})


In [50]:

df.shape

(100, 3)

In [51]:
df.head()


Unnamed: 0,feature1,feature2,target
0,-0.359716,0.543359,-6.964515
1,1.673838,0.320939,61.956448
2,0.163191,-0.3208,31.857419
3,-0.743876,0.466503,-33.498611
4,-0.268195,0.182973,17.625188


In [52]:
fig = px.scatter_3d(df, x='feature1', y='feature2', z='target')

fig.show()

In [53]:
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=3)

In [54]:
from sklearn.linear_model import LinearRegression
lr = LinearRegression()

In [55]:
lr.fit(X_train,y_train)


In [56]:
y_pred = lr.predict(X_test)


In [57]:

print("MAE",mean_absolute_error(y_test,y_pred))
print("MSE",mean_squared_error(y_test,y_pred))
print("R2 score",r2_score(y_test,y_pred))


MAE 47.149972704612615
MSE 3732.321765736825
R2 score 0.6182288756212949


In [58]:
x_range = np.linspace(df['feature1'].min(), df['feature1'].max(), 10)
y_range = np.linspace(df['feature2'].min(), df['feature2'].max(), 10)
x_grid, y_grid = np.meshgrid(x_range, y_range)
final = np.vstack((x_grid.ravel().reshape(1,100),y_grid.ravel().reshape(1,100))).T
z_final = lr.predict(final).reshape(10,10)

fig = px.scatter_3d(df, x=df['feature1'].ravel(), y=df['feature2'].ravel(), z=df['target'].ravel())

fig.add_trace(go.Surface(x = x_range, y = y_range, z =z_final ))

fig.show()


Series.ravel is deprecated. The underlying array is already 1D, so ravel is not necessary.  Use `to_numpy()` for conversion to a numpy array instead.



In [59]:


# Prepare data for the best-fit plane
x_range = np.linspace(df['feature1'].min(), df['feature1'].max(), 10)
y_range = np.linspace(df['feature2'].min(), df['feature2'].max(), 10)
x_grid, y_grid = np.meshgrid(x_range, y_range)
z_grid = lr.intercept_ + lr.coef_[0] * x_grid + lr.coef_[1] * y_grid

# Visualize data points and regression plane
fig = go.Figure()

# Another Method for Visualiztion


In [60]:


# Add scatter plot for actual data points
fig.add_trace(go.Scatter3d(
    x=df['feature1'],
    y=df['feature2'],
    z=df['target'],
    mode='markers',
    marker=dict(size=5, color='blue'),
    name='Data Points'
))
# Add regression plane
fig.add_trace(go.Surface(
    x=x_grid,
    y=y_grid,
    z=z_grid,
    colorscale='Viridis',
    opacity=0.7,
    name='Best Fit Plane'
))

# Update layout for better visualization
fig.update_layout(
    scene=dict(
        xaxis_title='Feature 1',
        yaxis_title='Feature 2',
        zaxis_title='Target'
    ),
    title='3D Visualization with Best Fit Plane'
)

fig.show()

In [61]:

lr.coef_

array([14.43833963, 50.59220326])

In [62]:

lr.intercept_

8.186834435617188

# BATCH GRADIENT DECENT

In [63]:
class GDRegressor:
    
    def __init__(self,learning_rate=0.01,epochs=100):
        
        self.coef_ = None
        self.intercept_ = None
        self.lr = learning_rate
        self.epochs = epochs
        
    def fit(self,X_train,y_train):
        # init your coefs
        self.intercept_ = 0
        self.coef_ = np.ones(X_train.shape[1])
        
        for i in range(self.epochs):
            # update all the coef and the intercept
            y_hat = np.dot(X_train,self.coef_) + self.intercept_
            #print("Shape of y_hat",y_hat.shape)
            intercept_der = -2 * np.mean(y_train - y_hat)
            self.intercept_ = self.intercept_ - (self.lr * intercept_der)
            
            coef_der = -2 * np.dot((y_train - y_hat),X_train)/X_train.shape[0]
            self.coef_ = self.coef_ - (self.lr * coef_der)
        
        print(self.intercept_,self.coef_)
    
    def predict(self,X_test):
        return np.dot(X_test,self.coef_) + self.intercept_

In [64]:
gdr = GDRegressor(epochs=1000,learning_rate=0.5)

In [65]:
gdr.fit(X_train,y_train)

8.186834435617193 [14.43833963 50.59220326]


In [66]:
y_pred = gdr.predict(X_test)

In [67]:
r2_score(y_test,y_pred)

0.6182288756212949