In [4]:
from sklearn.datasets import make_regression
import pandas as pd
import numpy as np

import plotly.express as px
import plotly.graph_objects as go

from sklearn.metrics import mean_absolute_error,mean_squared_error,r2_score

X,y = make_regression(n_samples = 100,n_features = 3,n_informative=2,n_targets = 1,noise = 50)
df = pd.DataFrame({'feature1':X[:,0],'feature2':X[:,1],'feature3':X[:,2],'target':y})
print(df.head(),'\n\n')
print(df.shape)

fig = px.scatter_3d(df,x = 'feature1',y = 'feature2',z = 'target', color='feature3')
fig.show()

   feature1  feature2  feature3      target
0 -1.435184  0.304595  0.958299  -54.177586
1 -0.102378  0.132542  0.927063    6.192631
2 -0.693609 -1.169560 -0.179242 -171.368670
3 -2.221596 -0.195525  0.637051 -153.402746
4 -0.114714  0.099427 -0.605519  -44.098753 


(100, 4)


In [7]:
# Now we will plot a best fit regression plane in these data
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size = 0.2,random_state = 3)

In [8]:
lr = LinearRegression()

In [9]:
lr.fit(X_train,y_train)

In [10]:
y_pred = lr.predict(X_test)

In [11]:
print("MAE",mean_absolute_error(y_test,y_pred))
print("MSE",mean_squared_error(y_test,y_pred))
print("R2 Score",r2_score(y_test,y_pred))

MAE 44.59464166969335
MSE 3305.3716278441484
R2 Score 0.368740370483875


In [12]:
x = np.linspace(-5,5,10)
y = np.linspace(-5,5,10)
xGrid,yGrid = np.meshgrid(y,x)

final = np.vstack((xGrid.ravel().reshape(1,100),yGrid.ravel().reshape(1,100))).T
z_final = lr.predict(final).reshape(10,10)
z = z_final

ValueError: X has 2 features, but LinearRegression is expecting 3 features as input.

In [13]:
x = np.linspace(df['feature1'].min(),df['feature1'].max(),10) # Use min/max from data for better visualization range
y = np.linspace(df['feature2'].min(),df['feature2'].max(),10) # Use min/max from data for better visualization range
xGrid,yGrid = np.meshgrid(y,x)

# Create a grid for feature1 and feature2
final = np.vstack((xGrid.ravel(), yGrid.ravel())).T

# Get the mean of the third feature from the training data
feature3_mean = X_train[:, 2].mean()

# Create an array of the mean of the third feature with the same number of samples as the grid
feature3_mean_array = np.full((final.shape[0], 1), feature3_mean)

# Combine the grid for feature1 and feature2 with the mean of feature3
final_with_feature3 = np.hstack((final, feature3_mean_array))

# Predict using the array with all 3 features
z_final = lr.predict(final_with_feature3).reshape(10,10)
z = z_final

In [1]:
from sklearn.datasets import make_regression
import pandas as pd
import numpy as np
import plotly.express as px
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Generate synthetic regression data with 3 features and 1 target
X, y = make_regression(
    n_samples=100,
    n_features=3,
    n_informative=2,
    n_targets=1,
    noise=50,
    random_state=42
)

# Create a DataFrame from the generated data
df = pd.DataFrame(X, columns=['feature1', 'feature2', 'feature3'])
df['target'] = y

# Display first few rows and shape
print(df.head(), '\n\n')
print("DataFrame shape:", df.shape)

# 3D scatter plot (feature1, feature2, target) with color encoded by feature3
fig = px.scatter_3d(
    df,
    x='feature1',
    y='feature2',
    z='target',
    color='feature3',  # Optional: use color for extra dimension
    title='3D Scatter of Regression Data'
)
fig.show()

   feature1  feature2  feature3      target
0 -0.518270  1.477894  0.357113   67.712769
1  0.954002  1.135566  0.570891  108.807691
2  0.856399 -0.446515 -1.514847  -70.707893
3 -0.161286 -0.802277 -0.342715  -65.176165
4  0.357787 -1.142970  0.058209  -13.755321 


DataFrame shape: (100, 4)
