In [2]:
from sklearn.datasets import make_regression
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go

from sklearn.metrics import mean_absolute_error, mean_squared_error,r2_score

In [3]:
X,y = make_regression(n_samples= 100, n_features =2, n_informative=2, n_targets=1,noise=50)

n_samples=100

Number of rows (observations) in your dataset.

So X will have 100 rows.

n_features=2

Number of independent variables (columns) in X.

So X will be shape (100, 2).

n_informative=2

Number of features that actually affect the target y.

Here both features are informative (none are useless/noise-only).

n_targets=1

Number of dependent variables.

Here we have one target variable y.

noise=50

Standard deviation of Gaussian noise added to y.

Makes the problem more realistic (data won’t be perfectly linear).

Higher value = more scatter around the regression line.

In [5]:
df = pd.DataFrame({'feature1':X[:,0],'feature2':X[:,1],'target':y})

In [6]:
df

Unnamed: 0,feature1,feature2,target
0,-0.145458,0.446627,-62.938722
1,0.845356,1.465370,113.667408
2,-1.454537,-0.224823,-181.967207
3,0.287613,-0.172373,63.386823
4,-0.191489,-0.986382,-56.496550
...,...,...,...
95,0.672805,-0.710899,100.502511
96,1.871583,0.924831,128.517911
97,0.296103,0.026440,25.768410
98,-0.706194,2.297798,52.868134


In [7]:
fig = px.scatter_3d(df, x='feature1',y='feature2',z='target')
fig.show()

In [8]:
# Train Test Split

from sklearn.model_selection import train_test_split

X_train, X_test , y_train , y_test = train_test_split(X,y,test_size=0.2,random_state=3)

In [9]:
X_train

array([[ 1.12457756, -1.3181004 ],
       [-1.28130241,  0.7073226 ],
       [ 1.71175303, -0.44499373],
       [-0.19148919, -0.98638208],
       [ 2.08136995, -0.19175438],
       [-1.9294446 ,  0.31522105],
       [-0.87273558,  0.30545813],
       [-1.64557633,  2.4300832 ],
       [ 1.59754405,  0.46270681],
       [ 0.36132938, -0.16925676],
       [-2.37411301, -0.27301095],
       [-0.32152009, -1.50092096],
       [-0.63725576, -0.9170704 ],
       [-0.49751101,  2.7787312 ],
       [ 0.02488886, -0.76369414],
       [ 0.28536594, -0.02901476],
       [ 0.74296645, -1.08291482],
       [-0.31466602, -1.60968284],
       [ 1.75257957, -0.43017017],
       [-1.51911843, -0.51175483],
       [-0.22005852,  0.23399746],
       [-0.93497291,  0.54227056],
       [ 0.33411176, -0.79654726],
       [-0.97134159,  1.56308698],
       [ 0.92861725,  0.02332462],
       [ 2.08627287, -0.8113058 ],
       [-1.55749921,  0.07052341],
       [ 1.82627748,  1.42587028],
       [ 0.62505007,

In [10]:
# Create LR Model

from sklearn.linear_model import LinearRegression

regressor = LinearRegression()
regressor.fit(X_train,y_train)

In [11]:
# Doing Predictions

y_pred = regressor.predict(X_test)

# # Plotting of y_test vs y_pred

# plt.plot(y_test , color ='blue', label ='test')
# plt.plot(y_pred , color ='red', label ='train')

# plt.show()

In [12]:
y_pred

array([  21.7488101 ,   14.62071255,   52.7435932 ,   57.08822802,
        119.95415096, -114.2642102 ,    9.97781671,  -22.37256369,
         47.97395633,  -49.44349385,  -79.19370635,  140.56904566,
        -90.88756378,   15.60789976,   54.33660911,   18.68894017,
         52.7852319 ,  163.51723802, -103.35253881,  -48.51079053])

In [13]:
y_test


array([ -18.93427568,   25.35205968,  121.40932215,   33.84169067,
        128.51791119, -155.05882578,   52.86813383, -117.69910219,
         27.7049975 ,  -59.28311686,  -75.93319201,  103.60659304,
        -74.81471913,   25.76841008,   25.64137657,   -8.52488436,
        -18.75349372,  167.06388852, -141.03882191, -144.98283009])

In [14]:
print('MAE:',mean_absolute_error(y_test, y_pred))
print('MSE:',mean_squared_error(y_test, y_pred))
print('RMSE:', np.sqrt(mean_squared_error(y_test, y_pred)))
print('R2 Score:',r2_score(y_pred,y_test))

MAE: 34.630979512244814
MSE: 1967.874400136292
RMSE: 44.360730383260055
R2 Score: 0.6621509490794089


Only if you build multiple models , you can validate whether these values are good or not.