## MULTIPLE LINEAR REGRESSION   

In [1]:
# NumPy for numerical operations like arrays, math calculations, etc.
import numpy as np

# Pandas for handling datasets using DataFrames (tables)
import pandas as pd

# Matplotlib's pyplot for creating basic plots and graphs
import matplotlib.pyplot as plt

# Plotly Express for quick and easy interactive visualizations
import plotly.express as px

# Plotly Graph Objects for building detailed/custom interactive plots
import plotly.graph_objects as go

# make_regression generates a synthetic regression dataset
from sklearn.datasets import make_regression

# LinearRegression is the model fitted in this notebook
from sklearn.linear_model import LinearRegression

# Evaluation metrics to measure regression model performance
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# train_test_split divides the dataset into training and testing parts
from sklearn.model_selection import train_test_split


In [2]:
# Generate a synthetic regression dataset (randomly created data for regression problems).
# random_state pins the random generator so that a fresh "Restart & Run All"
# reproduces the same samples, and therefore the same metrics, coefficients and plots.
x, y = make_regression(
    n_samples=100,       # Total number of data points (rows) = 100
    n_features=2,        # Total number of input features (columns) = 2
    n_informative=2,     # Number of features that actually drive the output = 2 (both)
    n_targets=1,         # Number of output values (target variables) = 1
    noise=50,            # Standard deviation of the Gaussian noise added to the output
    random_state=42      # Fixed seed: the generated dataset is identical on every run
)


In [3]:
# Assemble the dataset as one table: the two feature columns of x side by side
# with the target vector y appended as a third column.
df = pd.DataFrame(
    np.column_stack((x, y)),
    columns=['feature1', 'feature2', 'target'],
)


In [None]:
# Preview the first five rows to check the column layout and value ranges.
df.head(n=5)

Unnamed: 0,feature1,feature2,target
0,1.172855,-0.937636,217.850146
1,1.092263,0.170451,135.125666
2,-0.170979,0.569643,34.408242
3,-0.837202,0.582251,-96.275309
4,-1.737672,0.313644,-94.464809


In [4]:
# Confirm the frame's dimensions as a (rows, columns) tuple.
df.shape

(100, 3)

In [5]:
# Interactive 3D view of the raw data: the two features span the X and Y axes
# and the target is drawn on the Z axis.
fig = px.scatter_3d(data_frame=df, x='feature1', y='feature2', z='target')

# Render the figure in the cell output.
fig.show()


In [None]:
# Hold out 20% of the samples for evaluation; the remaining 80% are used to
# fit the model. A fixed random_state makes the split repeatable across runs.
x_train, x_test, y_train, y_test = train_test_split(
    x,                  # Input features (independent variables)
    y,                  # Target/output values (dependent variable)
    test_size=0.2,      # Fraction of the data reserved for testing
    random_state=3      # Seed for the shuffle applied before splitting
)

In [None]:
# Instantiate an unfitted Linear Regression estimator with its default settings.
# Nothing is learned yet; the relationship between the features and the target
# is estimated only when fit() is called on the training data.
lr = LinearRegression()

In [None]:
# Fit the model on the training split only. With two input features the fitted
# relationship is a plane over (feature1, feature2).
lr.fit(X=x_train, y=y_train)

0,1,2
,fit_intercept,True
,copy_X,True
,tol,1e-06
,n_jobs,
,positive,False


In [None]:
# Run the fitted model on the held-out features; y_pred holds one predicted
# target value per row of x_test.
y_pred = lr.predict(X=x_test)

In [None]:
# Report three standard regression metrics on the held-out test set:
#   MAE      - mean absolute difference between actual and predicted values (lower is better)
#   MSE      - mean squared difference; squaring penalises large errors more (lower is better)
#   R2 score - share of the target's variance explained by the model; 1 is a perfect fit,
#              0 is no better than always predicting the mean, negative is worse than that
for label, metric in (
    ("MAE", mean_absolute_error),
    ("MSE", mean_squared_error),
    ("R2 score", r2_score),
):
    print(label, metric(y_test, y_pred))

MAE 42.25101837564533
MSE 2795.7834305011647
R2 score 0.8040613103054952


In [None]:
# Create 10 equally spaced values from -5 to 5 for x-axis
x = np.linspace(-5, 5, 10)

# Create 10 equally spaced values from -5 to 5 for y-axis
y = np.linspace(-5, 5, 10)

# Create a 2D grid (mesh) of x and y coordinates
# xGrid and yGrid will each be of shape (10, 10)
xGrid, yGrid = np.meshgrid(x, y)

# Create input data for prediction in the correct format (2 columns: feature1 and feature2)
final = np.vstack((
    xGrid.ravel(),       # Convert xGrid from (10,10) to a 1D array of 100 values
    yGrid.ravel()        # Convert yGrid from (10,10) to a 1D array of 100 values
)).T                     # Transpose it to shape (100, 2) → each row is one (x, y) point

# Predict the output (z values) using the trained Linear Regression model
z_final = lr.predict(final).reshape(10, 10)  
# Reshape predicted values back to (10,10) to match the grid for plotting

# Store the predicted z values into variable z (for easier usage later)
z = z_final

In [None]:
# Create a 3D scatter plot of the original dataset
fig = px.scatter_3d(
    df,                  # DataFrame containing the dataset
    x='feature1',         # X-axis uses feature1 values
    y='feature2',         # Y-axis uses feature2 values
    z='target'            # Z-axis uses actual target values
)

# Add a 3D surface plot on the same figure
# This surface represents the predicted regression plane (best-fit plane)
fig.add_trace(
    go.Surface(
        x=x,              # X-axis grid values (from linspace)
        y=y,              # Y-axis grid values (from linspace)
        z=z               # Predicted Z values on the grid (from model prediction)
    )
)

# Display the final interactive 3D plot (scatter points + regression surface)
fig.show()

In [None]:
# lr.coef_ holds the coefficients (weights) learned by the Linear Regression model,
# one per input column, in the column order of x_train: [feature1, feature2].
# Each coefficient is the change in the predicted target for a one-unit increase
# in that feature while the other feature is held fixed:
# target = (coef1 * feature1) + (coef2 * feature2) + intercept
lr.coef_

array([51.87307115,  2.69204582])

In [None]:
# lr.intercept_ holds the intercept (bias) of the Linear Regression model:
# the predicted target when every feature value is 0.
# Together with lr.coef_ it completes the fitted equation:
# target = (coef1 * feature1) + (coef2 * feature2) + intercept
lr.intercept_

np.float64(3.3828909151504627)