TASK-2:Multiple Linear Regression 

● Use the same dataset or load another dataset with at least 4 numeric 
features. 
● Predict the target variable using multiple independent variables. 
● Evaluate the model using: 
○ R-squared 
○ Mean Squared Error (MSE) 
○ Root Mean Squared Error (RMSE) 
● Display the coefficients of each feature. 

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.datasets import fetch_california_housing
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score, mean_squared_error


In [3]:
from sklearn.datasets import fetch_california_housing
import pandas as pd

housing = fetch_california_housing()
df_housing = pd.DataFrame(housing.data, columns=housing.feature_names)
df_housing['Target'] = housing.target

print(df_housing.head(10))


   MedInc  HouseAge  AveRooms  AveBedrms  Population  AveOccup  Latitude  \
0  8.3252      41.0  6.984127   1.023810       322.0  2.555556     37.88   
1  8.3014      21.0  6.238137   0.971880      2401.0  2.109842     37.86   
2  7.2574      52.0  8.288136   1.073446       496.0  2.802260     37.85   
3  5.6431      52.0  5.817352   1.073059       558.0  2.547945     37.85   
4  3.8462      52.0  6.281853   1.081081       565.0  2.181467     37.85   
5  4.0368      52.0  4.761658   1.103627       413.0  2.139896     37.85   
6  3.6591      52.0  4.931907   0.951362      1094.0  2.128405     37.84   
7  3.1200      52.0  4.797527   1.061824      1157.0  1.788253     37.84   
8  2.0804      42.0  4.294118   1.117647      1206.0  2.026891     37.84   
9  3.6912      52.0  4.970588   0.990196      1551.0  2.172269     37.84   

   Longitude  Target  
0    -122.23   4.526  
1    -122.22   3.585  
2    -122.24   3.521  
3    -122.25   3.413  
4    -122.25   3.422  
5    -122.25   2.697  
6 

In [4]:
df_housing.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20640 entries, 0 to 20639
Data columns (total 9 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   MedInc      20640 non-null  float64
 1   HouseAge    20640 non-null  float64
 2   AveRooms    20640 non-null  float64
 3   AveBedrms   20640 non-null  float64
 4   Population  20640 non-null  float64
 5   AveOccup    20640 non-null  float64
 6   Latitude    20640 non-null  float64
 7   Longitude   20640 non-null  float64
 8   Target      20640 non-null  float64
dtypes: float64(9)
memory usage: 1.4 MB


In [5]:
df_housing.isnull().sum()


MedInc        0
HouseAge      0
AveRooms      0
AveBedrms     0
Population    0
AveOccup      0
Latitude      0
Longitude     0
Target        0
dtype: int64

In [6]:
input_features = housing.feature_names
output_label = housing.target_names if hasattr(housing, 'target_names') else ['Target']


In [8]:
X_data = df_housing[input_features]
Y_data = df_housing['Target']

X_data.head()
Y_data.head()


0    4.526
1    3.585
2    3.521
3    3.413
4    3.422
Name: Target, dtype: float64

In [9]:
X_array = X_data.values
Y_array = Y_data.values.ravel()


In [10]:
print(X_array.shape, Y_array.shape)



(20640, 8) (20640,)


In [11]:
train_x, test_x, train_y, test_y = train_test_split(X_array, Y_array, test_size=0.8, random_state=42)


In [12]:
mlr = LinearRegression()
mlr.fit(train_x, train_y)

predicted_y = mlr.predict(test_x)

print(mlr.intercept_)
mlr.coef_


-37.764304845475024


array([ 4.25546113e-01,  9.15501577e-03, -9.43236037e-02,  6.74748060e-01,
        4.88938032e-06, -1.50778303e-03, -4.27714004e-01, -4.42739701e-01])

In [13]:
r_squared = r2_score(test_y, predicted_y)
mse_value = mean_squared_error(test_y, predicted_y)
rmse_value = np.sqrt(mse_value)

print(f"R² Score: {r_squared:.4f}")
print(f"MSE: {mse_value:.4f}")
print(f"RMSE: {rmse_value:.4f}")


R² Score: 0.6070
MSE: 0.5218
RMSE: 0.7224


In [14]:
coef_table = pd.DataFrame({
    'Feature': housing.feature_names,
    'Weight': mlr.coef_
})

print("\nFeature Coefficients:")
print(coef_table)



Feature Coefficients:
      Feature    Weight
0      MedInc  0.425546
1    HouseAge  0.009155
2    AveRooms -0.094324
3   AveBedrms  0.674748
4  Population  0.000005
5    AveOccup -0.001508
6    Latitude -0.427714
7   Longitude -0.442740
