In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.datasets import fetch_california_housing

# 1. Load the Dataset
housing = fetch_california_housing()
df = pd.DataFrame(housing.data, columns=housing.feature_names)
df['Price'] = housing.target

# --- PART A: SIMPLE LINEAR REGRESSION ---
# Using only 'MedInc' (Median Income) to predict 'Price'
X_simple = df[['MedInc']] 
y = df['Price']

X_train_s, X_test_s, y_train_s, y_test_s = train_test_split(X_simple, y, test_size=0.2, random_state=42)

simple_model = LinearRegression()
simple_model.fit(X_train_s, y_train_s)
y_pred_s = simple_model.predict(X_test_s)

# --- PART B: MULTI-LINEAR REGRESSION ---
# Using all features (Income, House Age, AveRooms, etc.)
X_multi = df.drop('Price', axis=1)

X_train_m, X_test_m, y_train_m, y_test_m = train_test_split(X_multi, y, test_size=0.2, random_state=42)

multi_model = LinearRegression()
multi_model.fit(X_train_m, y_train_m)
y_pred_m = multi_model.predict(X_test_m)

# --- RESULTS ---
print("--- Simple Linear Regression Results ---")
print(f"R2 Score: {r2_score(y_test_s, y_pred_s):.4f}")
print(f"Intercept: {simple_model.intercept_:.4f}")
print(f"Coefficient: {simple_model.coef_[0]:.4f}")

print("\n--- Multi-Linear Regression Results ---")
print(f"R2 Score: {r2_score(y_test_m, y_pred_m):.4f}")
print(f"Intercept: {multi_model.intercept_:.4f}")
print("Coefficients:", multi_model.coef_)


--- Simple Linear Regression Results ---
R2 Score: 0.4589
Intercept: 0.4446
Coefficient: 0.4193

--- Multi-Linear Regression Results ---
R2 Score: 0.5758
Intercept: -37.0233
Coefficients: [ 4.48674910e-01  9.72425752e-03 -1.23323343e-01  7.83144907e-01
 -2.02962058e-06 -3.52631849e-03 -4.19792487e-01 -4.33708065e-01]


In [5]:
# Linear and Multi-Linear Regression Implementation in Python
# Using scikit-learn and the Diabetes dataset 

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# -----------------------------
# 1. LOAD DATASET (DIABETES)
# -----------------------------
from sklearn.datasets import load_diabetes

diabetes = load_diabetes()
df = pd.DataFrame(diabetes.data, columns=diabetes.feature_names)
df['Target'] = diabetes.target

print("Dataset Preview:")
print(df.head())

# -----------------------------
# 2. SIMPLE LINEAR REGRESSION
# -----------------------------
print("\n--- SIMPLE LINEAR REGRESSION ---")

# Using only ONE independent variable
X_simple = df[['bmi']]   # Body Mass Index feature
y = df['Target']

X_train_s, X_test_s, y_train_s, y_test_s = train_test_split(
    X_simple, y, test_size=0.2, random_state=42
)

simple_model = LinearRegression()
simple_model.fit(X_train_s, y_train_s)

# Predictions
simple_preds = simple_model.predict(X_test_s)

# Evaluation
print("Coefficient:", simple_model.coef_[0])
print("Intercept:", simple_model.intercept_)
print("RMSE:", np.sqrt(mean_squared_error(y_test_s, simple_preds)))
print("R2 Score:", r2_score(y_test_s, simple_preds))

# -----------------------------
# 3. MULTI-LINEAR REGRESSION
# -----------------------------
print("\n--- MULTI-LINEAR REGRESSION ---")

# Using MULTIPLE independent variables
X_multi = df.drop('Target', axis=1)

X_train_m, X_test_m, y_train_m, y_test_m = train_test_split(
    X_multi, y, test_size=0.2, random_state=42
)

multi_model = LinearRegression()
multi_model.fit(X_train_m, y_train_m)

# Predictions
multi_preds = multi_model.predict(X_test_m)

# Evaluation
print("Coefficients:")
for feature, coef in zip(X_multi.columns, multi_model.coef_):
    print(f"{feature}: {coef}")

print("Intercept:", multi_model.intercept_)
print("RMSE:", np.sqrt(mean_squared_error(y_test_m, multi_preds)))
print("R2 Score:", r2_score(y_test_m, multi_preds))


Dataset Preview:
        age       sex       bmi        bp        s1        s2        s3  \
0  0.038076  0.050680  0.061696  0.021872 -0.044223 -0.034821 -0.043401   
1 -0.001882 -0.044642 -0.051474 -0.026328 -0.008449 -0.019163  0.074412   
2  0.085299  0.050680  0.044451 -0.005670 -0.045599 -0.034194 -0.032356   
3 -0.089063 -0.044642 -0.011595 -0.036656  0.012191  0.024991 -0.036038   
4  0.005383 -0.044642 -0.036385  0.021872  0.003935  0.015596  0.008142   

         s4        s5        s6  Target  
0 -0.002592  0.019907 -0.017646   151.0  
1 -0.039493 -0.068332 -0.092204    75.0  
2 -0.002592  0.002861 -0.025930   141.0  
3  0.034309  0.022688 -0.009362   206.0  
4 -0.002592 -0.031988 -0.046641   135.0  

--- SIMPLE LINEAR REGRESSION ---
Coefficient: 998.5776891375593
Intercept: 152.00335421448167
RMSE: 63.73245584860925
R2 Score: 0.23335039815872138

--- MULTI-LINEAR REGRESSION ---
Coefficients:
age: 37.90402135007501
sex: -241.96436231273995
bmi: 542.4287585162895
bp: 347.70384