<a href="https://colab.research.google.com/github/PrathyushaMyla2005/google-colab/blob/main/Linear_multiple_regression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
 import pandas as pd
import numpy as np
from sklearn import linear_model

In [None]:
import pandas as pd                   # pandas is used to create and handle table-like data (DataFrame)
import numpy as np                    # numpy is used for numerical operations (NaN, arrays, math)
import math                           # math is used here for floor() function
from sklearn import linear_model      # sklearn provides machine learning models like Linear Regression

# ✅ Step 1: Describe the training set column by column
data = {
    "area": [1500, 1800, 2400, 3000, 3500],                 # x1: area in sqft
    "bedrooms": [3, np.nan, 4, np.nan, 5],                  # x2: bedroom count (NaN marks a missing value)
    "age": [10, 8, 5, 2, 1],                                # x3: age of the house
    "price": [4000000, 5000000, 6500000, 8000000, 9500000]  # y: house price (target)
}

# ✅ Step 2: Load the dict into a DataFrame with columns area, bedrooms, age, price
df = pd.DataFrame(data)

# ✅ Step 3: Pick the value used to fill the gaps in "bedrooms"
# Series.median() skips NaN entries; math.floor() turns the result (e.g. 4.0)
# into a whole number of bedrooms (4).
bedrooms_median = df["bedrooms"].median()
median_bedrooms = math.floor(bedrooms_median)

# ✅ Step 4: Replace every NaN in "bedrooms" with that value
df["bedrooms"] = df["bedrooms"].fillna(median_bedrooms)

# ✅ Step 5: Separate inputs (X) from the target (y)
# This must happen after the imputation so X carries no NaN values.
feature_columns = ["area", "bedrooms", "age"]
X = df[feature_columns]
y = df["price"]

# ✅ Step 6: Create the (still untrained) Linear Regression model
reg = linear_model.LinearRegression()

# ✅ Step 7: Train the model — fit() finds the best weights and intercept
reg.fit(X, y)

# ✅ Step 8: Predict prices for the very rows the model was trained on
y_pred = reg.predict(X)

# ✅ Step 9: Cost function J = (1 / 2m) * sum((prediction - actual)^2)
m = len(y)                              # number of training examples (5 rows here)
squared_errors = (y_pred - y) ** 2      # per-example error, squared
J = (1/(2*m)) * np.sum(squared_errors)

# ✅ Step 10: Report the cost (lower means the predictions are closer to y).
# Note that J is measured on the training rows themselves, not on unseen data.
print("Cost Function J =", J)


Cost Function J = 4.598751934814516e-16


In [None]:
import math

# Median of the "bedrooms" column, rounded down to a whole number.
# By this point in the notebook the column has already been imputed above,
# so the median is taken over the filled values.
median_bedrooms = math.floor(df["bedrooms"].median())

# A bare name on the last line makes the notebook display its value.
median_bedrooms

4

In [None]:
# fillna() returns a copy of the column in which every NaN ("not available")
# entry is replaced by median_bedrooms; the result is written back to df.
df["bedrooms"] = df["bedrooms"].fillna(median_bedrooms)

# Display the table to confirm that no NaN values remain.
df


Unnamed: 0,area,bedrooms,age,price
0,1500,3.0,10,4000000
1,1800,4.0,8,5000000
2,2400,4.0,5,6500000
3,3000,4.0,2,8000000
4,3500,5.0,1,9500000


In [None]:
# Inputs are the three feature columns; the target is the price column.
features = df[["area", "bedrooms", "age"]]
target = df["price"]

# Create a fresh model and train it; fit() learns one weight per feature
# plus an intercept.
reg = linear_model.LinearRegression()
reg.fit(features, target)

In [None]:
# Learned weights, one per feature, in the order the columns were passed to
# fit(): area, bedrooms, age.
reg.coef_

array([ 2.50000000e+03,  2.50000000e+05, -2.03726813e-10])

In [None]:
# Learned intercept (bias term): the model's output when every feature is 0.
reg.intercept_

np.float64(-499999.9999998985)

In [None]:
# Predict the price of one unseen house: area=3000, bedrooms=3, age=40.
# The model was fitted on a DataFrame, so the query is wrapped in a one-row
# DataFrame with the same column names; a bare nested list makes scikit-learn
# warn that X does not have valid feature names.
reg.predict(pd.DataFrame([[3000, 3, 40]], columns=["area", "bedrooms", "age"]))



array([7749999.99999997])

In [None]:
 import numpy as np

# ✅ Training Data (Single Variable)
x = np.array([1, 2, 3, 4], dtype=float)
y = np.array([2, 4, 6, 8], dtype=float)

n = len(x)

# ✅ Initialize coefficient (w) and intercept (b)
w = 0.0   # coefficient (slope)
b = 0.0   # intercept (bias)

alpha = 0.1
epochs = 10

print("----- Training using Gradient Descent -----\n")

for i in range(epochs):

    # ✅ Prediction
    y_pred = w * x + b

    # ✅ Cost Function (MSE)
    cost = (1/n) * np.sum((y - y_pred) ** 2)

    # ✅ Gradients
    dw = (-2/n) * np.sum(x * (y - y_pred))
    db = (-2/n) * np.sum(y - y_pred)

    # ✅ Update coefficient and intercept
    w = w - alpha * dw
    b = b - alpha * db

    print(f"Epoch {i+1}: Cost={cost:.4f} | Coefficient(w)={w:.4f} | Intercept(b)={b:.4f}")

print("\n✅ Final Results:")
print("✅ Coefficient (w) =", w)
print("✅ Intercept (b)   =", b)

# ✅ Test Prediction
test_x = 5
predicted_y = w * test_x + b
print(f"\n✅ For x = {test_x}, predicted y = {predicted_y:.2f}")


----- Training using Gradient Descent -----

Epoch 1: Cost=30.0000 | Coefficient(w)=3.0000 | Intercept(b)=1.0000
Epoch 2: Cost=13.5000 | Coefficient(w)=1.0000 | Intercept(b)=0.3000
Epoch 3: Cost=6.0900 | Coefficient(w)=2.3500 | Intercept(b)=0.7400
Epoch 4: Cost=2.7614 | Coefficient(w)=1.4550 | Intercept(b)=0.4170
Epoch 5: Cost=1.2653 | Coefficient(w)=2.0640 | Intercept(b)=0.6061
Epoch 6: Cost=0.5920 | Coefficient(w)=1.6649 | Intercept(b)=0.4529
Epoch 7: Cost=0.2884 | Coefficient(w)=1.9411 | Intercept(b)=0.5298
Epoch 8: Cost=0.1507 | Coefficient(w)=1.7645 | Intercept(b)=0.4533
Epoch 9: Cost=0.0876 | Coefficient(w)=1.8911 | Intercept(b)=0.4804
Epoch 10: Cost=0.0581 | Coefficient(w)=1.8143 | Intercept(b)=0.4388

✅ Final Results:
✅ Coefficient (w) = 1.814273395
✅ Intercept (b)   = 0.4387739729999998

✅ For x = 5, predicted y = 9.51
