In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split

In [None]:
# Representation only: load the dataset as pandas objects so the feature table renders as a DataFrame.
data = fetch_california_housing(as_frame=True)  # as_frame=True -> data/target come back as pandas objects
X, y = data.data, data.target                   # features (input variables) and target (output variable)
X

In [None]:
# Load the same dataset again without pandas; this rebinds X and y,
# replacing the pandas versions assigned in the previous cell.
housing = fetch_california_housing(as_frame=False)
X, y = housing.data, housing.target


In [None]:
# Scatter plot of median income against median house value.
# Reuses `plt`, `housing`, `X` and `y` from the cells above instead of
# re-importing the libraries and reloading the dataset a second time.
medinc_index = housing.feature_names.index("MedInc")  # column position of MedInc in X

fig, ax = plt.subplots(figsize=(6, 6))
# X is a NumPy array, so the MedInc column is selected by its integer index
ax.scatter(X[:, medinc_index], y, alpha=0.3)
ax.set_xlabel("Median Income (in $10,000s)")
ax.set_ylabel("Median House Value ($100,000s)")
ax.set_title("Median Income vs House Value")
ax.set_yticks([0, 2, 4, 6])
plt.show()


In [None]:
#normalising the input data, X
def minmax(X):
    """Min-max scale each column of X into the range [0, 1].

    Applies (X - Xmin) / (Xmax - Xmin) column by column, where Xmin / Xmax are
    the minimum / maximum of that column.

    X: array-like of numbers; columns are features (a 1-D input is scaled as a
       single feature).

    Returns a new float array with the same shape as X. The input is not
    modified.

    Differences from the naive per-column loop:
    - the result is always floating point, so integer input is no longer
      truncated back to integers;
    - a constant column (Xmax == Xmin) is mapped to 0 instead of producing
      NaN from a 0/0 division.
    """
    X = np.asarray(X, dtype=float)
    col_min = X.min(axis=0)
    col_range = X.max(axis=0) - col_min
    # Divide constant columns by 1 instead of 0: their numerator is already 0.
    safe_range = np.where(col_range == 0, 1.0, col_range)
    return (X - col_min) / safe_range

In [None]:
# Min-max scale every feature column of X; the scaled array is shown as the cell output.
X_scaled=minmax(X)
X_scaled

In [None]:
# Hold out 20% of the rows as a test set; the fixed random_state keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled,
    y,
    test_size=0.2,
    random_state=10,
)
X_train.shape, y_train.shape

In [None]:
def initialise(n_features=None):
    """Create reproducible starting parameters for the linear model.

    n_features: number of weights to create. When omitted, the column count of
        the notebook-level feature matrix `X` is used, which is what the
        zero-argument call has always done.

    Returns (w, b): `w` is a 1-D array of `n_features` weights and `b` is a
    single bias value, all drawn with np.random.rand (uniform over [0, 1)).

    Note: NumPy's global random generator is reseeded with 10 on every call,
    so repeated calls with the same `n_features` return identical values.
    """
    if n_features is None:
        # Fallback kept for existing callers; relies on the global X being defined.
        n_features = X.shape[1]
    np.random.seed(10)
    w = np.random.rand(n_features)  # array of weights, one per feature
    b = np.random.rand()
    return w, b

In [None]:
def mse(y, y_pred):
    """Return the mean squared error between actual values `y` and predictions `y_pred`."""
    residual = y - y_pred
    return np.mean(np.square(residual))

In [None]:
def prediction(x, w, b):
    """Linear model output: dot product of inputs `x` with weights `w`, plus bias `b`."""
    weighted_sum = np.dot(x, w)
    return weighted_sum + b

In [None]:
def train_model(X, y, epochs, alpha, n, w, b):
    """Fit linear-regression parameters with batch gradient descent on the MSE.

    X      -- input feature matrix
    y      -- actual target values
    epochs -- number of parameter updates to perform
    alpha  -- learning rate (step size)
    n      -- number of samples (rows of X); used to scale the gradients
    w, b   -- starting weights and bias (e.g. from initialise())

    Prints the loss of the current predictions every 100 epochs and returns
    the final (w, b). The arrays passed in are not modified in place.
    """
    grad_scale = -2 / n

    for epoch in range(epochs):
        y_pred = prediction(X, w, b)
        residual = y - y_pred

        # Loss is reported for the predictions made before this epoch's update
        if epoch % 100 == 0:
            print("loss =", mse(y, y_pred))

        # Gradients of the mean squared error w.r.t. weights and bias
        dw = grad_scale * np.dot(X.T, residual)
        db = grad_scale * np.sum(residual)

        # Step against the gradient (rebinding, so the caller's arrays stay untouched)
        w = w - alpha * dw
        b = b - alpha * db

    return w, b

In [None]:
# Hyper-parameters and the gradient-descent run on the training split.
epochs=30000 # number of gradient-descent updates
alpha=0.1 # learning rate (step size of each update)
n=X_train.shape[0] # number of training rows
w,b=initialise()
w, b=train_model(X_train, y_train, epochs, alpha, n,w, b)
w, b

In [None]:
# Put the model's test-set predictions next to the true targets, one row per sample.
print(np.column_stack((prediction(X_test, w, b), y_test)))
# Output rows have the form: [predicted value, actual value]

In [None]:
from sklearn.linear_model import LinearRegression
import numpy as np

# Reference model: scikit-learn's LinearRegression fitted on the same training split
model = LinearRegression().fit(X_train, y_train)

# Predictions for the held-out test rows
y_pred_sklearn = model.predict(X_test)

# First ten rows side by side as [predicted, actual]
print(np.column_stack((y_pred_sklearn[:10], y_test[:10])))


In [None]:
# Test-set predictions and targets as flat 1-D arrays
y_pred = prediction(X_test, w, b).flatten()
y_test_flat = y_test.flatten()

fig, ax = plt.subplots(figsize=(8, 8))
ax.scatter(y_test_flat, y_pred, alpha=0.6, s=50)

# 45-degree reference line: points on it are predicted exactly
min_val = min(y_test_flat.min(), y_pred.min())
max_val = max(y_test_flat.max(), y_pred.max())
ax.plot([min_val, max_val], [min_val, max_val], 'r--', linewidth=2, label='Perfect Prediction')

ax.set_xlabel('Actual Values')
ax.set_ylabel('Predicted Values')
ax.set_title('Actual vs Predicted (All Features)')
ax.legend()
ax.grid(True, alpha=0.3)
ax.axis('equal')
plt.show()

In [None]:
from sklearn.preprocessing import StandardScaler

# Standardize the raw feature matrix X with scikit-learn's StandardScaler and
# keep the fitted `scaler` so later cells can transform new inputs the same way.
# NOTE: this rebinds X_scaled, replacing the min-max-scaled array produced by
# minmax(X) earlier; re-running the train/test split cell after this one would
# split the standardized data instead.
# NOTE(review): the scaler is fitted on every row of X, not only on a training
# subset -- confirm this is intended.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
print("Features scaled.")

In [None]:
# Retrain on the standardized features, starting from scratch.
# - Fresh starting weights: the w, b left over from the earlier run were fitted
#   to min-max-scaled inputs, and feeding them back in made this cell produce
#   a different result each time it was re-run.
# - Row count taken from the data actually passed in: the notebook-level `n`
#   is X_train.shape[0], which does not match the full X_scaled / y used here
#   and would mis-scale the gradients.
w_init, b_init = initialise()
w, b = train_model(X_scaled, y, epochs, alpha, X_scaled.shape[0], w_init, b_init)
print("Training completed with scaled data.")

In [None]:
# Prediction cell with scaling + sample valid input suggestions
# Reads the eight feature values from the user, scales them with the fitted
# `scaler`, and prints the model's prediction using the current w and b.

print("\nEnter values to predict housing price:")
print("\n--- Sample Valid Inputs (Based on California Housing Dataset) ---")
print("Median Income (MedInc): 1.5 to 15")
print("House Age: 1 to 52")
print("Average Rooms (AveRooms): 2 to 10")
print("Average Bedrooms (AveBedrms): 0.5 to 5")
print("Population: 100 to 35000")
print("Average Occupancy (AveOccup): 1 to 6")
print("Latitude: 32 to 42")
print("Longitude: -124 to -114")
print("---------------------------------------------------------------\n")

# Each answer is parsed with float(); non-numeric text raises ValueError.
MedInc      = float(input("Median Income in 10,000$ (suggested 3–10): "))
HouseAge    = float(input("Median House Age in years (suggested 10–40): "))
AveRooms    = float(input("Average Rooms (suggested 3–7): "))
AveBedrms   = float(input("Average Bedrooms (suggested 0.5–2): "))
Population  = float(input("Population (suggested 500–10000): "))
AveOccup    = float(input("Average Occupancy (suggested 1–5): "))
Latitude    = float(input("Latitude (suggested 33–38): "))
Longitude   = float(input("Longitude (suggested -123 to -115): "))

# Convert to a single-row 2-D array
# NOTE(review): the column order here must match the feature order the scaler
# and weights were fitted on -- confirm against housing.feature_names.
user_input = np.array([[MedInc, HouseAge, AveRooms, AveBedrms,
                        Population, AveOccup, Latitude, Longitude]])

# Scale input just like training data (uses the StandardScaler fitted earlier)
user_scaled = scaler.transform(user_input)

# Predict with the current weights and bias
pred = prediction(user_scaled, w, b)

# The prediction is multiplied by 100000 to report it in dollars
print("\nPredicted House Price(in $):", pred[0]*100000)


Enter values to predict housing price:

--- Sample Valid Inputs (Based on California Housing Dataset) ---
Median Income (MedInc): 1.5 to 15
House Age: 1 to 52
Average Rooms (AveRooms): 2 to 10
Average Bedrooms (AveBedrms): 0.5 to 5
Population: 100 to 35000
Average Occupancy (AveOccup): 1 to 6
Latitude: 32 to 42
Longitude: -124 to -114
---------------------------------------------------------------

