In [1]:
with open("../data/housing.csv",'r') as f:
    lines=f.readlines()
column_names=lines[0].strip().split(',')
target='median_house_value'
features=['longitude','latitude','housing_median_age','total_rooms','total_bedrooms','population','households','median_income']

feature_indexes=[column_names.index(c) for c in features]
target_index=column_names.index(target)


In [2]:
X=[]
y=[]
for data in lines[1:]:
    values=data.strip().split(',')

    missing=False
    for i in feature_indexes+[target_index]:
        if(i>=len(values) or values[i].strip()==''):
            missing=True
            break
    if missing:
        continue

    feature_values=[]
    for i in feature_indexes:
        feature_values.append(float(values[i].strip()))
    X.append(feature_values)
                                           
    y.append(float(values[target_index].strip()))


In [3]:
print(X[0])
print(y[0])

[-122.23, 37.88, 41.0, 880.0, 129.0, 322.0, 126.0, 8.3252]
452600.0


In [4]:
m=len(X[0])
Ws=[0.0]*m
alpha=0.01

b=0.0  # for bias term 'b' in f(w1,w2,...,b)(x) = x1w1+x2w2+.....+b
n=len(X)
previous_cost=float('inf')
epsilon=0.001
max_itr=10000
itr=0


In [5]:
def dot(v1,v2):
    return sum(v1[i]*v2[i] for i in range(len(v1)))

def compute_cost(w,b):
    total_error=0.0
    for i in range(n):
        x_i=X[i]
        y_i=y[i]
        y_pred=sum(Ws[j]*x_i[j] for j in range(m))+b
        error=y_pred-y_i                                #J(w,b)=1/2n*sigma(1 to m)(f(xi)-yi)^2     formula for cost function
        total_error+=error*error
    return total_error/(2 * n)
    

In [6]:
print(n)
print(m)
print(Ws)

200
8
[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]


In [None]:
itr = 0
while itr<max_itr:
    dWs = [0.0] * m
    db = 0.0


    for i in range(n):
        x_i = X[i]
        y_i = y[i]
        y_pred = dot(Ws, x_i) + b
        error = y_pred - y_i

        for j in range(m):
            dWs[j] += error * x_i[j]
        db += error
        print(x_i)
        print(y_i)
        print(f"Iteartion {itr}: Db calculated: {db} for {i}th iteration where error is {error} for y_predicted as {y_pred} ")

    
    for j in range(m):
        dWs[j] /= n
    db /= n
    temp_Ws = [Ws[j] - alpha * dWs[j] for j in range(m)]
    temp_b = b - alpha * db

    current_cost = compute_cost(temp_Ws, temp_b)

    
    if abs(previous_cost-current_cost)<epsilon:
        break
    Ws = temp_Ws
    b = temp_b
    previous_cost=current_cost
    itr+=1


[-122.23, 37.88, 41.0, 880.0, 129.0, 322.0, 126.0, 8.3252]
452600.0
Iteartion 0: Db calculated: -452600.0 for 0th iteration where error is -452600.0 for y_predicted as 0.0 
[-122.22, 37.86, 21.0, 7099.0, 1106.0, 2401.0, 1138.0, 8.3014]
358500.0
Iteartion 0: Db calculated: -811100.0 for 1th iteration where error is -358500.0 for y_predicted as 0.0 
[-122.24, 37.85, 52.0, 1467.0, 190.0, 496.0, 177.0, 7.2574]
352100.0
Iteartion 0: Db calculated: -1163200.0 for 2th iteration where error is -352100.0 for y_predicted as 0.0 
[-122.25, 37.85, 52.0, 1274.0, 235.0, 558.0, 219.0, 5.6431]
341300.0
Iteartion 0: Db calculated: -1504500.0 for 3th iteration where error is -341300.0 for y_predicted as 0.0 
[-122.25, 37.85, 52.0, 1627.0, 280.0, 565.0, 259.0, 3.8462]
342200.0
Iteartion 0: Db calculated: -1846700.0 for 4th iteration where error is -342200.0 for y_predicted as 0.0 
[-122.25, 37.85, 52.0, 919.0, 213.0, 413.0, 193.0, 4.0368]
269700.0
Iteartion 0: Db calculated: -2116400.0 for 5th iteration 

In [10]:

# === Step 4: Output results ===
print(f"Converged in {itr} iterations")
print("Learned parameters:")
for i, value in enumerate(Ws):
    print(f"Ws_{i} ({features[i]}): {value}")
print(f"b (bias): {b}")
    


Converged in 100000 iterations
Learned parameters:
Ws_0 (longitude): nan
Ws_1 (latitude): nan
Ws_2 (housing_median_age): nan
Ws_3 (total_rooms): nan
Ws_4 (total_bedrooms): nan
Ws_5 (population): nan
Ws_6 (households): nan
Ws_7 (median_income): nan
b (bias): nan
