In [None]:
#importing the libraries
import numpy as np
import pandas as pd

In [None]:
#reading the data
# Seed NumPy's global generator once so that the shuffle below and every later
# np.random draw in this notebook (the random weight initialisation) give the
# same result on each Restart & Run All.
SEED = 42
np.random.seed(SEED)

df = pd.read_csv("FoDS-Assignment-2.csv")

# shuffle the DataFrame rows (reproducibly)
df = df.sample(frac = 1, random_state = SEED)

# number of missing values in each column
df.isnull().sum()

In [None]:
# Missing-value handling: discard every row that contains at least one NaN.
df = df.dropna(axis = "index", how = "any")

# per-column count of the missing values that remain
df.isna().sum()

In [None]:
Features_list = ["bedrooms", "bathrooms", "sqft_living", "sqft_lot", "floors", "waterfront", "view", "condition", "grade", "sqft_above", "sqft_basement", "sqft_living15", "sqft_lot15"]

#Feature Scaling
# Min-max normalise every column to the range [0, 1].
# A constant column has max == min, which would turn the whole column into NaN
# (0 / 0); its range is replaced by 1 so the column is scaled to all zeros instead.
col_min = df.min()
col_range = df.max() - col_min
col_range[col_range == 0] = 1
df = (df - col_min) / col_range

# every column except the last is a model input; the last column is the target
X = df.iloc[:, :-1].values
y = df.iloc[:, -1].values
print(df.isnull().sum())

In [None]:
# Train/test split: the first 70% of the rows are used for training and the
# remaining rows for testing.
splitData = int(0.7 * len(X))
train_X, train_y = X[:splitData], y[:splitData]
test_X, test_y = X[splitData:], y[splitData:]

In [None]:
# Outlier removal: in every column, blank out the values that lie more than
# twice the 10th-90th percentile spread beyond those percentiles, then drop the
# rows that received a NaN.
# NOTE(review): X, y and the train/test split were taken from df in the earlier
# cells, so the rows dropped here do not reach the training data -- confirm
# whether this cell was meant to run before the split.
for x in df.columns:
    q90, q10 = np.percentile(df.loc[:, x], [90, 10])
    spread = q90 - q10

    # named upper/lower so the built-in max() and min() are not overwritten
    upper = q90 + (2 * spread)
    lower = q10 - (2 * spread)

    df.loc[(df[x] < lower) | (df[x] > upper), x] = np.nan

print(df.isnull().sum())
df = df.dropna(axis = 0)
print(df.isnull().sum())

In [None]:
# Shared weight vector of the linear model: 14 values, each drawn independently
# from a standard normal distribution. fit() and predict() treat WeightV[0] as
# the bias and WeightV[1:] as the feature weights.
WeightV = np.array([np.random.randn() for _ in range(14)])

In [None]:
#finding the weights based on the training data
def fit(X, Y, iters, learning_rate, F_selected, F_trial):
    """Train the shared linear model ``WeightV`` with per-row gradient descent.

    The module-level array ``WeightV`` is re-initialised with standard-normal
    values and then updated in place: ``WeightV[0]`` is the bias and
    ``WeightV[1:]`` are the feature weights. The number of features is taken
    from ``len(WeightV) - 1``, so only that many leading columns of ``X`` are
    used.

    Parameters
    ----------
    X : 2-D array-like
        Input rows; one column per feature.
    Y : 1-D array-like
        Target value for each row of ``X``.
    iters : int
        Number of passes over the data; must be at least 1.
    learning_rate : float
        Step size; every per-row update is scaled by ``learning_rate / len(X)``.
    F_selected, F_trial : 1-D array-like
        Feature masks. Their element-wise sum multiplies each feature's
        contribution to the prediction, so a feature with 0 in both is ignored.

    Returns
    -------
    float
        ``sqrt(sum(residual**2) / (2 * len(Y)))`` accumulated during the last
        pass, where each residual is measured just before that row's update.

    Raises
    ------
    ValueError
        If ``iters < 1`` or ``X`` has fewer columns than the model has feature
        weights.
    """
    if iters < 1:
        # no pass would run, so there would be no error value to return
        raise ValueError("iters must be at least 1")

    X = np.asarray(X, dtype=float)
    Y = np.asarray(Y, dtype=float)
    n_rows = len(X)
    n_features = len(WeightV) - 1
    if n_rows and (X.ndim != 2 or X.shape[1] < n_features):
        raise ValueError("X must be 2-D with at least %d columns" % n_features)

    mask = (np.asarray(F_selected, dtype=float) + np.asarray(F_trial, dtype=float))[:n_features]

    # start every training run from fresh random weights (in place, so that
    # predict() sees the trained values through the same global array)
    WeightV[:] = np.random.randn(len(WeightV))

    step = learning_rate / n_rows if n_rows else 0.0
    sumItrError = 0.0
    for _ in range(iters):
        sumItrError = 0.0
        for z in range(n_rows):        # each row in input data
            row = X[z, :n_features]
            # (w0 + sum of mask_m * w_m * x_m) - y
            residual = WeightV[0] + np.dot(mask * WeightV[1:], row) - Y[z]

            # all weights are updated from the same pre-update residual;
            # masked-out features contribute nothing to the prediction, so
            # their gradient is zero and their weights are left unchanged
            WeightV[0] -= step * residual
            WeightV[1:] -= step * residual * mask * row

            sumItrError += residual ** 2 / (2 * len(Y))

    return sumItrError ** 0.5

In [None]:
def predict(X, Y, F_selected, F_trial):
    """Evaluate the current linear model ``WeightV`` on a data set.

    The prediction for a row is ``WeightV[0]`` plus the sum over features of
    ``(F_selected[m] + F_trial[m]) * WeightV[m + 1] * X[row][m]``. The number
    of features is taken from ``len(WeightV) - 1``, so only that many leading
    columns of ``X`` are used. ``WeightV`` is read, never modified.

    Parameters
    ----------
    X : 2-D array-like
        Input rows; one column per feature.
    Y : 1-D array-like
        Target value for each row of ``X``.
    F_selected, F_trial : 1-D array-like
        Feature masks; their element-wise sum scales each feature's contribution.

    Returns
    -------
    float
        ``sqrt(sum((prediction - Y)**2) / (2 * len(X)))``, or ``0.0`` when
        ``X`` has no rows.

    Raises
    ------
    ValueError
        If ``X`` has fewer columns than the model has feature weights.
    """
    X = np.asarray(X, dtype=float)
    Y = np.asarray(Y, dtype=float)
    n_rows = len(X)
    if n_rows == 0:
        # nothing to evaluate; avoids dividing by zero below
        return 0.0

    n_features = len(WeightV) - 1
    if X.ndim != 2 or X.shape[1] < n_features:
        raise ValueError("X must be 2-D with at least %d columns" % n_features)

    mask = (np.asarray(F_selected, dtype=float) + np.asarray(F_trial, dtype=float))[:n_features]

    # (w0 + sum of mask_m * w_m * x_m) - y, for all rows at once
    residuals = WeightV[0] + np.dot(X[:, :n_features], mask * WeightV[1:]) - Y[:n_rows]
    return (np.sum(residuals ** 2) / (2 * n_rows)) ** 0.5

In [None]:
# Greedy forward feature selection.
# Each round tries every feature that is not selected yet (one at a time, via
# F_trial), trains briefly with fit(), and permanently selects the feature that
# gave the lowest training error. The best feature set seen over all rounds is
# kept in finalFeatures.
n_features = len(Features_list)
F_selected = np.zeros(n_features)
F_trial = np.zeros(n_features)
finalFeatures = np.zeros(n_features)
finalMinE = float('inf')
finalIndex = -1          # stays -1 if no round ever produces a usable error

for i in range(n_features):
    minErrorIn_i = float('inf')
    minIndex = None      # reset every round so a stale index is never reused
    for j in range(n_features):
        if(F_selected[j]==1):
            continue
        F_trial[j] = 1
        error_j = fit(train_X, train_y, 10, 0.01, F_selected, F_trial)
        print(error_j)
        if(error_j < minErrorIn_i):
            minErrorIn_i = error_j
            minIndex = j
        F_trial[j] = 0
    if minIndex is None:
        # every candidate error was NaN/inf, so none compared below infinity
        print("No candidate feature produced a finite error; stopping the search")
        break
    F_selected[minIndex] = 1 
    if(minErrorIn_i < finalMinE):
        finalMinE = minErrorIn_i
        finalIndex = i
        finalFeatures[:] = F_selected    # copy, so later rounds do not change it
    print("Minimum error for", i+1, "feature(s) is", minErrorIn_i)

print("-----------------------------------------------------------------------------------------------------------------------")    
print("Minimum training error is", finalMinE) 
print("Number of features needed for giving this minimum training error are", finalIndex+1)   
print("\nList of features giving minimum training error -")
for feature_name, chosen in zip(Features_list, finalFeatures):
    if(chosen==1):
        print(feature_name)

In [None]:
# Retrain on the training split using only the selected features. fit() starts
# from fresh random weights; F_trial was reset to all zeros by the selection
# loop, so it adds nothing to the feature mask.
fit(train_X, train_y, 500, 1, finalFeatures, F_trial)

# Evaluate the retrained weights on the held-out test split.
testingE = predict(test_X, test_y, finalFeatures, F_trial)
n_chosen = finalIndex+1
print("Minimum testing error considering these", n_chosen, "features is", testingE)
 