In [1]:
import numpy as np
# Load the training set: every column except the last goes to x_train,
# the last column goes to y_train.
train_data = np.loadtxt('training_dataset.csv', delimiter=',')
x_train, y_train = train_data[:, :-1], train_data[:, -1]
print(x_train.shape)

(7176, 4)


In [3]:
# Load the test features. ndmin=2 keeps the result two-dimensional even when
# the file contains a single row; without it loadtxt returns a 1-D array,
# which the scaler's transform() rejects.
test_data = np.loadtxt('testing_dataset.csv', delimiter=',', ndmin=2)

In [4]:
from sklearn.preprocessing import StandardScaler
# Fit the scaler on the training features only, then apply that same fitted
# transform to both the training and the test features.
scaler = StandardScaler().fit(x_train)
x_train_scaled = scaler.transform(x_train)
x_test_scaled = scaler.transform(test_data)

In [5]:
def initialize_parameters(n):
    """Create the starting parameters for the linear model.

    Parameters
    ----------
    n : int
        Number of weights to create.

    Returns
    -------
    tuple
        ``(W, b)`` where ``W`` is ``np.zeros(n)`` and ``b`` is the integer ``0``.
    """
    weights, bias = np.zeros(n), 0
    return weights, bias

In [6]:
def compute_cost(X, Y, W, b):
    """Return the halved mean squared error of the linear model ``X·W + b``.

    Parameters
    ----------
    X : array
        Input matrix; ``X.shape[0]`` is used as the number of samples.
    Y : array
        Target values, subtracted element-wise from the predictions.
    W : array
        Weights combined with ``X`` through ``np.dot``.
    b : scalar
        Bias added to every prediction.

    Returns
    -------
    The sum of squared residuals divided by twice the number of samples.
    """
    residuals = np.dot(X, W) + b - Y
    n_samples = X.shape[0]
    squared_error_total = np.sum(np.square(residuals))
    return squared_error_total / (2 * n_samples)

In [7]:
def gradient_descent(X,Y,W,b, learning_rate, num_iterations, print_every=100):
    """Fit linear-regression parameters with batch gradient descent.

    Each iteration predicts ``X·W + b``, computes the gradients of the cost
    with respect to ``W`` and ``b`` from the prediction error, steps both
    parameters against their gradient, and records the cost evaluated with
    the updated parameters.

    Parameters
    ----------
    X : array
        Input matrix; ``X.shape[0]`` is used as the number of samples.
    Y : array
        Target values.
    W : array
        Initial weights. The caller's array is not modified.
    b : scalar
        Initial bias.
    learning_rate : float
        Step size applied to both gradients.
    num_iterations : int
        Number of update steps to run.
    print_every : int, optional
        Print the cost on every iteration whose index is a multiple of this
        value (default 100). Pass ``0`` or ``None`` to disable printing.

    Returns
    -------
    tuple
        ``(W, b, cost_history)`` where ``cost_history`` holds one cost value
        per iteration.
    """
    m = X.shape[0]
    cost_history = []
    for i in range(num_iterations):
        Y_pred = np.dot(X, W) + b
        error = Y_pred - Y
        dW = (1/m) * np.dot(X.T, error)
        db = (1/m) * np.sum(error)
        # Rebind instead of updating in place: an in-place ``-=`` would write
        # into the caller's array and fails outright for integer-typed weights.
        W = W - learning_rate * dW
        b = b - learning_rate * db
        cost = compute_cost(X, Y, W, b)
        cost_history.append(cost)
        if print_every and i % print_every == 0:
            print(f'Iteration{i}: cost{cost}')
    return W, b, cost_history

In [8]:
# Hyperparameters for the gradient-descent run.
learning_rate = 0.001
num_iterations = 10000

# Start from zero-valued parameters, one weight per column of the scaled
# training matrix, then fit them.
n_features = x_train_scaled.shape[1]
W, b = initialize_parameters(n_features)
W, b, cost_history = gradient_descent(
    x_train_scaled,
    y_train,
    W,
    b,
    learning_rate=learning_rate,
    num_iterations=num_iterations,
)

Iteration0: cost103193.66187224616
Iteration100: cost84457.59412341965
Iteration200: cost69128.98325071638
Iteration300: cost56586.12382904828
Iteration400: cost46321.514743928055
Iteration500: cost37920.58370161317
Iteration600: cost31044.488610357723
Iteration700: cost25416.171876474902
Iteration800: cost20809.025842667288
Iteration900: cost17037.664057830472
Iteration1000: cost13950.397009291893
Iteration1100: cost11423.091276957544
Iteration1200: cost9354.153910559173
Iteration1300: cost7660.433487540502
Iteration1400: cost6273.868863896858
Iteration1500: cost5138.748339875149
Iteration1600: cost4209.467509713457
Iteration1700: cost3448.694726507446
Iteration1800: cost2825.869873499747
Iteration1900: cost2315.9757590441272
Iteration2000: cost1898.5325492944116
Iteration2100: cost1556.7747014562565
Iteration2200: cost1276.9772464111395
Iteration2300: cost1047.904302671107
Iteration2400: cost860.3576344646722
Iteration2500: cost706.8070983619783
Iteration2600: cost581.088120243647
It

In [9]:
def predict(X, W, b):
    """Evaluate the linear model: the dot product of ``X`` and ``W``, plus ``b``.

    Parameters
    ----------
    X : array
        Inputs to score.
    W : array
        Model weights.
    b : scalar
        Model bias, added to every output.

    Returns
    -------
    The result of ``np.dot(X, W) + b``.
    """
    linear_term = np.dot(X, W)
    return linear_term + b
# Score the scaled test features with the fitted parameters and print them.
y_test_pred = predict(X=x_test_scaled, W=W, b=b)
print(y_test_pred)

[470.55216106 471.98400007 432.87249824 ... 439.76422969 453.58462396
 448.37045698]


In [10]:
# Write the test predictions to disk, formatted with five decimal places.
np.savetxt('predictionstest.csv', y_test_pred, fmt='%.5f')