diff --git a/machine_learning/linear_regression.py b/machine_learning/linear_regression.py
index 39bee5712c16..faea0f1353ad 100644
--- a/machine_learning/linear_regression.py
+++ b/machine_learning/linear_regression.py
@@ -31,85 +31,34 @@ def collect_dataset():
     return dataset
 
 
-def run_steep_gradient_descent(data_x, data_y, len_data, alpha, theta):
-    """Run steep gradient descent and updates the Feature vector accordingly_
-    :param data_x : contains the dataset
-    :param data_y : contains the output associated with each data-entry
-    :param len_data : length of the data_
-    :param alpha : Learning rate of the model
-    :param theta : Feature vector (weight's for our model)
-    ;param return : Updated Feature's, using
-                    curr_features - alpha_ * gradient(w.r.t. feature)
-    """
-    n = len_data
-
-    prod = np.dot(theta, data_x.transpose())
-    prod -= data_y.transpose()
-    sum_grad = np.dot(prod, data_x)
-    theta = theta - (alpha / n) * sum_grad
-    return theta
-
-
-def sum_of_square_error(data_x, data_y, len_data, theta):
-    """Return sum of square error for error calculation
-    :param data_x : contains our dataset
-    :param data_y : contains the output (result vector)
-    :param len_data : len of the dataset
-    :param theta : contains the feature vector
-    :return : sum of square error computed from given feature's
-    """
-    prod = np.dot(theta, data_x.transpose())
-    prod -= data_y.transpose()
-    sum_elem = np.sum(np.square(prod))
-    error = sum_elem / (2 * len_data)
-    return error
-
-
-def run_linear_regression(data_x, data_y):
-    """Implement Linear regression over the dataset
-    :param data_x : contains our dataset
-    :param data_y : contains the output (result vector)
+def ols_linear_regression(data_x: np.ndarray, data_y: np.ndarray) -> np.ndarray:
+    """Implement Linear regression using OLS over the dataset
+    :param data_x : contains our dataset, without the bias column
+    :param data_y : contains the output (result vector)
     :return : feature for line of best fit (Feature vector)
     """
-    iterations = 100000
-    alpha = 0.0001550
-
-    no_features = data_x.shape[1]
-    len_data = data_x.shape[0] - 1
+    # Add a column of ones to data_x for the bias term
+    data_x = np.c_[np.ones(data_x.shape[0]), data_x].astype(float)
 
-    theta = np.zeros((1, no_features))
-
-    for i in range(iterations):
-        theta = run_steep_gradient_descent(data_x, data_y, len_data, alpha, theta)
-        error = sum_of_square_error(data_x, data_y, len_data, theta)
-        print(f"At Iteration {i + 1} - Error is {error:.5f}")
+    # Use NumPy's least-squares solver instead of an explicit inverse, which
+    # fails when data_x.T @ data_x is singular
+    theta = np.linalg.lstsq(data_x, data_y, rcond=None)[0]
 
     return theta
 
 
-def mean_absolute_error(predicted_y, original_y):
-    """Return sum of square error for error calculation
-    :param predicted_y : contains the output of prediction (result vector)
-    :param original_y : contains values of expected outcome
-    :return : mean absolute error computed from given feature's
-    """
-    total = sum(abs(y - predicted_y[i]) for i, y in enumerate(original_y))
-    return total / len(original_y)
-
-
 def main():
     """Driver function"""
     data = collect_dataset()
-
-    len_data = data.shape[0]
-    data_x = np.c_[np.ones(len_data), data[:, :-1]].astype(float)
+    data_x = data[:, :-1].astype(float)
     data_y = data[:, -1].astype(float)
 
-    theta = run_linear_regression(data_x, data_y)
-    len_result = theta.shape[1]
-    print("Resultant Feature vector : ")
-    for i in range(len_result):
-        print(f"{theta[0, i]:.5f}")
+    theta = ols_linear_regression(data_x, data_y)
+    print("Resultant Feature vector (weights): ")
+    # Flatten before printing so every weight is shown whether theta is
+    # 1-D or a column vector
+    for weight in np.asarray(theta).ravel():
+        print(f"{weight:.5f}")
 
 
 if __name__ == "__main__":