In [45]:
import numpy as np
import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets, linear_model
from sklearn.metrics import mean_squared_error, r2_score

class Linear_regressor:
    """Multi-output linear regression trained with batch gradient descent.

    Data layout used throughout the class is column-major: features are
    passed as ``X`` with shape ``(n_x, m)`` and targets as ``Y`` with shape
    ``(n_y, m)``, where ``m`` is the number of samples.  The model is
    ``Y_hat = theta @ X + b`` with ``theta`` of shape ``(n_y, n_x)`` and
    ``b`` of shape ``(n_y, 1)``.
    """

    def initialize(self, n_x=9, n_y=9):
        """Create the initial parameter dictionary.

        Parameters
        ----------
        n_x : int, optional
            Number of input features (rows of ``X``).  Defaults to 9, the
            size that used to be hard-coded here.
        n_y : int, optional
            Number of regression targets (rows of ``Y``).  Defaults to 9.

        Returns
        -------
        dict
            ``{"theta": (n_y, n_x) array, "b": (n_y, 1) array}``.
        """
        # A full (n_y, n_x) weight matrix gives every output its own weights
        # from the first iteration.  A 1-D vector of weights only produced a
        # (n_y, m) prediction through accidental broadcasting against ``b``.
        theta = np.random.randn(n_y, n_x) * 0.01
        b = np.zeros((n_y, 1))

        return {"theta": theta, "b": b}

    def forward_prop(self, X_train, parameters):
        """Compute predictions ``theta @ X_train + b``.

        Parameters
        ----------
        X_train : ndarray, shape (n_x, m)
        parameters : dict
            Must contain ``"theta"`` and ``"b"``.

        Returns
        -------
        ndarray, shape (n_y, m)
        """
        theta = parameters["theta"]
        b = parameters["b"]

        # b is (n_y, 1) and broadcasts across the m sample columns.
        return theta @ X_train + b

    def back_prop(self, Y_hat, parameters, X, Y):
        """Compute gradients of the squared-error loss.

        ``parameters`` is accepted for interface compatibility but is not
        needed: the gradient depends only on the residual and the inputs.

        Returns
        -------
        dict
            ``{"d_theta": (n_y, n_x) array, "d_b": (n_y, 1) array}``.
        """
        m = X.shape[1]  # samples are stored along axis 1
        diff_Y = Y_hat - Y

        # These are the gradients of (1 / (2m)) * sum(diff ** 2), i.e. half
        # the gradient of the value reported by calculate_cost.  The constant
        # factor only rescales the effective learning rate.
        d_theta = (1 / m) * (diff_Y @ X.T)
        d_b = (1 / m) * np.sum(diff_Y, axis=1, keepdims=True)

        return {"d_theta": d_theta, "d_b": d_b}

    def calculate_cost(self, Y_hat, Y):
        """Return the squared error summed over outputs, averaged over samples.

        Computes ``(1 / m) * sum((Y_hat - Y) ** 2)`` with ``m = Y.shape[1]``.
        """
        m = Y.shape[1]
        return (1 / m) * np.sum((Y_hat - Y) ** 2)

    def update_parameters(self, grads, learning_rate, parameters):
        """Take one gradient-descent step and return a new parameter dict.

        The input ``parameters`` dictionary is not modified.
        """
        theta = parameters["theta"] - learning_rate * grads["d_theta"]
        b = parameters["b"] - learning_rate * grads["d_b"]

        return {"theta": theta, "b": b}

    def model(self, X_train, Y_train, num_iterations=100, learning_rate=0.01, print_cost=False):
        """Fit the regressor with batch gradient descent.

        Parameters
        ----------
        X_train : ndarray, shape (n_x, m)
        Y_train : ndarray, shape (n_y, m)
        num_iterations : int, optional
            Number of full-batch update steps.
        learning_rate : float, optional
            Step size of each update.
        print_cost : bool, optional
            When true, print the cost every 50 iterations.

        Returns
        -------
        tuple
            ``(parameters, costs)`` where ``costs`` holds the cost measured
            before each update (one entry per iteration).
        """
        # Layer sizes come from the data so the model is not tied to 9x9.
        n_x = X_train.shape[0]
        n_y = Y_train.shape[0]

        parameters = self.initialize(n_x, n_y)
        costs = []
        for i in range(num_iterations):
            Y_hat = self.forward_prop(X_train, parameters)

            cost = self.calculate_cost(Y_hat, Y_train)
            costs.append(cost)

            grads = self.back_prop(Y_hat, parameters, X_train, Y_train)
            parameters = self.update_parameters(grads, learning_rate, parameters)

            if print_cost and i % 50 == 0:
                print("Cost after iteration {0}: {1}".format(i, cost))

        return parameters, costs

    def predict(self, X, parameters):
        """Return predictions for ``X`` (shape ``(n_x, m)``) as ``(n_y, m)``."""
        return self.forward_prop(X, parameters)

    def do_lin_reg(self, np_X_train, np_X_test, np_Y_train, np_Y_test,
                   num_iterations=500, learning_rate=0.001):
        """Train the gradient-descent model and compare it with scikit-learn.

        Inputs are row-major (samples along axis 0); they are transposed
        before being handed to :meth:`model` / :meth:`predict`.  Prints the
        test mean squared error of this model and of
        ``sklearn.linear_model.LinearRegression`` fitted on the same split,
        plus the first test row and its prediction for each model.

        Parameters
        ----------
        np_X_train, np_X_test : ndarray, shape (m, n_x)
        np_Y_train, np_Y_test : ndarray, shape (m, n_y)
        num_iterations : int, optional
            Gradient-descent steps (default 500, the former hard-coded value).
        learning_rate : float, optional
            Gradient-descent step size (default 0.001, the former value).
        """
        print("Y_true shape: {0}".format(np_Y_test.shape))
        parameters, costs = self.model(np_X_train.T, np_Y_train.T,
                                       num_iterations=num_iterations,
                                       learning_rate=learning_rate,
                                       print_cost=True)

        print(parameters["theta"].shape)
        # Transpose back so predictions line up with the row-major targets.
        y_preds_test = self.predict(np_X_test.T, parameters).T
        print("Y_true shape: {0}".format(np_Y_test.shape))
        print("Y_pred shape: {0}".format(y_preds_test.shape))

        print(np_Y_test[0])
        print(y_preds_test[0])

        # This is the error of the hand-written model, so it must not be
        # labelled as scikit-learn's result.
        print('Gradient descent Mean squared error: %.2f'
              % mean_squared_error(np_Y_test, y_preds_test))

        # Reference fit with scikit-learn on the same split.
        regr = linear_model.LinearRegression()
        regr.fit(np_X_train, np_Y_train)

        # Make predictions using the testing set
        y_pred = regr.predict(np_X_test)
        print('SKlearn Mean squared error: %.2f'
              % mean_squared_error(np_Y_test, y_pred))

        print(np_Y_test[0])
        print(y_pred[0])


In [46]:
from sklearn.model_selection import StratifiedKFold


from dataloader import DataLoader


class ML_Algo:
    """Entry point that loads a dataset and runs the regression experiment."""

    def execute_regressor(self, final_dataset_path, split_size):
        """Load the dataset at ``final_dataset_path`` and run the linear regressor.

        ``split_size`` is forwarded unchanged to
        ``DataLoader.preprocess_data_from_csv_multi``, which is expected to
        return four arrays in the order train features, test features,
        train targets, test targets (presumably NumPy arrays -- verify
        against the ``dataloader`` module).
        """
        loader = DataLoader()
        X_train, X_test, Y_train, Y_test = loader.preprocess_data_from_csv_multi(
            final_dataset_path, split_size
        )
        regressor = Linear_regressor()

        print("1. Linear regressor")
        regressor.do_lin_reg(X_train, X_test, Y_train, Y_test)




In [47]:

# Driver cell: run the regression experiment on the multi-label tic-tac-toe data.
print("####" * 20)  # 80-character separator line
# NOTE(review): the banner says "classification" but this cell runs a linear
# regressor on the dataset -- confirm the wording is intended.
print("--> Multi class classification move dataset: <--")
# Path is relative to the notebook's working directory.
final_dataset_path = "datasets-part1/tictac_multi.txt"
# Forwarded to the data loader; the printed shapes (5240 of 6551 rows in the
# training arrays) suggest this is the training fraction -- confirm in DataLoader.
split_size = 0.8
algo = ML_Algo()
algo.execute_regressor(final_dataset_path, split_size)

################################################################################
--> Multi class classification move dataset: <--
.. Data Loading ..
ps_np_covariates_X: (6551, 9)
ps_np_treatment_Y: (6551, 9)
np_covariates_X_train: (5240, 9)
np_covariates_Y_train: (5240, 9)
np_covariates_X_test: (1311, 9)
np_covariates_Y_test: (1311, 9)
1. Linear regressor
Y_true shape: (1311, 9)
Cost after iteration 0: 1.9483038892953295
Cost after iteration 50: 1.9053601431223404
Cost after iteration 100: 1.866659447241596
Cost after iteration 150: 1.8317808608049873
Cost after iteration 200: 1.8003453095661208
Cost after iteration 250: 1.772011415115115
Cost after iteration 300: 1.7464717400358207
Cost after iteration 350: 1.7234494074806772
Cost after iteration 400: 1.7026950578019695
Cost after iteration 450: 1.6839841086080087
(9, 9)
Y_true shape: (1311, 9)
Y_pred shape: (1311, 9)
[0. 1. 0. 0. 0. 0. 1. 1. 0.]
[0.12668079 0.09965621 0.11617653 0.085061   0.14057202 0.07812002
 0.11172899 0.08604145