In [2]:
import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.metrics import mean_squared_error, r2_score


class LinearRegressionUsingGD:
    """Linear Regression Using Gradient Descent.
    Parameters
    ----------
    eta : float
        Learning rate
    n_iterations : int
        No of passes over the training set
    Attributes
    ----------
    w_ : weights/ after fitting the model
    cost_ : total error of the model after each iteration
    """

    def __init__(self, eta=0.05, n_iterations=1000):
        self.eta = eta
        self.n_iterations = n_iterations

    def fit(self, x, y):
        """Fit the training data
        Parameters
        ----------
        x : array-like, shape = [n_samples, n_features]
            Training samples
        y : array-like, shape = [n_samples, n_target_values]
            Target values
        Returns
        -------
        self : object
        """

        self.cost_ = []
        self.w_ = np.zeros((x.shape[1], 1))
        m = x.shape[0]

        for _ in range(self.n_iterations):
            y_pred = np.dot(x, self.w_)
            residuals = y_pred - y
            gradient_vector = np.dot(x.T, residuals)
            self.w_ -= (self.eta / m) * gradient_vector
            cost = np.sum((residuals ** 2)) / (2 * m)
            self.cost_.append(cost)
        #print(self.cost_)
        return self

    def predict(self, x):
        """ Predicts the value after the model has been trained.
        Parameters
        ----------
        x : array-like, shape = [n_samples, n_features]
            Test samples
        Returns
        -------
        Predicted value
        """
        return np.dot(x, self.w_)
    
    def accuracy(self, x, actual_classes, probab_threshold=0):
       
        #predicted_classes = model.predict(x)
        #accuracy = accuracy_score(actual_classes.flatten(),predicted_classes)
        predicted_classes = (self.predict(x) >= probab_threshold).astype(int)
        predicted_classes = predicted_classes.flatten()
        #print(predicted_classes)
        accuracy = np.mean(predicted_classes == actual_classes)
        return accuracy * 100

In [3]:
import pandas as pd
import matplotlib.pyplot as plt


import warnings
warnings.filterwarnings('ignore')

import string

In [258]:
# def load_data(path, header):
#     marks_df = pd.read_csv(path, header=header)
#     return marks_df


In [4]:
def read_file(file_path):
        """Read the file and return in a format of numpy array"""
        corpus = pd.read_csv(file_path)
        corpus = np.array(corpus)
        return corpus

In [5]:
file_path1 = "Gamma_train.csv"
file_path2 = "Gamma_test.csv"
Train_data  = read_file(file_path1)
Test_data  = read_file(file_path2)

Train_data1 = Train_data[np.random.randint(Train_data.shape[0], size=20), :]
Train_data2 = Train_data[np.random.randint(Train_data.shape[0], size=100), :]
Train_data3 = Train_data[np.random.randint(Train_data.shape[0], size=200), :]
Train_data4 = Train_data[np.random.randint(Train_data.shape[0], size=1000), :]


In [6]:
X_train1 = Train_data1[:, 0:(Train_data1.shape[1]-1)]
y_train1 = Train_data1[:,(Train_data1.shape[1]-1)]

X_train2 = Train_data2[:, 0:(Train_data1.shape[1]-1)]
y_train2 = Train_data2[:,(Train_data1.shape[1]-1)]

X_train3 = Train_data3[:, 0:(Train_data1.shape[1]-1)]
y_train3 = Train_data3[:,(Train_data1.shape[1]-1)]

X_train4 = Train_data4[:, 0:(Train_data1.shape[1]-1)]
y_train4 = Train_data4[:,(Train_data1.shape[1]-1)]

X_test = Test_data[:, 0:(Train_data1.shape[1]-1)]
y_test = Test_data[:,(Train_data1.shape[1]-1)]

Xt = np.c_[np.ones((X_test.shape[0], 1)), X_test]
yt = y_test[:, np.newaxis]

In [233]:
# Train_data = load_data("Gamma_train.csv", None)
# X = Train_data.iloc[:, :-1]

# # y = target values, last column of the data frame
# y = Train_data.iloc[:, -1]

# # filter out the applicants that got admitted
# C1 = Train_data.loc[y == 1]
# C2 = Train_data.loc[y == -1]

# Train_data1 = Train_data[np.random.randint(Train_data.shape[0], size=10), :]
# Train_data2 = Train_data[np.random.randint(Train_data.shape[0], size=50), :]
# Train_data3 = Train_data[np.random.randint(Train_data.shape[0], size=100), :]
# Train_data4 = Train_data[np.random.randint(Train_data.shape[0], size=500), :]

In [9]:
X = np.c_[np.ones((X_train1.shape[0], 1)), X_train1]
y = y_train1[:, np.newaxis]
theta = np.zeros((X.shape[1], 1))

model = LinearRegressionUsingGD()

model.fit(X, y)
accuracy = model.accuracy(X, y.flatten())
parameters = model.w_
y_pred = model.predict(Xt)

for i in range (0,y_pred.shape[0]):
    if y_pred[i] > 0:
        y_pred[i] = 1
    else:
        y_pred[i] = -1
    i = i+1

accuracy1 = np.mean(y_pred.flatten() == yt.flatten())
print(accuracy1)

from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Model initialization
regression_model = LinearRegression()
# Fit the data(train the model)
regression_model.fit(X, y)
# Predict
y_predicted = regression_model.predict(Xt)

for i in range(0, y_predicted.shape[0]):
    if y_predicted[i] > 0:
        y_predicted[i] = 1
    else:
        y_predicted[i] = -1
    i = i+1;

accuracy2 = accuracy_score(yt.flatten(),y_predicted)
print(accuracy2)

rmse = mean_squared_error(y, regression_model.predict(X))
r2 = r2_score(y, regression_model.predict(X))

# printing values
print('Slope:' ,regression_model.coef_)
print('Intercept:', regression_model.intercept_)
print('Root mean squared error: ', rmse)
print('R2 score: ', r2)


0.861
0.861
Slope: [[ 0.         -0.17092071 -0.1714698 ]]
Intercept: [0.87624808]
Root mean squared error:  0.29477879979523164
R2 score:  0.7052212002047684


In [10]:
X = np.c_[np.ones((X_train2.shape[0], 1)), X_train2]
y = y_train2[:, np.newaxis]
theta = np.zeros((X.shape[1], 1))

model = LinearRegressionUsingGD()

model.fit(X, y)
accuracy = model.accuracy(X, y.flatten())
parameters = model.w_
y_pred = model.predict(Xt)

for i in range (0,y_pred.shape[0]):
    if y_pred[i] > 0:
        y_pred[i] = 1
    else:
        y_pred[i] = -1
    i = i+1

accuracy1 = np.mean(y_pred.flatten() == yt.flatten())
print(accuracy1)

from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Model initialization
regression_model = LinearRegression()
# Fit the data(train the model)
regression_model.fit(X, y)
# Predict
y_predicted = regression_model.predict(Xt)

for i in range(0, y_predicted.shape[0]):
    if y_predicted[i] > 0:
        y_predicted[i] = 1
    else:
        y_predicted[i] = -1
    i = i+1;

accuracy2 = accuracy_score(yt.flatten(),y_predicted)
print(accuracy2)

rmse = mean_squared_error(y, regression_model.predict(X))
r2 = r2_score(y, regression_model.predict(X))

# printing values
print('Slope:' ,regression_model.coef_)
print('Intercept:', regression_model.intercept_)
print('Root mean squared error: ', rmse)
print('R2 score: ', r2)


0.886
0.886
Slope: [[ 0.         -0.15776215 -0.206323  ]]
Intercept: [0.84115361]
Root mean squared error:  0.33812999133802657
R2 score:  0.6613281336758547


In [11]:
X = np.c_[np.ones((X_train3.shape[0], 1)), X_train3]
y = y_train3[:, np.newaxis]
theta = np.zeros((X.shape[1], 1))

model = LinearRegressionUsingGD()

model.fit(X, y)
accuracy = model.accuracy(X, y.flatten())
parameters = model.w_
y_pred = model.predict(Xt)

for i in range (0,y_pred.shape[0]):
    if y_pred[i] > 0:
        y_pred[i] = 1
    else:
        y_pred[i] = -1
    i = i+1

accuracy1 = np.mean(y_pred.flatten() == yt.flatten())
print(accuracy1)

from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Model initialization
regression_model = LinearRegression()
# Fit the data(train the model)
regression_model.fit(X, y)
# Predict
y_predicted = regression_model.predict(Xt)

for i in range(0, y_predicted.shape[0]):
    if y_predicted[i] > 0:
        y_predicted[i] = 1
    else:
        y_predicted[i] = -1
    i = i+1;

accuracy2 = accuracy_score(yt.flatten(),y_predicted)
print(accuracy2)

rmse = mean_squared_error(y, regression_model.predict(X))
r2 = r2_score(y, regression_model.predict(X))

# printing values
print('Slope:' ,regression_model.coef_)
print('Intercept:', regression_model.intercept_)
print('Root mean squared error: ', rmse)
print('R2 score: ', r2)


0.9
0.9
Slope: [[ 0.         -0.13745937 -0.19250944]]
Intercept: [0.71911377]
Root mean squared error:  0.39699301528596964
R2 score:  0.6004498638426231


In [12]:
X = np.c_[np.ones((X_train4.shape[0], 1)), X_train4]
y = y_train4[:, np.newaxis]
theta = np.zeros((X.shape[1], 1))

model = LinearRegressionUsingGD()

model.fit(X, y)
accuracy = model.accuracy(X, y.flatten())
parameters = model.w_
y_pred = model.predict(Xt)

for i in range (0,y_pred.shape[0]):
    if y_pred[i] > 0:
        y_pred[i] = 1
    else:
        y_pred[i] = -1
    i = i+1

accuracy1 = np.mean(y_pred.flatten() == yt.flatten())
print(accuracy1)

from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Model initialization
regression_model = LinearRegression()
# Fit the data(train the model)
regression_model.fit(X, y)
# Predict
y_predicted = regression_model.predict(Xt)

for i in range(0, y_predicted.shape[0]):
    if y_predicted[i] > 0:
        y_predicted[i] = 1
    else:
        y_predicted[i] = -1
    i = i+1;

accuracy2 = accuracy_score(yt.flatten(),y_predicted)
print(accuracy2)

rmse = mean_squared_error(y, regression_model.predict(X))
r2 = r2_score(y, regression_model.predict(X))

# printing values
print('Slope:' ,regression_model.coef_)
print('Intercept:', regression_model.intercept_)
print('Root mean squared error: ', rmse)
print('R2 score: ', r2)


0.902
0.902
Slope: [[ 0.         -0.15627912 -0.18676223]]
Intercept: [0.74663694]
Root mean squared error:  0.41525895490890374
R2 score:  0.5846596383802187
