# Libraries:

In [86]:
#Importing some functions
from csv import reader
from random import randrange
%matplotlib inline
import matplotlib.pyplot as plt
plt.style.use('seaborn-whitegrid')

# Loading CSV:

In [87]:
#Function to load csv:
def load_csv(filename):
	dataset = list() 
	with open(filename, 'r') as file:
		csv_reader = reader(file)
		for col in csv_reader:
			if not col:
				continue
			dataset.append(col) #Fetching the value in the dataset from file
	return dataset

In [88]:
#String column to float operaton:
def str_column_to_float(dataset, column):
	for col in dataset:
		col[column] = float(col[column].strip())   #Using the Strip Function for this operation

# Fetching the file from the system:

In [89]:
#Loading the required CSV file from the system:      
filename = 'data2.csv'
dataset = load_csv(filename)
len(dataset)              #Length of the dataset

21613

In [90]:
for i in range(len(dataset[0])):
	str_column_to_float(dataset, i)     #Converting string column to float values

# Distribution of data in input and output variables:

In [91]:
#Initializing Input and Ouput Features of Dataset:
X = [row[1:9] for row in dataset]
Y = [row[0] for row in dataset]

In [92]:
len(X)    #Length of X

21613

In [93]:
len(Y)   #Length of Y

21613

# Feature Scaling of all the input features:

In [94]:
#Scaling the data as there was a lot of difference in the scales of the parameters:
for i in range(len(X[0])):
    for index, item in enumerate(X):
        try:
            X[index][i] = int(item[i])
        except Exception as e:
            X[index][i] = 0


    max_val = max([row[i] for row in X])
    min_val = min([row[i] for row in X])
    for index, item in enumerate(X):
        try:
            X[index][i] = (item[i]-min_val)/(max_val-min_val) #Formula to get the scaling between 0 and 1
        except Exception as e:
            X[index][i] = 0

In [95]:
#Displaying the Scaled values of X:
len(X)

21613

In [96]:
#Defining X as XX for our convenience:
XX=X
len(XX)

21613

# Splitting the data:

In [97]:
# Split the data into a train and test set:
def train_test_split1(XX, split = 0.8):  #split is 0.8 to keep the division as 80% training data and 20% for testing.
	X_train = list()
	X_train_size = split * len(XX)      #Splitting into training and test set
	dataset_copy = list(XX)           #Keeping a copy of the data in XX
	while len(X_train) < X_train_size:
		index = randrange(len(dataset_copy))
		X_train.append(dataset_copy.pop(index))   #Now the training data set is fetched into the copied dataset.
	return X_train, dataset_copy

In [98]:
X_train,X_test = train_test_split1(XX)
len(X_test)

4322

In [99]:
# Split the data into a train and test set:
def train_crossvalidation_split2(X_train, split = 0.75):  #split is 0.75 to keep the division as 75% training data and 25% for crossvalidation.
	X_crossv = list()
	X_crossv_size = split * len(X_train)       #Splitting into training and cross-validation set
	dataset_copy = list(X_train)           #Keeping a copy of the data in X_train
	while len(X_crossv) < X_crossv_size:
		index = randrange(len(dataset_copy))
		X_crossv.append(dataset_copy.pop(index))   #Now the training data set is fetched into the copied dataset.
	return X_crossv, dataset_copy

In [100]:
X_train_set,X_crossvalidation_set = train_crossvalidation_split2(X_train)    #Splitting the data finally for X 

# Checking the number of examples and the division of Input data

In [101]:
#Length of test set:
len(X_train_set)

12969

In [102]:
#Length of Training set
len(X_crossvalidation_set)

4322

In [103]:
len(X_test)

4322

In [104]:
# Split the data into test set for finding Y_test:
def train_test_split3(Y, split = 0.8):  #split is 0.8 to keep the division as 80% training data and 20% for testing.
	Y_train = list()
	Y_train_size = split * len(Y)      #Splitting into training and test set
	dataset_copy = list(Y)           #Keeping a copy of the data in Y
	while len(Y_train) < Y_train_size:
		index = randrange(len(dataset_copy))
		Y_train.append(dataset_copy.pop(index))   #Now the training data set is fetched into the copied dataset.
	return Y_train, dataset_copy

In [105]:
Y_train,Y_test = train_test_split3(Y) #Train-Test Split for Y

In [106]:
len(Y_train)  #Length of Training set for Y

17291

In [107]:
len(Y_test)   #length of Testing Set for Y

4322

In [108]:
# Split the data into a train and crossvalidation set for Y_train and Y_crossvalidate:
def train_crossvalidation_split4(Y_train, split = 0.75):  #split is 0.75 to keep the division as 75% training data and 25% for crossvalidation.
	Y_crossv = list()
	Y_crossv_size = split * len(Y_train)      #Splitting into training and cross-validation set
	dataset_copy = list(Y_train)           #Keeping a copy of the data in Y_train 
	while len(Y_crossv) < Y_crossv_size:
		index = randrange(len(dataset_copy))
		Y_crossv.append(dataset_copy.pop(index))   #Now the training data set is fetched into the copied dataset.
	return Y_crossv, dataset_copy

In [109]:
Y_train_set, Y_crossvalidation_set = train_crossvalidation_split4(Y_train) #Split for cross-validation

# Checking the number of examples and the division of Output data

In [110]:
len(Y_train_set) 

12969

In [111]:
len(Y_crossvalidation_set)

4322

In [112]:
len(Y_test)

4322

# Converting data into int type for proper use:

In [113]:
#Converting X_train and X_test to float type
for i in range(len(X_train_set[0])):
    for index, item in enumerate(X_train_set):
        try:
            X_train_set[index][i] = float(item[i])
        except Exception as e:                  #exception handling in case any error occur 
            X_train_set[index][i] = 0
    for index, item in enumerate(X_test):
        try:
            X_test[index][i] = float(item[i])
        except Exception as e:
            X_test[index][i] = 0

#Converting X_train and X_test to int type 
for i in range(len(X_train_set[0])):
    for index, item in enumerate(X_train_set):
        try:
            X_train_set[index][i] = int(item[i])
        except Exception as e:
            X_train_set[index][i] = 0
    for index, item in enumerate(X_test):
        try:
            X_test[index][i] = int(item[i])
        except Exception as e:
            X_test[index][i] = 0
            
#Converting Y_train and Y_test to float type 
for j,y in enumerate(Y_train_set):
        try:
            Y_train_set[j] = float(y)
        except:
            Y_train_set[j]=0
for j, y in enumerate(Y_test):
        try:
            Y_test[j] = float(y)
        except:
            Y[j]=0

# Training Code:

In [114]:

#Hypothesis Function for Training data:
def Hypothesis_Fn(X_train_set, thetas):
    theta = 0
    y_pred = 0
    for weight in thetas:
        if theta == 0:
            y_pred =y_pred + weight
        else:
            y_pred =y_pred + weight*X_train_set[theta-1]
        theta += 1
    return y_pred
thetas = [6,3,4,5,5,5,9,9,1]
alpha = 0.00005
#Cost Function for the training set of the model:
def Cost_Fn_for_training(X_train_set, Y_train_set):
    m = len(X_train_set)
    J_theta = 0
    for a,b in enumerate(X_train_set):
        cost = Hypothesis_Fn(b,thetas)-Y_train_set[a]
        cost += cost*cost
        J_theta += cost
    J_theta = (1/(2*m))*J_theta
    return J_theta


#Regularization
l = 5
def Regualrization(X_train_set, Y_train_set):
    summ = 0
    m = len(X_train_set)
    for i,weight in enumerate(thetas):
        summ += thetas[i]*thetas[i]
    regularization = (l/(2*m))*summ
    J_theta = Cost_Fn_for_training(X_train_set,Y_train_set)
    cost = J_theta + regularization
    return cost

#Gradient Descent for Regularized Thetas:
def Gradient_Descent(X_train_set, Y_train_set, epoch):
    m = len (X_train_set)
    J_theta = 0
    for a,b in enumerate(X_train_set):
        cost = Hypothesis_Fn(b,thetas)-Y_train_set[a]
        if epoch == 0:
            J_theta = cost
        else:
            J_theta += (cost*b[epoch-1])
    J_theta += (1/m)*J_theta
    return J_theta

#Update values of thetas:
def Update_thetas(X_train_set,Y_train_set):
    m = len(X_train_set)
    for a,b in enumerate(thetas):
        thetas[a] = thetas[a] - alpha*(Gradient_Descent(X_train_set,Y_train_set,a) + ((l/m)*thetas[a]))
        
#Training:
cost =[]
def Training_model(X_train_set, Y_train_set, epoch):
    for a in range(epoch):
        J_theta = Regualrization(X_train_set,Y_train_set)
        cost.append(J_theta)
        print(a,J_theta)
        Update_thetas(X_train_set,Y_train_set)
        


In [None]:
cost = []
Training_model(X_train_set,Y_train_set,100)

0 211839889839.13998
1 210585862251.26163
2 209443219441.51648
3 208401227668.9007
4 207450208319.1947
5 206581433352.41742
6 205787031143.0855
7 205059901678.86218
8 204393640186.36246
9 203782468345.41327
10 203221172336.73203
11 202705047043.03357
12 202229845791.45554
13 201791735085.8054
14 201387253832.47928
15 201013276612.89075
16 200666980600.0346
17 200345815756.63846
18 200047477988.85458
19 199769884961.1624
20 199511154308.34702
21 199269584005.99686
22 199043634685.0589
23 198831913697.23584
24 198633160757.24557
25 198446235005.27966
26 198270103348.58633
27 198103829955.14008
28 197946566784.98077
29 197797545056.25107
30 197656067553.09216
31 197521501691.91867
32 197393273270.7934
33 197270860834.18466
34 197153790592.06033
35 197041631838.3871
36 196933992819.5577
37 196830517008.1521
38 196730879741.9246
39 196634785191.84805
40 196541963626.67133
41 196452168944.65738
42 196365176446.1073
43 196280780822.85574
44 196198794343.35062
45 196119045213.97745
46 19604137

# Plot for Training with respect to cost calculated:

In [None]:
plt.plot(range(100),cost)

# Code for Cross Validation:

In [None]:
def Cross_Validation(X_crossvalidation_set, Y_crossvalidation_set):           #measuring how well model is performing (minimum mean well)
    m = len(X_crossvalidation_set)
    J_theta = 0
    prediction=[]
    for i, x in enumerate(X_crossvalidation_set):
        costa = Hypothesis_Fn(x, thetas)- Y_crossvalidation_set[i]
        cost = costa * costa
        prediction.append(costa)
        J_theta += cost
    J_theta  = (1/(2*m))*J_theta                            #cost on test data 
    return J_theta,prediction

In [None]:
Crossvalidate_predicted_cost,prediction=Cross_Validation(X_crossvalidation_set,Y_crossvalidation_set)
Crossvalidate_predicted_cost

# Testing_Code:

In [None]:
def Testing(X_test, Y_test):           #measuring how well model is performing (minimum mean well)
    m = len(X_test)
    J_theta = 0
    prediction=[]
    for i, x in enumerate(X_test):
        costa = Hypothesis_Fn(x, thetas)- Y_test[i]
        cost = costa * costa
        prediction.append(costa)
        J_theta += cost
    J_theta  = (1/(2*m))*J_theta                            #cost on test data 
    return J_theta,prediction

In [None]:
test_set_predicted_cost,prediction = Testing(X_test,Y_test)
test_set_predicted_cost

# Prediction Function

In [None]:
X = [1,4,5,6,8,3,2,9]

In [None]:
def Prediction_Function(X, thetas):
    theta = 0
    y_pred = 0
    for weight in thetas:
        if theta == 0:
            y_pred =y_pred + weight
        else:
            y_pred =y_pred + weight*X[theta-1]
        theta += 1
    return y_pred

In [None]:
Prediction_Function(X,thetas)