TheAlgorithms · cozek · May 31, 2020 · May 27, 2020
diff --git a/algorithms/machine_learning/Linear-Regression/computecost.m b/algorithms/machine_learning/Linear-Regression/computecost.m
@@ -0,0 +1,9 @@
+function j = computecost(x, Y, Theta)
+% COMPUTECOST  Half mean squared error of the linear hypothesis x * Theta against Y.
+%   x: design matrix (one row per training example); Y: column of targets;
+%   Theta: parameter column. Returns j = sum((x * Theta - Y).^2) / (2 * n).
+
+numExamples = length(Y);            % Number of training examples.
+residuals = (x * Theta) - Y;        % Prediction error for each example.
+j = (1 / (2 * numExamples)) * sum(residuals.^2);
+end
diff --git a/algorithms/machine_learning/Linear-Regression/data_.txt b/algorithms/machine_learning/Linear-Regression/data_.txt
@@ -0,0 +1,97 @@
+6.1101,17.592
+5.5277,9.1302
+8.5186,13.662
+7.0032,11.854
+5.8598,6.8233
+8.3829,11.886
+7.4764,4.3483
+8.5781,12
+6.4862,6.5987
+5.0546,3.8166
+5.7107,3.2522
+14.164,15.505
+5.734,3.1551
+8.4084,7.2258
+5.6407,0.71618
+5.3794,3.5129
+6.3654,5.3048
+5.1301,0.56077
+6.4296,3.6518
+7.0708,5.3893
+6.1891,3.1386
+20.27,21.767
+5.4901,4.263
+6.3261,5.1875
+5.5649,3.0825
+18.945,22.638
+12.828,13.501
+10.957,7.0467
+13.176,14.692
+22.203,24.147
+5.2524,-1.22
+6.5894,5.9966
+9.2482,12.134
+5.8918,1.8495
+8.2111,6.5426
+7.9334,4.5623
+8.0959,4.1164
+5.6063,3.3928
+12.836,10.117
+6.3534,5.4974
+5.4069,0.55657
+6.8825,3.9115
+11.708,5.3854
+5.7737,2.4406
+7.8247,6.7318
+7.0931,1.0463
+5.0702,5.1337
+5.8014,1.844
+11.7,8.0043
+5.5416,1.0179
+7.5402,6.7504
+5.3077,1.8396
+7.4239,4.2885
+7.6031,4.9981
+6.3328,1.4233
+6.3589,-1.4211
+6.2742,2.4756
+5.6397,4.6042
+9.3102,3.9624
+9.4536,5.4141
+8.8254,5.1694
+5.1793,-0.74279
+21.279,17.929
+14.908,12.054
+18.959,17.054
+7.2182,4.8852
+8.2951,5.7442
+10.236,7.7754
+5.4994,1.0173
+20.341,20.992
+10.136,6.6799
+7.3345,4.0259
+6.0062,1.2784
+7.2259,3.3411
+5.0269,-2.6807
+6.5479,0.29678
+7.5386,3.8845
+5.0365,5.7014
+10.274,6.7526
+5.1077,2.0576
+5.7292,0.47953
+5.1884,0.20421
+6.3557,0.67861
+9.7687,7.5435
+6.5159,5.3436
+8.5172,4.2415
+9.1802,6.7981
+6.002,0.92695
+5.5204,0.152
+5.0594,2.8214
+5.7077,1.8451
+7.6366,4.2959
+5.8707,7.2029
+5.3054,1.9869
+8.2934,0.14454
+13.394,9.0551
+5.4369,0.61705
diff --git a/algorithms/machine_learning/Linear-Regression/gradientdescent.m b/algorithms/machine_learning/Linear-Regression/gradientdescent.m
@@ -0,0 +1,29 @@
+% GRADIENTDESCENT  Batch gradient descent for least-squares linear regression.
+%
+%   Theta = gradientdescent(x, Y, Theta, Alpha, noi)
+%
+% Inputs:
+%   x     - design matrix, one row per training example and one column per
+%           parameter (for the one-variable case: a column of ones followed
+%           by the feature column).
+%   Y     - column vector of actual target values, one per row of x.
+%   Theta - column vector of initial parameter values, one per column of x.
+%   Alpha - learning rate.
+%   noi   - number of iterations to run.
+%
+% Output:
+%   Theta - parameter column vector after noi update steps.
+%
+% The update is written in matrix form, so the same code handles the original
+% two-parameter case as well as any number of features.
+function Theta = gradientdescent(x, Y, Theta, Alpha, noi)
+
+    n = length(Y); % Number of training examples.
+
+    for i = 1:noi
+        % x' * residuals stacks sum(residuals .* x(:, k)) for every column k, so
+        % all parameters are updated simultaneously from the same old Theta.
+        Theta = Theta - (Alpha / n) * (x' * ((x * Theta) - Y));
+    end
+
+end
diff --git a/algorithms/machine_learning/Linear-Regression/plotdata.m b/algorithms/machine_learning/Linear-Regression/plotdata.m
@@ -0,0 +1,11 @@
+function plotdata(x, Y)
+% PLOTDATA  Scatter the training examples (x against Y) in a new figure window.
+%   Points are drawn as red crosses of marker size 10; the x axis is labelled
+%   as city population and the y axis as profit.
+
+figure;
+plot(x, Y, 'rx', 'MarkerSize', 10); % 'rx': red, x-shaped markers, no connecting line.
+xlabel('Population of city in 10,000s');
+ylabel('Profit in $10,000s');
+
+end
diff --git a/algorithms/machine_learning/Linear-Regression/runLinearRegression.m b/algorithms/machine_learning/Linear-Regression/runLinearRegression.m
@@ -0,0 +1,59 @@
+% Runs univariate linear regression on the data in data_.txt: column 1 is used as the
+% feature (city population in 10,000s) and column 2 as the target (food-truck profit
+% in $10,000s). The script plots the data, checks computecost, fits Theta with
+% gradientdescent, overlays the fitted line and prints two example predictions.
+clear ; close all; clc ;
+% Load the comma-separated training set and plot it.
+fprintf('Plotting data\n');
+
+data = load('data_.txt');
+x = data(:, 1); Y = data(:, 2);
+n = length(Y); % Number of training examples.
+
+plotdata(x, Y);
+
+fprintf('Program paused, press enter to continue\n');
+
+pause;
+% Build the design matrix: a column of ones (intercept term) followed by the feature.
+x = [ones(n, 1), data(:,1)]; 
+Theta = zeros(2, 1);
+
+noi = 1500;   % Number of iterations in gradient descent. 
+Alpha = 0.01; % Learning rate.
+% Sanity-check computecost against the expected values printed below before fitting.
+fprintf('Testing the cost function\n')
+
+j = computecost(x, Y, Theta);
+fprintf('With Theta = [0 ; 0]\nCost computed = %f\n', j);
+fprintf('Expected cost value (approx) 32.07\n');
+
+j = computecost(x, Y, [-1 ; 2]);
+fprintf('With theta = [-1 ; 2]\nCost computed = %f\n', j);
+fprintf('Expected cost value (approx) 54.24\n');
+
+fprintf('Program paused, press enter to continue\n');
+
+pause;
+
+fprintf('Running gradient descent\n');
+% Fit Theta with noi steps of gradient descent at learning rate Alpha.
+Theta = gradientdescent(x, Y, Theta, Alpha, noi);
+
+fprintf('Theta found by gradient descent\n');
+fprintf('%f\n', Theta); % The format is reused for each element of Theta, one per line.
+fprintf('Expected Theta vector (approx)\n');
+fprintf(' -3.6303\n  1.1664\n\n');
+% Overlay the fitted line on the current figure (presumably the one plotdata opened).
+hold on;                                                           % To plot hypothesis on data. 
+
+plot(x(:, 2), x * Theta, '-');
+legend('Training data', 'Linear regression');
+% Inputs are in units of 10,000 people and outputs in $10,000s, hence 3.5 / 7 and *10000.
+predict1 = [1, 3.5] * Theta;
+fprintf('For population = 35,000, we predict a profit of %f\n',...
+    predict1*10000);
+
+predict2 = [1, 7] * Theta;
+fprintf('For population = 70,000, we predict a profit of %f\n',...
+    predict2*10000);