diff --git a/algorithms/machine_learning/Logistic-Regression/Predict.m b/algorithms/machine_learning/Logistic-Regression/Predict.m
new file mode 100644
index 0000000..539bb5c
--- /dev/null
+++ b/algorithms/machine_learning/Logistic-Regression/Predict.m
@@ -0,0 +1,9 @@
+function P = Predict(Theta, X)
+
+% Predict 0/1 labels by thresholding the sigmoid hypothesis at 0.5.
+
+H = Sigmoid(X * Theta); % hypothesis for every example, computed once
+
+P = double(H >= 0.5);
+
+end
\ No newline at end of file
diff --git a/algorithms/machine_learning/Logistic-Regression/Sigmoid.m b/algorithms/machine_learning/Logistic-Regression/Sigmoid.m
new file mode 100644
index 0000000..b15e6f2
--- /dev/null
+++ b/algorithms/machine_learning/Logistic-Regression/Sigmoid.m
@@ -0,0 +1,9 @@
+function G = Sigmoid(Z)
+
+% Element-wise logistic function g(z) = 1 / (1 + e^-z).
+% exp(-Z) handles scalars, vectors and matrices, so no loops are needed,
+% and it avoids Octave's e constant, which MATLAB does not define.
+
+G = 1 ./ (1 + exp(-Z));
+
+end
\ No newline at end of file
diff --git a/algorithms/machine_learning/Logistic-Regression/costfunction.m b/algorithms/machine_learning/Logistic-Regression/costfunction.m
new file mode 100644
index 0000000..c4a88f6
--- /dev/null
+++ b/algorithms/machine_learning/Logistic-Regression/costfunction.m
@@ -0,0 +1,13 @@
+function [J, Grad] = costfunction(Theta, X, Y)
+
+% Cross-entropy cost and its gradient for logistic regression.
+
+n = length(Y); % number of training examples
+
+H = Sigmoid(X * Theta); % hypothesis, computed once and reused
+
+J = (1 / n) * (-Y' * log(H) - (1 - Y)' * log(1 - H));
+
+Grad = (1 / n) * (X' * (H - Y)); % column vector, same shape as Theta
+
+end
\ No newline at end of file
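A quick sanity check of the three functions above can be run from the Octave prompt. The toy matrices below are invented for illustration; the one property being checked comes straight from the math: at Theta = 0 the hypothesis is 0.5 for every example, so the cost is -log(0.5) ~ 0.693 regardless of the labels (the same 0.693 that runLogisticRegression.m later compares against).

    % Sketch of a sanity check; X_toy / Y_toy are made-up example data.
    X_toy = [1 0; 1 1; 1 2];      % bias column plus a single feature
    Y_toy = [0; 0; 1];
    Sigmoid(0)                    % prints 0.5000
    [J0, G0] = costfunction(zeros(2, 1), X_toy, Y_toy);
    J0                            % prints 0.6931 = -log(0.5)
    Predict(zeros(2, 1), X_toy)   % all ones, since h = 0.5 >= 0.5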
diff --git a/algorithms/machine_learning/Logistic-Regression/data.txt b/algorithms/machine_learning/Logistic-Regression/data.txt
new file mode 100644
index 0000000..d7e2995
--- /dev/null
+++ b/algorithms/machine_learning/Logistic-Regression/data.txt
@@ -0,0 +1,100 @@
+34.62365962451697,78.0246928153624,0
+30.28671076822607,43.89499752400101,0
+35.84740876993872,72.90219802708364,0
+60.18259938620976,86.30855209546826,1
+79.0327360507101,75.3443764369103,1
+45.08327747668339,56.3163717815305,0
+61.10666453684766,96.51142588489624,1
+75.02474556738889,46.55401354116538,1
+76.09878670226257,87.42056971926803,1
+84.43281996120035,43.53339331072109,1
+95.86155507093572,38.22527805795094,0
+75.01365838958247,30.60326323428011,0
+82.30705337399482,76.48196330235604,1
+69.36458875970939,97.71869196188608,1
+39.53833914367223,76.03681085115882,0
+53.9710521485623,89.20735013750205,1
+69.07014406283025,52.74046973016765,1
+67.94685547711617,46.67857410673128,0
+70.66150955499435,92.92713789364831,1
+76.97878372747498,47.57596364975532,1
+67.37202754570876,42.83843832029179,0
+89.67677575072079,65.79936592745237,1
+50.534788289883,48.85581152764205,0
+34.21206097786789,44.20952859866288,0
+77.9240914545704,68.9723599933059,1
+62.27101367004632,69.95445795447587,1
+80.1901807509566,44.82162893218353,1
+93.114388797442,38.80067033713209,0
+61.83020602312595,50.25610789244621,0
+38.78580379679423,64.99568095539578,0
+61.379289447425,72.80788731317097,1
+85.40451939411645,57.05198397627122,1
+52.10797973193984,63.12762376881715,0
+52.04540476831827,69.43286012045222,1
+40.23689373545111,71.16774802184875,0
+54.63510555424817,52.21388588061123,0
+33.91550010906887,98.86943574220611,0
+64.17698887494485,80.90806058670817,1
+74.78925295941542,41.57341522824434,0
+34.1836400264419,75.2377203360134,0
+83.90239366249155,56.30804621605327,1
+51.54772026906181,46.85629026349976,0
+94.44336776917852,65.56892160559052,1
+82.36875375713919,40.61825515970618,0
+51.04775177128865,45.82270145776001,0
+62.22267576120188,52.06099194836679,0
+77.19303492601364,70.45820000180959,1
+97.77159928000232,86.7278223300282,1
+62.07306379667647,96.76882412413983,1
+91.56497449807442,88.69629254546599,1
+79.94481794066932,74.16311935043758,1
+99.2725269292572,60.99903099844988,1
+90.54671411399852,43.39060180650027,1
+34.52451385320009,60.39634245837173,0
+50.2864961189907,49.80453881323059,0
+49.58667721632031,59.80895099453265,0
+97.64563396007767,68.86157272420604,1
+32.57720016809309,95.59854761387875,0
+74.24869136721598,69.82457122657193,1
+71.79646205863379,78.45356224515052,1
+75.3956114656803,85.75993667331619,1
+35.28611281526193,47.02051394723416,0
+56.25381749711624,39.26147251058019,0
+30.05882244669796,49.59297386723685,0
+44.66826172480893,66.45008614558913,0
+66.56089447242954,41.09209807936973,0
+40.45755098375164,97.53518548909936,1
+49.07256321908844,51.88321182073966,0
+80.27957401466998,92.11606081344084,1
+66.74671856944039,60.99139402740988,1
+32.72283304060323,43.30717306430063,0
+64.0393204150601,78.03168802018232,1
+72.34649422579923,96.22759296761404,1
+60.45788573918959,73.09499809758037,1
+58.84095621726802,75.85844831279042,1
+99.82785779692128,72.36925193383885,1
+47.26426910848174,88.47586499559782,1
+50.45815980285988,75.80985952982456,1
+60.45555629271532,42.50840943572217,0
+82.22666157785568,42.71987853716458,0
+88.9138964166533,69.80378889835472,1
+94.83450672430196,45.69430680250754,1
+67.31925746917527,66.58935317747915,1
+57.23870631569862,59.51428198012956,1
+80.36675600171273,90.96014789746954,1
+68.46852178591112,85.59430710452014,1
+42.0754545384731,78.84478600148043,0
+75.47770200533905,90.42453899753964,1
+78.63542434898018,96.64742716885644,1
+52.34800398794107,60.76950525602592,0
+94.09433112516793,77.15910509073893,1
+90.44855097096364,87.50879176484702,1
+55.48216114069585,35.57070347228866,0
+74.49269241843041,84.84513684930135,1
+89.84580670720979,45.35828361091658,1
+83.48916274498238,48.38028579728175,1
+42.2617008099817,87.10385094025457,1
+99.31500880510394,68.77540947206617,1
+55.34001756003703,64.9319380069486,1
+74.77589300092767,89.52981289513276,1
\ No newline at end of file
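For reference, each row of data.txt is one training example in the form exam-1 score, exam-2 score, admission label (1 = admitted, 0 = not admitted). A minimal loading sketch, mirroring what runLogisticRegression.m does below, makes the expected shapes explicit:

    Data = load('data.txt');   % 100 x 3 numeric matrix
    x = Data(:, 1:2);          % the two exam scores per applicant
    Y = Data(:, 3);            % 0 = not admitted, 1 = admitted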
diff --git a/algorithms/machine_learning/Logistic-Regression/plotdata.m b/algorithms/machine_learning/Logistic-Regression/plotdata.m
new file mode 100644
index 0000000..0f206ce
--- /dev/null
+++ b/algorithms/machine_learning/Logistic-Regression/plotdata.m
@@ -0,0 +1,22 @@
+function plotdata(x, Y)
+
+% Plot the positive (Y = 1) examples as black crosses and the
+% negative (Y = 0) examples as filled yellow circles.
+
+figure;
+
+hold on;
+
+Pos = find(Y == 1);
+
+Neg = find(Y == 0);
+
+plot(x(Pos, 1), x(Pos, 2), 'k+', 'LineWidth', 2, ...
+     'MarkerSize', 7);
+
+plot(x(Neg, 1), x(Neg, 2), 'ko', 'MarkerFaceColor', 'y', ...
+     'MarkerSize', 7);
+
+hold off;
+
+end
\ No newline at end of file
diff --git a/algorithms/machine_learning/Logistic-Regression/plotdecisionboundary.m b/algorithms/machine_learning/Logistic-Regression/plotdecisionboundary.m
new file mode 100644
index 0000000..66aeddd
--- /dev/null
+++ b/algorithms/machine_learning/Logistic-Regression/plotdecisionboundary.m
@@ -0,0 +1,50 @@
+function plotdecisionboundary(Theta, X, Y)
+
+plotdata(X(:,2:3), Y);
+
+hold on;
+
+if size(X, 2) <= 3
+
+    % Linear boundary: solve Theta(1) + Theta(2)*x2 + Theta(3)*x3 = 0 for x3.
+    plot_X = [min(X(:,2))-2, max(X(:,2))+2];
+
+    plot_Y = (-1./Theta(3)).*(Theta(2).*plot_X + Theta(1));
+
+    plot(plot_X, plot_Y);
+
+    legend('Admitted', 'Not admitted', 'Decision Boundary');
+
+    axis([30, 100, 30, 100]);
+
+else
+
+    % Non-linear boundary over a grid; mapFeature is assumed to exist
+    % elsewhere (it is not part of this change).
+    U = linspace(-1, 1.5, 50);
+
+    V = linspace(-1, 1.5, 50);
+
+    Z = zeros(length(U), length(V));
+
+    for i = 1:length(U)
+
+        for j = 1:length(V)
+
+            Z(i,j) = mapFeature(U(i), V(j)) * Theta;
+
+        end
+
+    end
+
+    Z = Z';
+
+    % Draw the Theta' * x = 0 level curve; without this call the grid
+    % of Z values computed above would never be displayed.
+    contour(U, V, Z, [0, 0], 'LineWidth', 2);
+
+end
+
+hold off;
+
+end
\ No newline at end of file
diff --git a/algorithms/machine_learning/Logistic-Regression/runLogisticRegression.m b/algorithms/machine_learning/Logistic-Regression/runLogisticRegression.m
new file mode 100644
index 0000000..75034ca
--- /dev/null
+++ b/algorithms/machine_learning/Logistic-Regression/runLogisticRegression.m
@@ -0,0 +1,82 @@
+clear; close all; clc;
+
+Data = load('data.txt');
+x = Data(:, [1, 2]); Y = Data(:, 3);
+
+fprintf(['Plotting data with + indicating (Y = 1) examples and o ' ...
+         'indicating (Y = 0) examples.\n']);
+
+plotdata(x, Y);
+
+hold on;
+
+xlabel('Exam 1 score');
+ylabel('Exam 2 score');
+
+legend('Admitted', 'Not admitted');
+hold off;
+
+fprintf('\nProgram paused, press enter to continue.\n');
+pause;
+
+[m, n] = size(x);
+
+X = [ones(m, 1) x];
+
+Initial_Theta = zeros(n + 1, 1);
+
+[Cost, Grad] = costfunction(Initial_Theta, X, Y);
+
+fprintf('Cost at initial theta (zeros): %f\n', Cost);
+fprintf('Expected cost (approx): 0.693\n');
+fprintf('Gradient at initial theta (zeros): \n');
+fprintf(' %f \n', Grad);
+fprintf('Expected gradients (approx):\n -0.1000\n -12.0092\n -11.2628\n');
+
+Test_Theta = [-24; 0.2; 0.2];
+[Cost, Grad] = costfunction(Test_Theta, X, Y);
+
+fprintf('\nCost at test theta: %f\n', Cost);
+fprintf('Expected cost (approx): 0.218\n');
+fprintf('Gradient at test theta: \n');
+fprintf(' %f \n', Grad);
+fprintf('Expected gradients (approx):\n 0.043\n 2.566\n 2.647\n');
+
+fprintf('\nProgram paused, press enter to continue.\n');
+pause;
+
+Options = optimset('GradObj', 'on', 'MaxIter', 400);
+
+[Theta, Cost] = ...
+    fminunc(@(t)(costfunction(t, X, Y)), Initial_Theta, Options);
+
+fprintf('Cost at theta found by fminunc: %f\n', Cost);
+fprintf('Expected cost (approx): 0.203\n');
+fprintf('theta: \n');
+fprintf(' %f \n', Theta);
+fprintf('Expected theta (approx):\n');
+fprintf(' -25.161\n 0.206\n 0.201\n');
+
+plotdecisionboundary(Theta, X, Y);
+
+hold on;
+
+xlabel('Exam 1 score');
+ylabel('Exam 2 score');
+
+legend('Admitted', 'Not admitted');
+hold off;
+
+fprintf('\nProgram paused, press enter to continue.\n');
+pause;
+
+Prob = Sigmoid([1 45 85] * Theta);
+fprintf(['For a student with scores 45 and 85, we predict an admission ' ...
+         'probability of %f\n'], Prob);
+fprintf('Expected value: 0.775 +/- 0.002\n\n');
+
+P = Predict(Theta, X);
+
+fprintf('Train accuracy: %f\n', mean(double(P == Y)) * 100);
+fprintf('Expected accuracy (approx): 89.0\n');
+fprintf('\n');
\ No newline at end of file
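Since the script passes an analytic gradient to fminunc via 'GradObj', a numerical gradient check is a cheap way to catch mistakes if costfunction.m is ever modified. The sketch below is an editor's suggestion, not repository code (Eps, D and NumGrad are invented names): central finite differences of the cost should agree with the analytic Grad to many decimal places.

    % Sketch of a gradient check against central finite differences.
    Data = load('data.txt');
    X = [ones(size(Data, 1), 1), Data(:, 1:2)];
    Y = Data(:, 3);
    Theta = [-24; 0.2; 0.2];              % the Test_Theta from the script
    [Cost0, Grad] = costfunction(Theta, X, Y);
    Eps = 1e-4;
    NumGrad = zeros(size(Theta));
    for k = 1:length(Theta)
        D = zeros(size(Theta));
        D(k) = Eps;
        NumGrad(k) = (costfunction(Theta + D, X, Y) ...
                      - costfunction(Theta - D, X, Y)) / (2 * Eps);
    end
    max(abs(NumGrad - Grad))              % should be very close to zero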