In [7]:
# used for manipulating directory paths
import os

# Scientific and vector computation for python
import numpy as np
from numpy import asarray
from numpy import savetxt

# Plotting library
from matplotlib import pyplot
import matplotlib.image as mpimg
import matplotlib.cm as cm 

# Optimization module in scipy
from scipy import optimize
from scipy import misc

# will be used to load MATLAB mat datafile format
from scipy.io import loadmat

# library written for this exercise providing additional functions for assignment submission, and others
import utils

# importing os module 
import os 

from skimage import color
from skimage import io
from skimage.transform import rescale, resize, downscale_local_mean

# tells matplotlib to embed plots within the notebook
%matplotlib inline

def sigmoid(x):
    return 1/(1 + np.exp(-x))

"""
Computes the cost of using theta as the parameter for regularized
logistic regression and the gradient of the cost w.r.t. to the parameters.

Parameters
----------
theta : array_like
    Logistic regression parameters. A vector with shape (n, ). n is 
    the number of features including any intercept.  

X : array_like
    The data set with shape (m x n). m is the number of examples, and
    n is the number of features (including intercept).

y : array_like
    The data labels. A vector with shape (m, ).

lambda_ : float
    The regularization parameter. 

Returns
-------
J : float
    The computed value for the regularized cost function. 

grad : array_like
    A vector of shape (n, ) which is the gradient of the cost
    function with respect to theta, at the current values of theta.   
"""
def lrCostFunction(theta, X, y, lambda_):
    #Initialize some useful values
    m = y.size
    # convert labels to ints if their type is bool
    if y.dtype == bool:
        y = y.astype(int)
    
    # You need to return the following variables correctly
    J = 0
    grad = np.zeros(theta.shape)
    
    # ====================== YOUR CODE HERE ======================
    h = sigmoid(X.dot(theta.T))
    
    temp = theta
    temp[0] = 0
    
    J=(1/m) * ((-y.dot(np.log(h)))-(1-y).dot(np.log(1-h)))+ ((lambda_/(2*m))* np.sum(np.square(temp)))
    grad = (1 / m) * (h - y).dot(X) 
    grad = grad + (lambda_ / m) * temp
    # =============================================================
    return J, grad


"""
Trains num_labels logistic regression classifiers and returns
each of these classifiers in a matrix all_theta, where the i-th
row of all_theta corresponds to the classifier for label i.

Parameters
----------
X : array_like
    The input dataset of shape (m x n). m is the number of 
    data points, and n is the number of features. Note that we 
    do not assume that the intercept term (or bias) is in X, however
    we provide the code below to add the bias term to X. 

y : array_like
    The data labels. A vector of shape (m, ).

num_labels : int
    Number of possible labels.

lambda_ : float
    The logistic regularization parameter.

Returns
-------
all_theta : array_like
    The trained parameters for logistic regression for each class.
    This is a matrix of shape (K x n+1) where K is number of classes
    (ie. `numlabels`) and n is number of features without the bias.

"""
def oneVsAll(X, y, num_labels, lambda_):

    # Some useful variables
    m, n = X.shape
    
    # You need to return the following variables correctly 
    all_theta = np.zeros((num_labels, n + 1))#10x401

    # Add ones to the X data matrix
    X = np.concatenate([np.ones((m, 1)), X], axis=1)

    # ====================== YOUR CODE HERE ======================
    #num_labels = 10 
    #y = 5000, filled with 0s-9s
    for c in range(num_labels):
        initial_theta = np.zeros(n + 1)#401 x 1
        options = {'maxiter': 50}
        res = optimize.minimize(lrCostFunction, 
                                initial_theta, 
                                (X, (y == c), lambda_), 
                                jac=True, 
                                method='CG',
                                options=options) 
        all_theta[c] = res.x
    # ============================================================
    return all_theta    


"""
Return a vector of predictions for each example in the matrix X. 
Note that X contains the examples in rows. all_theta is a matrix where
the i-th row is a trained logistic regression theta vector for the 
i-th class. You should set p to a vector of values from 0..K-1 
(e.g., p = [0, 2, 0, 1] predicts classes 0, 2, 0, 1 for 4 examples) .

Parameters
----------
all_theta : array_like
    The trained parameters for logistic regression for each class.
    This is a matrix of shape (K x n+1) where K is number of classes
    and n is number of features without the bias.

X : array_like
    Data points to predict their labels. This is a matrix of shape 
    (m x n) where m is number of data points to predict, and n is number 
    of features without the bias term. Note we add the bias term for X in 
    this function. 

Returns
-------
p : array_like
    The predictions for each data point in X. This is a vector of shape (m, ).

"""
def predictOneVsAll(all_theta, X):
    m = X.shape[0];
    num_labels = all_theta.shape[0]

    # You need to return the following variables correctly 
    p = np.zeros(m)

    # Add ones to the X data matrix
    X = np.concatenate([np.ones((m, 1)), X], axis=1)

    # ====================== YOUR CODE HERE ======================
    p = np.argmax(sigmoid(X.dot(all_theta.T)), axis = 1)

    # ============================================================
    return p


X_size = 14284 #2046 validation 14284 train 2039 valid which are 691200 and 14284 which are 691200 
Gray_size = 57600  #230400

def loadData(directory_path):
    labels_int = np.zeros((X_size))
    gray_arrays = np.zeros((X_size,Gray_size))
    directory_path
    ext = ('.jpg')
    i = 0
    j = 0
    for directory in os.listdir(directory_path):
        newPath = directory_path + '\\' + directory
        for file in os.listdir(newPath):
            if file.endswith(ext):
                path = newPath + '\\' + file

                img = io.imread(path)
                if(img.size == 691200):
                    imgGray = color.rgb2gray(img)
                    res_img = rescale(imgGray, 0.5, anti_aliasing=False)
                    imgn = np.reshape(res_img,(1,Gray_size),order='F')
                    gray_arrays[i] = imgn
                    type = directory
                    match type:
                        case 'd4':
                            labels_int[i]=0
                        case 'd6':
                            labels_int[i]=1
                        case 'd8':
                            labels_int[i]=2
                        case 'd10':
                            labels_int[i]=3
                        case 'd12':
                            labels_int[i]=4
                        case 'd20':
                            labels_int[i]=5   
                else:
                    continue

                i = i + 1   
                j=j+1                                                                                                                
            else:
                continue
    i = 0

    return gray_arrays, labels_int,j
  
def main():
    gray_arrays, labels_int,j = loadData(r"C:\Users\maart\Documents\IIW\Machine\Taak\Data\dice-d4-d6-d8-d10-d12-d20\dice\train")
    X_t = np.concatenate([np.ones((X_size, 1)), gray_arrays], axis=1)
    # print(X_t)
    X = X_t
    y_t = labels_int
    y = labels_int

    num_labels = 6

    # test values for the parameters theta
    theta_t = np.zeros(57601 )

    # test value for the regularization parameter
    lambda_t = 0.001

    J, grad = lrCostFunction(theta_t, X_t, y_t, lambda_t)
    print('Cost         : {:.6f}'.format(J))
    print(' [{:.6f}, {:.6f}, {:.6f}, {:.6f}]'.format(*grad))

    lambda_ = 0.001
    all_theta = oneVsAll(X, y, num_labels, lambda_)
    pred = predictOneVsAll(all_theta, X)
    print('Training Set Accuracy: {:.2f}%'.format(np.mean(pred == y) * 100))

main()

Cost         : 0.693147
 [-2.004201, -1.470333, -1.469278, -1.468951]
Training Set Accuracy: 66.09%
