In [7]:
# used for manipulating directory paths
import os

# Scientific and vector computation for python
import numpy as np
from numpy import asarray
from numpy import savetxt

# Plotting library
from matplotlib import pyplot
import matplotlib.image as mpimg
import matplotlib.cm as cm 

# Optimization module in scipy
from scipy import optimize
from scipy import misc

# will be used to load MATLAB mat datafile format
from scipy.io import loadmat

# library written for this exercise providing additional functions for assignment submission, and others
import utils

# importing os module 
import os 

# tells matplotlib to embed plots within the notebook
%matplotlib inline






def lrCostFunction(theta, X, y, lambda_):
    """
    Compute the regularized logistic regression cost and its gradient.

    The intercept parameter ``theta[0]`` is excluded from the
    regularization term of both the cost and the gradient.

    Parameters
    ----------
    theta : array_like
        Logistic regression parameters. A vector with shape (n, ). n is
        the number of features including any intercept. It is not
        modified by this function.

    X : array_like
        The data set with shape (m x n). m is the number of examples, and
        n is the number of features (including intercept).

    y : array_like
        The data labels. A vector with shape (m, ). Boolean labels are
        converted to integers before use.

    lambda_ : float
        The regularization parameter.

    Returns
    -------
    J : float
        The computed value for the regularized cost function.

    grad : array_like
        A vector of shape (n, ) which is the gradient of the cost
        function with respect to theta, at the current values of theta.
    """
    m = y.size

    # convert labels to ints if their type is bool
    if y.dtype == bool:
        y = y.astype(int)

    # Hypothesis for every example. NOTE(review): utils.sigmoid is assumed
    # to be the element-wise logistic function -- confirm in utils.
    h = utils.sigmoid(X.dot(theta.T))

    # The intercept must not be regularized, so it is zeroed out. This is
    # done on a copy: zeroing theta[0] in place would overwrite the
    # caller's parameter vector (e.g. the optimizer's current iterate).
    reg_theta = theta.copy()
    reg_theta[0] = 0

    J = (1 / m) * ((-y.dot(np.log(h))) - (1 - y).dot(np.log(1 - h))) \
        + ((lambda_ / (2 * m)) * np.sum(np.square(reg_theta)))

    grad = (1 / m) * (h - y).dot(X)
    grad = grad + (lambda_ / m) * reg_theta
    return J, grad


def oneVsAll(X, y, num_labels, lambda_):
    """
    Train one regularized logistic regression classifier per label.

    For every label ``c`` in ``range(num_labels)`` a binary problem
    "``y == c`` versus the rest" is minimized with conjugate gradient
    (at most 50 iterations), starting from an all-zero parameter vector.

    Parameters
    ----------
    X : array_like
        The input dataset of shape (m x n). m is the number of
        data points, and n is the number of features. The intercept
        (bias) column must NOT be included; it is added here.

    y : array_like
        The data labels. A vector of shape (m, ). Labels are compared
        against the integers 0 .. num_labels - 1.

    num_labels : int
        Number of possible labels.

    lambda_ : float
        The logistic regularization parameter.

    Returns
    -------
    all_theta : array_like
        The trained parameters for logistic regression for each class.
        This is a matrix of shape (K x n+1) where K is number of classes
        (ie. `num_labels`) and n is number of features without the bias.
        Row i holds the classifier for label i.
    """
    num_examples, num_features = X.shape

    # Prepend the bias column of ones to the design matrix.
    X = np.hstack((np.ones((num_examples, 1)), X))

    # One row of parameters per class, bias included.
    all_theta = np.zeros((num_labels, num_features + 1))

    for label in range(num_labels):
        # Binary target: True where the example belongs to this label.
        result = optimize.minimize(
            lrCostFunction,
            np.zeros(num_features + 1),
            args=(X, y == label, lambda_),
            jac=True,
            method='CG',
            options={'maxiter': 50},
        )
        all_theta[label] = result.x

    return all_theta


def predictOneVsAll(all_theta, X):
    """
    Return the predicted class index for each example in the matrix X.

    X contains the examples in rows. all_theta is a matrix where the i-th
    row is a trained logistic regression theta vector for the i-th class.
    The result holds values from 0..K-1 (e.g., [0, 2, 0, 1] predicts
    classes 0, 2, 0, 1 for 4 examples).

    Parameters
    ----------
    all_theta : array_like
        The trained parameters for logistic regression for each class.
        This is a matrix of shape (K x n+1) where K is number of classes
        and n is number of features without the bias.

    X : array_like
        Data points to predict their labels. This is a matrix of shape
        (m x n) where m is number of data points to predict, and n is number
        of features without the bias term. The bias term is added in
        this function.

    Returns
    -------
    p : array_like
        The predictions for each data point in X. This is a vector of
        shape (m, ) holding zero-based class indices.
    """
    m = X.shape[0]

    # Add ones to the X data matrix
    X = np.concatenate([np.ones((m, 1)), X], axis=1)

    # The logistic function is strictly increasing, so the class with the
    # largest linear score is also the class with the largest probability.
    # Comparing raw scores avoids the false ties that appear once the
    # sigmoid saturates to exactly 1.0 for several classes.
    scores = X.dot(all_theta.T)

    # Row index of all_theta == class label, so no offset is applied.
    return np.argmax(scores, axis=1)


def img2gray(path):
    """
    Load an image and convert it to a gray scale array.

    The gray value of each pixel is the weighted sum
    ``0.2126 * R + 0.7152 * G + 0.0722 * B`` (the Rec. 709 luma weights).
    A fourth (alpha) channel, if present, is ignored. No rescaling is
    applied, so the values keep the range of whatever ``imread`` returned.

    Parameters
    ----------
    path : string
        The path, including the file name, of the image to convert.

    Returns
    -------
    gray_array : array_like
        A float array of shape (height x width) with the gray scale value
        of each pixel.
    """
    img_array = mpimg.imread(path)

    # A two-dimensional result is already single-channel: there is nothing
    # to combine, only the dtype is normalized to float.
    if img_array.ndim == 2:
        return img_array.astype(float)

    # Combine the colour channels into ONE value per pixel. Appending the
    # three weighted channels one after another would instead produce a
    # flat array three times the size of the image, not a gray scale image.
    channel_weights = np.array([0.2126, 0.7152, 0.0722])
    gray_array = img_array[..., :3].dot(channel_weights)
    return gray_array



def loadData(path):
    """
    Count the ``.jpg`` images stored in the sub-directories of a dataset.

    Every directory directly below *path* is scanned (non-recursively) and
    each file whose name ends in ``.jpg`` is counted. Entries of *path*
    that are not directories are skipped.

    Loading the images and encoding their labels is not implemented yet,
    so both returned arrays are always empty.

    Parameters
    ----------
    path : string
        The path to the dataset root. Presumably it holds one
        sub-directory per die type (d4, d6, d8, d10, d12, d20) -- confirm
        against the dataset layout.

    Returns
    -------
    gray_arrays : array_like
        Placeholder for the gray scale images; currently always empty.

    labels_int : array_like
        Placeholder for the integer labels; currently always empty.

    i : int
        The number of ``.jpg`` files found.
    """
    gray_arrays = np.array([])
    labels_int = np.array([])
    image_count = 0
    ext = '.jpg'

    for directory in os.listdir(path):
        # os.path.join picks the separator of the running platform; a
        # hard-coded backslash only works on Windows.
        class_dir = os.path.join(path, directory)
        if not os.path.isdir(class_dir):
            # Stray files next to the class folders cannot be listed.
            continue

        # TODO: load each image with img2gray and append an integer label
        # derived from the directory name (intended mapping: d4 -> 0,
        # d6 -> 1, d8 -> 2, d10 -> 3, d12 -> 4, d20 -> 5).
        image_count += sum(
            1 for file_name in os.listdir(class_dir) if file_name.endswith(ext)
        )

    return gray_arrays, labels_int, image_count

  
def main(data_dir=r"C:\Users\maart\Documents\IIW\Machine\Taak\Data\dice-d4-d6-d8-d10-d12-d20\dice\train"):
    """
    Scan the training set and print what ``loadData`` returns.

    Parameters
    ----------
    data_dir : string
        The path to the training set root. Defaults to the original
        author's local dataset location; pass another path to run this
        on a different machine.
    """
    # loadData returns a tuple (gray_arrays, labels_int, image_count).
    result = loadData(data_dir)
    print(result)

    # TODO: once loadData returns real images and labels, train the
    # classifiers with oneVsAll (planned: lambda_ = 0.1, num_labels = 6),
    # predict with predictOneVsAll and print the training set accuracy.


# Still runs when the cell/script is executed directly, but no longer
# triggers a dataset scan when the file is merely imported.
if __name__ == "__main__":
    main()

(array([], dtype=float64), array([], dtype=float64), 14284)
