In [1]:
from sklearn.decomposition  import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import load_boston, load_iris, load_breast_cancer, make_blobs
import numpy as np
from random import randrange
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import MinMaxScaler

In [2]:
def softmax_loss_vectorized(W, X, y, alpha, fit_intercept=False):
    """
    Softmax (multinomial cross-entropy) loss and its gradient, WITHOUT FOR LOOPS.

    Inputs:
    - W: array of shape (D, C) containing weights
    - X: array of shape (N, D) containing a minibatch of data
    - y: array of shape (N,) containing integer training labels, used as
      column indices into the (N, C) score matrix
    - alpha: (float) L2 regularization strength
    - fit_intercept: accepted for interface compatibility only; it is not
      read by this function (every row of W is regularized alike)

    Returns a tuple of:
    - loss as single float: mean cross-entropy over the batch
      plus 0.5 * alpha * sum(W * W)
    - gradient with respect to weights W; same shape as W
    """
    N = X.shape[0]
    rows = np.arange(N)

    scores = np.dot(X, W)
    # Subtract each row's maximum before exponentiating: the softmax is
    # unchanged by a per-row shift, and this keeps exp() from overflowing.
    scores = scores - scores.max(axis=1).reshape(N, 1)

    # Exponentiate once and reuse for both the loss and the gradient.
    exp_scores = np.exp(scores)
    row_sums = exp_scores.sum(axis=1)

    # Cross-entropy: log-sum-exp of each row minus the score of the true class.
    data_loss = np.log(row_sums).sum() - scores[rows, y].sum()

    # Gradient w.r.t. the scores is (softmax probabilities - one-hot labels).
    probs = exp_scores / row_sums.reshape(N, 1)
    probs[rows, y] -= 1
    dW = np.dot(X.T, probs)

    # Average over the batch and add the L2 penalty and its gradient.
    loss = data_loss / N + 0.5 * alpha * np.sum(W * W)
    dW = dW / N + alpha * W

    return loss, dW

In [13]:
class LinearModel():
    """
    Base class for linear classifiers trained with minibatch gradient descent.

    Subclasses must override `loss` (returning a `(loss, dW)` tuple) and
    `predict`.

    Attributes:
    - W: weight array of shape (d, C); None until `train` initializes it
    - fit_intercept: when equal to True, a constant column of ones is
      appended to X in `train` so the last row of W acts as a bias
    """

    def __init__(self, fit_intercept=True):
        self.W = None
        self.fit_intercept = fit_intercept

    def train(self, X, y, learning_rate=1e-3, alpha=0, num_iters=100, batch_size=200, verbose=False):
        """
        Optimize self.W with minibatch stochastic gradient descent.

        Inputs:
        - X: array-like of shape (N, D) containing training data
        - y: array-like of shape (N,) containing integer labels; the number
          of classes is taken as max(y) + 1 when W is first created
        - learning_rate: (float) step size of each update
        - alpha: (float) regularization strength forwarded to `self.loss`
        - num_iters: (int) number of update steps
        - batch_size: (int) number of samples drawn (with replacement) per step
        - verbose: (bool) print the loss every 10 iterations

        Returns:
        - list with the loss value of every iteration
        """
        # Accept lists as well as arrays; fancy indexing below needs ndarrays.
        X = np.asarray(X)
        y = np.asarray(y)

        # Equality (not truthiness) is intentional: it is the same condition
        # the original nested check reduced to and the one predict() uses.
        if self.fit_intercept == True:  # noqa: E712
            X = np.concatenate((X, np.ones((X.shape[0], 1))), axis=1)

        N, d = X.shape

        if self.W is None:  # Initialization
            C = int(np.max(y)) + 1
            self.W = 0.001 * np.random.randn(d, C)

        # Run stochastic gradient descent to optimize W
        loss_history = []
        for it in range(num_iters):
            # Draw batch_size row indices uniformly with replacement.
            # Passing the int N avoids rebuilding an N-element array each step.
            index = np.random.choice(N, size=batch_size)
            X_batch = X[index]
            y_batch = y[index]

            # evaluate loss and gradient
            loss, dW = self.loss(X_batch, y_batch, alpha)
            loss_history.append(loss)

            # gradient step
            self.W = self.W - learning_rate * dW

            if verbose and it % 10 == 0:
                print("iteration %d / %d: loss %f" % (it, num_iters, loss))

        return loss_history

    def predict(self, X):
        """Return predicted labels for X. Must be implemented by subclasses."""
        raise NotImplementedError("subclasses must implement predict()")

    def loss(self, X_batch, y_batch, reg):
        """
        Return `(loss, dW)` for one minibatch. Must be implemented by subclasses.

        `train` unpacks the result into a loss value and a gradient with the
        same shape as self.W.
        """
        raise NotImplementedError("subclasses must implement loss()")

class MultinomialLogisticRegressor(LinearModel):
    """ Softmax regression """

    def loss(self, X_batch, y_batch, alpha):
        """
        Softmax loss and gradient of the current weights on one minibatch.

        Delegates to softmax_loss_vectorized(self.W, X_batch, y_batch, alpha)
        and returns its (loss, dW) tuple.
        """
        return softmax_loss_vectorized(self.W, X_batch, y_batch, alpha)

    def predict(self, X):
        """
        Inputs:
        - X: array of shape (N, D)

        Returns:
        - y_pred: 1-dimensional array of length N, each element is an integer giving the predicted class
        """
        # Same equality check as in train(), so the bias column is added
        # under exactly the same condition at training and prediction time.
        if self.fit_intercept == True:  # noqa: E712
            X = np.concatenate((X, np.ones((X.shape[0], 1))), axis=1)

        # Softmax is strictly increasing in each score within a row, so the
        # most probable class is the one with the largest raw score. Skipping
        # exp() avoids overflow (inf / inf -> nan) on large scores and halves
        # the work compared with exponentiating X @ W twice.
        scores = X @ self.W
        y_pred = np.argmax(scores, axis=1)
        return y_pred

In [4]:
# Colab-specific setup: mount Google Drive at /content/drive/ so that files
# stored there can be opened by path from this notebook.
from google.colab import drive

drive.mount('/content/drive/')

Mounted at /content/drive/


In [6]:
def unpickle(file):
    """
    Load and return the single pickled object stored in `file`.

    The file is opened in binary mode and decoded with encoding='bytes'.
    NOTE: unpickling can run arbitrary code; only call this on trusted files.
    """
    import pickle

    with open(file, 'rb') as handle:
        return pickle.load(handle, encoding='bytes')

In [7]:
# Load the pickled training file from the mounted Drive folder.
# unpickle() loads with encoding='bytes', which is presumably why the
# resulting mapping is indexed with bytes keys (b'data', b'fine_labels').
data = unpickle("/content/drive/MyDrive/Carraz/a9/cifar-100-python/train")

In [10]:
# Pull the feature matrix and the fine-grained labels out of the loaded
# mapping; labels are converted to an ndarray in the same step.
Train_features = data[b'data']
Train_labels = np.array(data[b'fine_labels'])

In [11]:
### Preprocessing: divide every feature by the constant 255.
### This is a fixed rescale, not sklearn's MinMaxScaler (nothing is fitted).
### Assumes 8-bit pixel values, which would map to [0, 1] -- TODO confirm.
Train_features_preprocessed = Train_features/255

In [14]:
# Seed NumPy's global RNG so the random weight initialization and the
# minibatch sampling inside train() are repeatable across runs.
np.random.seed(0)

model = MultinomialLogisticRegressor(fit_intercept=True)
# Keep the per-iteration losses in a variable: leaving train(...) as the
# cell's last expression would echo the whole returned list as cell output.
loss_history = model.train(Train_features_preprocessed, Train_labels, num_iters=75000, batch_size=32, learning_rate=1e-3, verbose=True)

[1;30;43mLe flux de sortie a été tronqué et ne contient que les 5000 dernières lignes.[0m
iteration 25000 / 75000: loss 3.576565
iteration 25010 / 75000: loss 3.719394
iteration 25020 / 75000: loss 3.846306
iteration 25030 / 75000: loss 3.809617
iteration 25040 / 75000: loss 3.422312
iteration 25050 / 75000: loss 4.105900
iteration 25060 / 75000: loss 3.925501
iteration 25070 / 75000: loss 3.874854
iteration 25080 / 75000: loss 3.563569
iteration 25090 / 75000: loss 3.939596
iteration 25100 / 75000: loss 3.706605
iteration 25110 / 75000: loss 3.868047
iteration 25120 / 75000: loss 4.097709
iteration 25130 / 75000: loss 3.875739
iteration 25140 / 75000: loss 3.778929
iteration 25150 / 75000: loss 3.742866
iteration 25160 / 75000: loss 3.726709
iteration 25170 / 75000: loss 3.667468
iteration 25180 / 75000: loss 3.887020
iteration 25190 / 75000: loss 4.031586
iteration 25200 / 75000: loss 3.841382
iteration 25210 / 75000: loss 3.511333
iteration 25220 / 75000: loss 4.006797
iteration 2

NameError: ignored

In [16]:
# Accuracy measured on the same samples that were passed to model.train(),
# i.e. training accuracy; it says nothing about performance on unseen data.
pred = model.predict(Train_features_preprocessed)
model_accuracy = accuracy_score(Train_labels, pred)
print(model_accuracy)

0.18846
