In [15]:
import numpy as np
from scipy.special import softmax
import pandas as pd
from sklearn import datasets
from sklearn.utils import shuffle

In [16]:
wine = datasets.load_wine()
#wine dataset is consist of wine.data and wine.target 
#while data are the features(what are the feature of this wine) and target are the label of class(what type of wine)

df = pd.DataFrame(wine.data, columns=wine.feature_names)
df['label'] = wine.target
# Shuffle the DataFrame
df = shuffle(df, random_state=42).reset_index(drop=True)
# Display the first few rows
df.head()

Unnamed: 0,alcohol,malic_acid,ash,alcalinity_of_ash,magnesium,total_phenols,flavanoids,nonflavanoid_phenols,proanthocyanins,color_intensity,hue,od280/od315_of_diluted_wines,proline,label
0,13.64,3.1,2.56,15.2,116.0,2.7,3.03,0.17,1.66,5.1,0.96,3.36,845.0,0
1,14.21,4.04,2.44,18.9,111.0,2.85,2.65,0.3,1.25,5.24,0.87,3.33,1080.0,0
2,12.93,2.81,2.7,21.0,96.0,1.54,0.5,0.53,0.75,4.6,0.77,2.31,600.0,2
3,13.73,1.5,2.7,22.5,101.0,3.0,3.25,0.29,2.38,5.7,1.19,2.71,1285.0,0
4,12.37,1.17,1.92,19.6,78.0,2.11,2.0,0.27,1.04,4.68,1.12,3.48,510.0,1


In [26]:
X = wine.data
Y = wine.target
#存在xy里面
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
enc = OneHotEncoder()
Y = enc.fit_transform(Y[:, np.newaxis]).toarray()
#把x的数据normalize
#保存了这个数据的分布情况，但是大部分都压缩，利于gd
scaler = StandardScaler()
X = scaler.fit_transform(X)
#把数据分为训练数据和测试数据
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.25, random_state=11)

X_train = X_train.T 
X_test = X_test.T 
Y_train = Y_train.T 
Y_test = Y_test.T

In [31]:
def Gradient(W, X_batch, t_batch):
    Y = softmax(W @ X_batch, axis = 0) #this is our model
    #     for the prediction
    N = X_batch.shape[1] #number of points in batch 
    K = W.shape[0] #number of classes
    M = W.shape[1] #number of features
    G = np.zeros((K, M))
        #this will store the gradient, the dimensions are K:
    #     number of classes, M: number of features
        # calculate the gradient according to the formula in the exercises
    G = (Y - t_batch) @ X_batch.T / N 
    return G


def logisticGD(X_train, t_train, batch_size, l_rate = 0.1, tol = 1e-5, epochs = 10):
    """
    batch_size = size of training set, correspond to GD batch_size = 1, correspond to SGD
    all other values correspond to mini batch GD.
    """
    K = t_train.shape[0]
    N = X_train.shape[1]
    M = X_train.shape[0]
    W = np.random.rand(K, M) #initialization of the model parameters norm_G = float('inf')
    norm_G = float('inf')
    n_batches = N // batch_size
    epoch = 1
    while epoch <= epochs and norm_G > tol: 
        indices = np.random.permutation(N) 
        X_shuffle = X_train[:, indices] 
        t_shuffle = t_train[:, indices]
        
        for j in range(n_batches):
            X_batch = X_shuffle[:, j * batch_size:(j + 1) * batch_size] 
            t_batch = t_shuffle[:, j * batch_size:(j + 1) * batch_size] 
            G = Gradient(W, X_batch, t_batch)
            W = W - l_rate * G
            norm_G = np.linalg.norm(W) 
            epoch += 1
    return W

In [32]:
epochs = 10000
#Training the models with GD, SGD or mini-batch GD
#Define the correct value for batch_size
### GD
batch_size = X_train.shape[1]
W_GD = logisticGD(X_train, Y_train, batch_size = batch_size, epochs = epochs, tol = 1e-5, l_rate = 0.001)
### SGD
batch_size = 1
W_SGD = logisticGD(X_train, Y_train, batch_size = batch_size, epochs = epochs, tol = 1e-5, l_rate = 0.001)
### Mini batch GD
batch_size = X_train.shape[1]//10
W_MGD = logisticGD(X_train, Y_train, batch_size = batch_size, epochs = epochs, tol = 1e-5, l_rate = 0.001)

In [34]:
# Calculate accuracy on the training set
Y_train_pred = softmax(W_GD @ (X_train)) 
train_predictions = np.argmax(Y_train_pred, axis=0) 
train_true = np.argmax(Y_train, axis = 0)
train_accuracy = np.mean(train_predictions == train_true) 
print("Training Accuracy GD:", train_accuracy)
# Calculate accuracy on the test set
Y_test_pred = softmax(W_GD @ (X_test)) 
test_predictions = np.argmax(Y_test_pred, axis=0) 
test_true = np.argmax(Y_test, axis = 0)
test_accuracy = np.mean(test_predictions == test_true) 
print("Test Accuracy GD:", test_accuracy)
# Calculate accuracy on the training set
Y_train_pred = softmax(W_SGD @ (X_train)) 
train_predictions = np.argmax(Y_train_pred, axis=0) 
train_true = np.argmax(Y_train, axis = 0)
train_accuracy = np.mean(train_predictions == train_true) 
print("Training Accuracy SGD:", train_accuracy)
# Calculate accuracy on the test set
Y_test_pred = softmax(W_SGD @ (X_test)) 
test_predictions = np.argmax(Y_test_pred, axis=0) 
test_true = np.argmax(Y_test, axis = 0)
test_accuracy = np.mean(test_predictions == test_true) 
print("Test Accuracy SGD:", test_accuracy)
# Calculate accuracy on the training set
Y_train_pred = softmax(W_MGD @ (X_train)) 
train_predictions = np.argmax(Y_train_pred, axis=0) 
train_true = np.argmax(Y_train, axis = 0)
train_accuracy = np.mean(train_predictions == train_true) 
print("Training Accuracy mini batch GD:", train_accuracy)
# Calculate accuracy on the test set
Y_test_pred = softmax(W_MGD @ (X_test)) 
test_predictions = np.argmax(Y_test_pred, axis=0) 
test_true = np.argmax(Y_test, axis = 0)
test_accuracy = np.mean(test_predictions == test_true) 
print("Test Accuracy mini batch GD:", test_accuracy)

Training Accuracy GD: 0.9699248120300752
Test Accuracy GD: 0.9555555555555556
Training Accuracy SGD: 0.9774436090225563
Test Accuracy SGD: 0.9777777777777777
Training Accuracy mini batch GD: 0.9849624060150376
Test Accuracy mini batch GD: 0.9777777777777777
