In [9]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import os
from sklearn.metrics import accuracy_score

In [2]:
# Colab-specific setup: mounts Google Drive at /content/drive.
# NOTE(review): only runs inside Google Colab; the google.colab package is not available elsewhere.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# HW5

## Q5

In [4]:
def partial_u(u, v):
    """
    Partial derivative of the error surface E(u, v) = (u*e^v - 2*v*e^-u)^2
    with respect to u, evaluated at the point (u, v).
    """
    grow = np.e**v            # e^v
    decay = np.e**-u          # e^-u
    residual = u*grow - 2*v*decay        # the term that is squared in E
    d_residual_du = grow + 2*v*decay     # derivative of the residual w.r.t. u
    return 2*d_residual_du*residual      # chain rule

def partial_v(u, v):
    """
    Partial derivative of the error surface E(u, v) = (u*e^v - 2*v*e^-u)^2
    with respect to v, evaluated at the point (u, v).
    """
    grow = np.e**v            # e^v
    decay = np.e**-u          # e^-u
    residual = u*grow - 2*v*decay        # the term that is squared in E
    d_residual_dv = u*grow - 2*decay     # derivative of the residual w.r.t. v
    return 2*d_residual_dv * residual    # chain rule

def error(u, v):
    """
    Value of the error surface E(u, v) = (u*e^v - 2*v*e^-u)^2 at the point (u, v).
    """
    residual = u*np.e ** v - 2*v* np.e ** -u
    return residual**2

In [5]:
u, v, lr = 1, 1, 0.1 # starting point (u, v) and learning rate
step = 0 # number of gradient-descent iterations performed so far
# Always take at least one step, then keep going until the error drops below 1e-14.
while True:
    # both gradients are evaluated at the current point before either coordinate moves
    grad_u, grad_v = partial_u(u, v), partial_v(u, v)
    u, v = u - lr * grad_u, v - lr * grad_v
    e = error(u, v)
    step += 1
    if not e >= 1e-14:
        break

In [6]:
# Q5 answer: number of gradient-descent iterations taken before the error fell below 1e-14.
step

10

## Q6

In [7]:
# Q6 answer: the point (u, v) reached when the gradient-descent loop stopped.
(u, v)

(0.04473629039778207, 0.023958714099141746)

## Q7

In [8]:
u, v, lr = 1, 1, 0.1 # starting point (u, v) and learning rate
for i in range(15): # 15 iterations x 2 coordinate updates = 30 steps in total
    u = u - lr * partial_u(u, v) # step 1: move along the u coordinate only
    v = v - lr * partial_v(u, v) # step 2: move along v, using the already updated u
e = error(u, v) # only the error after the final update is needed

print(e)

0.13981379199615324


## Q8 & Q9

In [5]:
def determinant(X, f):
    """
    Labels every row of X by the side of the target line it lies on.

    f holds two points, f[0] = (x1, y1) and f[1] = (x2, y2), which define the line.
    Columns 1 and 2 of X are read as the coordinates of each sample. For every
    sample the 2x2 determinant built from the line direction and the vector
    from f[0] to the sample is computed, and its sign is the label.

    Returns an integer array of signs (0 for a sample whose determinant is exactly 0).
    """
    x1, y1 = f[0][0], f[0][1]
    x2, y2 = f[1][0], f[1][1]
    sample_x, sample_y = X[:, 1], X[:, 2]
    cross = (x2 - x1)*(sample_y - y1)-(y2 - y1)*(sample_x - x1)
    return np.sign(cross).astype(int)

def create_data_and_pick_target_function(sample_size=10, feature_size=2, target_function=None):
    """
    Draws a random dataset and labels it with a target line.

    Inputs:
        sample_size: number of samples to draw, '10' as default
        feature_size: number of features per sample, '2' as default.
                      Labelling is done by determinant(), which reads only
                      columns 1 and 2 of X.
        target_function: two points defining the target line. If not
                         provided, two random points are drawn uniformly
                         from [-1, 1] x [-1, 1].
    Outputs:
        X (dataset with a leading column of 1s as X0), f (target_function),
        and y (true labels produced by determinant(X, f))
    """
    points = np.random.uniform(-1, 1, (sample_size, feature_size))
    bias = np.ones((sample_size, 1))
    X = np.concatenate([bias, points], axis=1) # X0 = 1 goes in the leftmost column

    # the random target is drawn only after the data, and only when none was supplied
    if target_function is None:
        f = np.random.uniform(-1, 1, (2, 2))
    else:
        f = target_function

    return X, f, determinant(X, f)

In [3]:
def shuffle_data(X, y):
    """
    Returns X and y reordered by one shared random permutation of the row
    indices, so every sample stays paired with its own label.
    """
    order = np.random.permutation(X.shape[0]) # random ordering of the row indices
    return X[order], y[order]

class LogisticRegression:
    """
    Logistic regression for labels in {-1, 1}, trained with stochastic
    gradient descent.

    Inputs:
        learning_rate: step size of every weight update, 0.01 as default
    """
    def __init__(self, learning_rate=0.01):
        self.lr = learning_rate

    def fit(self, X, y):
        """
        Trains the weights, one shuffled pass over the data per epoch, and
        stops after the first epoch that moved the weights by less than 0.01
        (Euclidean norm).

        Inputs:
            X: dataset as an array like object
            y: targets of samples

        Output:
            epoch: number of epochs that were run
        """
        self.initialize_weights(X) # start from all-zero weights
        epoch = 0
        converged = False
        while not converged:
            # update_weights rebinds self.w, so this reference keeps the old values
            w_before = self.w
            X_epoch, y_epoch = shuffle_data(X, y)
            self.update_weights(X_epoch, y_epoch)
            epoch += 1
            converged = np.linalg.norm(w_before - self.w) < 0.01
        return epoch

    def predict(self, X):
        """
        Inputs:
            X: dataset as an array like object

        Output:
            prob: raw linear signal w.x of each sample (before the sigmoid)
            targets: 1 where sigmoid(signal) > 0.5, otherwise -1
        """
        signal = np.ravel(np.dot(X, self.w.T))
        sigmoid = 1 / (1 + np.exp(-signal))
        targets = np.where(sigmoid > 0.5, 1, -1)
        return signal, targets

    def initialize_weights(self, X):
        """Sets the weights to a (1, number of columns of X) row of zeros."""
        n_features = X.shape[1]
        self.w = np.zeros((1, n_features))

    def update_weights(self, X, y):
        """
        Runs one pass of stochastic gradient descent: one weight update per
        sample, in the order the samples are given.
        """
        for idx, sample in enumerate(X):
            label = y[idx]
            margin = label * np.dot(self.w, sample)
            # gradient of ln(1 + exp(-label * w.x)) with respect to w
            gradient = -(label * sample)/(1 + np.exp(margin))
            self.w = self.w - self.lr * gradient

In [6]:
n_runs = 100   # independent experiments to average over
n_points = 100 # samples in each training set and in each test set

avr_epoch = 0 # running sum of epochs; divided by n_runs when printed
score = 0     # running sum of out-of-sample cross-entropy errors
for i in range(n_runs):
    # fresh training set together with its own random target line f
    X, f, y = create_data_and_pick_target_function(n_points)
    # test set labelled by that same target line
    X_test, _, y_test = create_data_and_pick_target_function(n_points, target_function=f)

    lr = LogisticRegression() # new model for every run
    epoch = lr.fit(X, y)
    y_prob, y_pred = lr.predict(X_test) # y_prob is the raw signal w.x

    avr_epoch = avr_epoch + epoch
    cross_entropy = np.log(1 + np.exp(-y_test * y_prob)) # per-sample cross-entropy error
    score = score + np.mean(cross_entropy)

print("Avrage Epoch: ", avr_epoch / n_runs)
print("Avrage Score: ", score / n_runs)

Avrage Epoch:  348.68
Avrage Score:  0.10131399824335637


# HW6

### Loading Data

In [None]:
# Colab specific code
% cd /content/drive/MyDrive/Colab Notebooks/ML Lecture

/content/drive/MyDrive/Colab Notebooks/ML Lecture


In [None]:
def _read_lines(path):
    """Returns the lines of the text file at path, without their line endings."""
    with open(path, 'r') as handle:
        return handle.read().splitlines()

data_in = _read_lines('data/data_in.txt')   # raw lines of the training data
data_out = _read_lines('data/data_out.txt') # raw lines of the test data

In [None]:
def _to_float_matrix(lines):
    """Splits every line on whitespace and converts the resulting string table to float32."""
    return np.array([row.split() for row in lines]).astype(np.float32)

# NOTE: data_in / data_out are rebound from lists of strings to arrays here, so this
# cell can only be re-run after the loading cell has been run again.
data_in = _to_float_matrix(data_in)
data_out = _to_float_matrix(data_out)

# first two columns are the features, the third column is the label
X_train, y_train = data_in[:, :2], data_in[:, 2].astype(int)
X_test, y_test = data_out[:, :2], data_out[:, 2].astype(int)

## Q2

In [None]:
class LinearRegression:
    """
    Linear regression used as a classifier, solved in closed form, with
    optional weight decay (ridge regularisation).
    """
    supported_weights = {'zero', 'rand'}

    def __init__(self, weight_type='zero'):
        """
        weight_type: either 'zero' or 'rand' to choose
                     zero or random initialization

        Raises ValueError for any other value.
        """
        if weight_type not in self.supported_weights:
            raise ValueError(f"{weight_type} is not supported by parameter weight_type")

        self.weight_type = weight_type

    def _initialize_weights(self, X):
        """Creates a (1, number of columns of X) weight row, zeros or uniform random."""
        n_features = X.shape[1]
        if self.weight_type == 'zero':
            self.w = np.zeros((1, n_features))
        else:
            # np.random.rand takes the dimensions as separate arguments, not as a tuple
            self.w = np.random.rand(1, n_features)

    def update_weights(self, X, y, k=None):
        """
        Sets the weights to the closed-form least-squares solution.

        Inputs:
            X: dataset, one sample per row
            y: targets of samples
            k: None for plain least squares; otherwise the weight-decay
               strength is 10**k and the system (X^T X + 10**k * I) w = X^T y
               is solved
        """
        gram = np.dot(X.T, X)
        if k is not None:
            # 10.0 (a float base) keeps negative exponents valid for NumPy integer k as well
            gram = gram + 10.0 ** k * np.identity(X.shape[1])
        # solving the linear system avoids forming the explicit inverse
        self.w = np.linalg.solve(gram, np.dot(X.T, y))

    def predict(self, X):
        """Returns the sign of the linear signal w.x for every row of X."""
        return np.sign(np.dot(self.w, X.T))

    def fit(self, X, y, k=None):
        """
        Fits the model on (X, y); k is forwarded to update_weights.

        The initial weights are overwritten by the closed-form solution, so
        weight_type does not influence the fitted weights.
        """
        self._initialize_weights(X)
        self.update_weights(X, y, k)


def non_linear_transformation(X):
    """
    Expands every sample (x1, x2) into the feature vector
    (1, x1, x2, x1^2, x2^2, x1*x2, |x1 - x2|, |x1 + x2|).

    X is copied through unchanged after the leading column of 1s; x1 and x2
    are taken from its first two columns.
    """
    x1, x2 = X[:, 0], X[:, 1]
    derived = [x1**2, x2**2, x1*x2, np.abs(x1 - x2), np.abs(x1 + x2)]
    columns = [np.ones((X.shape[0], 1)), X]
    columns += [feature[:, np.newaxis] for feature in derived] # each 1-D feature becomes a column
    return np.concatenate(columns, axis=1)

In [None]:
# Expand both splits into the non-linear feature space used by every HW6 question below.
trans_X_train = non_linear_transformation(X_train)
trans_X_test = non_linear_transformation(X_test)

# Q2: plain linear regression (no k passed, so no weight decay).
lr = LinearRegression()
lr.fit(trans_X_train, y_train)

# Sign predictions on the training and test sets.
y_train_pred = lr.predict(trans_X_train)
y_test_pred = lr.predict(trans_X_test)

In [None]:
# Classification error = 1 - accuracy, on the training set (in-sample) and the test set (out-of-sample).
in_sample_error = 1-accuracy_score(y_train, y_train_pred)
out_sample_error = 1-accuracy_score(y_test, y_test_pred)

In [None]:
# Q2 answer: (in-sample error, out-of-sample error) without weight decay.
in_sample_error, out_sample_error

(0.02857142857142858, 0.08399999999999996)

## Q3

In [None]:
# Q3: same model with weight decay; k = -3 makes the regularisation strength 10**-3.
lr = LinearRegression()
lr.fit(trans_X_train, y_train, -3)

# Sign predictions on the training and test sets.
y_train_pred = lr.predict(trans_X_train)
y_test_pred = lr.predict(trans_X_test)

In [None]:
# Classification error = 1 - accuracy, on the training set (in-sample) and the test set (out-of-sample).
in_sample_error = 1-accuracy_score(y_train, y_train_pred)
out_sample_error = 1-accuracy_score(y_test, y_test_pred)

In [None]:
# Q3 answer: (in-sample error, out-of-sample error) with k = -3.
in_sample_error, out_sample_error

(0.02857142857142858, 0.07999999999999996)

## Q4

In [None]:
# Q4: same model with strong weight decay; k = 3 makes the regularisation strength 10**3.
lr = LinearRegression()
lr.fit(trans_X_train, y_train, 3)

# Sign predictions on the training and test sets.
y_train_pred = lr.predict(trans_X_train)
y_test_pred = lr.predict(trans_X_test)

In [None]:
# Classification error = 1 - accuracy, on the training set (in-sample) and the test set (out-of-sample).
in_sample_error = 1-accuracy_score(y_train, y_train_pred)
out_sample_error = 1-accuracy_score(y_test, y_test_pred)

In [None]:
# Q4 answer: (in-sample error, out-of-sample error) with k = 3.
in_sample_error, out_sample_error

(0.37142857142857144, 0.43600000000000005)

## Q5 & Q6

In [None]:
min_out = {} # maps each exponent k to the out-of-sample error it produced
for k in [2, 1, 0, -1, -2]: # candidate exponents; the weight-decay strength is 10**k
    lr = LinearRegression()
    lr.fit(trans_X_train, y_train, k)

    y_train_pred = lr.predict(trans_X_train)
    y_test_pred = lr.predict(trans_X_test)

    # classification error = 1 - accuracy
    in_sample_error = 1-accuracy_score(y_train, y_train_pred)
    out_sample_error = 1-accuracy_score(y_test, y_test_pred)

    # only the out-of-sample error is recorded per k
    min_out[k] = out_sample_error

In [None]:
min_out # k = -1 gives the smallest out-of-sample error, which is the value closest to 0.06 asked for in question 6

{-2: 0.08399999999999996,
 -1: 0.05600000000000005,
 0: 0.09199999999999997,
 1: 0.124,
 2: 0.22799999999999998}

## Q10

In [11]:
def best_two_layer_split(total_hidden_units, n_inputs=10):
    """
    Finds how to split the hidden units between two hidden layers so that the
    network has as many weights as possible.

    Every unit count includes that layer's constant (bias) unit, which has no
    incoming weights, so each hidden layer needs at least 2 units.

    Inputs:
        total_hidden_units: hidden units to distribute over the two layers
        n_inputs: units in the input layer (bias included), '10' as default
    Outputs:
        pattern ([n_inputs, layer 1 units, layer 2 units, 1]) and the maximum
        number of weights. pattern is None when total_hidden_units < 4,
        because no valid split exists.
    """
    best_pattern, best_num_weights = None, 0
    # l1_u runs from 2 up to total - 2, so that the second layer also keeps at least 2 units
    for l1_u in range(2, total_hidden_units - 1):
        l2_u = total_hidden_units - l1_u

        # input -> layer 1: every input unit feeds the (l1_u - 1) non-bias units
        # layer 1 -> layer 2: every layer-1 unit feeds the (l2_u - 1) non-bias units
        # layer 2 -> output: every layer-2 unit feeds the single output unit
        num_weights = (l1_u-1)*n_inputs + (l2_u-1)*l1_u + (1 * l2_u)

        if num_weights > best_num_weights:
            best_num_weights = num_weights
            best_pattern = [n_inputs, l1_u, l2_u, 1]

    return best_pattern, best_num_weights


total_hidden_units = 36
pattern, max_num_weights = best_two_layer_split(total_hidden_units)

pattern, max_num_weights

([10, 22, 14, 1], 510)