# Logistic Regression

Implement a logistic (binary) regression model and use your stochastic gradient descent approach from the previous practical to optimize its weights.

In [15]:
# You can import your code from a different notebook as follows (change the path to match your setup):

# %run ..\5_SGD_Linear_Regression\SGD_Solution.ipynb

## Data for the logistic regression model

In [16]:
import numpy as np

In [17]:
# Synthetic 'fraud' / 'no fraud' data, e.g. for a bank-account scenario.
# Every sample has three features, each scaled to the range [0, 1):
#   1. number of withdrawals per week
#   2. average withdrawal amount
#   3. number of different addressees
# For every feature the two classes are drawn from opposite halves of the
# range. The split into (X_train, y_train) and (X_test, y_test) is done in a
# later cell.

# Seed NumPy's global generator so that "Restart Kernel -> Run All" reproduces
# the same samples (and the same results for every later np.random call).
np.random.seed(42)

data_amount = 15
frauds_amount = data_amount // 2
no_frauds_amount = data_amount - frauds_amount

### Generate (fake) 'fraud' data
# number of withdrawals per week 'fraud' - between 0 and 0.5
X_1_f = np.random.rand(frauds_amount) / 2
# average withdrawal amount 'fraud' - between 0.5 and 1
X_2_f = (np.random.rand(frauds_amount) + 1) / 2
# number of different addressees 'fraud' - between 0 and 0.5
X_3_f = np.random.rand(frauds_amount) / 2

# One row per sample, one column per feature -> shape (frauds_amount, 3)
X_f = np.stack([X_1_f, X_2_f, X_3_f], axis=1)

# Labels of 'fraud' (1)
y_f = np.ones(frauds_amount)

### Generate (fake) 'no fraud' data
# number of withdrawals per week 'no fraud' - between 0.5 and 1
X_1_nf = (np.random.rand(no_frauds_amount) + 1) / 2
# average withdrawal amount 'no fraud' - between 0 and 0.5
X_2_nf = np.random.rand(no_frauds_amount) / 2
# number of different addressees 'no fraud' - between 0.5 and 1
X_3_nf = (np.random.rand(no_frauds_amount) + 1) / 2

# One row per sample, one column per feature -> shape (no_frauds_amount, 3)
X_nf = np.stack([X_1_nf, X_2_nf, X_3_nf], axis=1)

# Labels of 'no fraud' (0)
y_nf = np.zeros(no_frauds_amount)

## Shuffle fraud and no fraud data to create a mixed dataset

In [18]:
# Stack the 'fraud' rows on top of the 'no fraud' rows (features and labels alike).
X = np.concatenate([X_f, X_nf])
y = np.concatenate([y_f, y_nf])

# Draw a random ordering of all row indices (each index exactly once) and
# apply that same ordering to features and labels so the pairs stay aligned.
shuffled_indices = np.random.choice(len(y), size=len(y), replace=False)
X, y = X[shuffled_indices], y[shuffled_indices]

## Split into train and test data

In [19]:
# The first 75 % of the samples (rounded down) are used for training.
train_len = int(0.75 * data_amount)

# Training portion
X_train, y_train = X[:train_len], y[:train_len]

# Remaining portion, held out for testing / evaluation
X_test, y_test = X[train_len:], y[train_len:]

## Initialize the weights of your logistic regression model (see the SGD exercise for how to do this with NumPy - don't forget to initialize the bias term as well!)

In [20]:
# One random weight per feature column, rescaled so that the weights sum to 1.
final_weights = np.random.rand(X.shape[1])
final_weights /= final_weights.sum()

# Initial value of the bias term.
final_bias = 0.2

## Define the loss function (derivative) for your logistic regression model

In [21]:
def mse(y_true, y_pred):
    """Cross-entropy (log) loss between labels and predicted probabilities.

    The name ``mse`` is kept for backward compatibility; the value computed
    is a cross-entropy, not a mean squared error.

    Parameters
    ----------
    y_true : array-like
        Either a 1-D vector of binary labels (0 or 1), or a 2-D one-hot
        matrix with one row per sample.
    y_pred : array-like
        Predicted probabilities with the same shape as ``y_true``.

    Returns
    -------
    float
        * 1-D (or scalar) input: the binary cross-entropy
          ``-mean(y * log(p) + (1 - y) * log(1 - p))``.
        * 2-D input: the categorical cross-entropy
          ``-mean(sum(y * log(p), axis=1))``.
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)

    # Keep probabilities away from 0 (and 1) so that log() never yields
    # -inf, which would turn the loss into inf or nan.
    eps = 1e-12

    if y_true.ndim < 2:
        # Binary labels: both the positive and the negative class contribute.
        p = np.clip(y_pred, eps, 1.0 - eps)
        return -np.mean(y_true * np.log(p) + (1.0 - y_true) * np.log(1.0 - p))

    # One-hot labels: only the probability of the true class contributes.
    p = np.clip(y_pred, eps, 1.0)
    return -np.mean(np.sum(y_true * np.log(p), axis=1))

## Define the activation function for your logistic regression model

In [22]:
# Hint:  How do you scale your output for logistic regression? What is the range?

In [26]:
## Optimize the weights with your Stochastic Gradient Descent approach from the previous exercise.
# If your SGD implementation cannot handle this yet, adjust it until it can :)

# Optimisation hyperparameters: number of update steps and step size.
iterations = 1_000
learning_rate = 5e-3

def sigmoid(x):
    """Logistic function ``1 / (1 + exp(-x))``, mapping any real value into [0, 1].

    Parameters
    ----------
    x : float or array-like
        Input value(s).

    Returns
    -------
    numpy.float64 or numpy.ndarray
        Element-wise sigmoid of ``x``; a NumPy scalar for scalar input,
        otherwise an array of the same shape as ``x``.
    """
    x = np.asarray(x, dtype=float)
    # exp(-|x|) lies in (0, 1] and therefore cannot overflow, unlike exp(-x)
    # for large negative x.
    e = np.exp(-np.abs(x))
    # x >= 0: 1 / (1 + exp(-x));  x < 0: exp(x) / (1 + exp(x)) -- the same
    # function, written so that only the non-overflowing exponential is used.
    out = np.where(x >= 0, 1.0 / (1.0 + e), e / (1.0 + e))
    # Indexing with () turns a 0-d result back into a NumPy scalar and leaves
    # n-d arrays unchanged.
    return out[()]
    
class Logistic_Regression:
    """Binary logistic-regression classifier trained with gradient descent.

    Every iteration computes the gradient over the complete training set
    (full-batch gradient descent) and takes one step of size
    ``learning_rate``.

    Parameters
    ----------
    learning_rate : float, default 0.005
        Step size of each gradient update.
    iterations : int, default 1000
        Number of gradient updates performed by ``fit``.

    Attributes (set by ``fit``)
    ---------------------------
    weights : numpy.ndarray of shape (n_features,)
    bias : float
    """

    def __init__(self, learning_rate=0.005, iterations=1000):
        self.learning_rate = learning_rate
        self.iterations = iterations

    def fit(self, X_train, y_train):
        """Learn ``weights`` and ``bias`` from the training data.

        Parameters
        ----------
        X_train : array-like of shape (n_samples, n_features)
        y_train : array-like with n_samples binary labels (0 or 1);
            column vectors of shape (n_samples, 1) are flattened.

        Raises
        ------
        ValueError
            If ``X_train`` is not 2-D, contains no samples, or its number of
            rows differs from the number of labels.
        """
        X_train = np.asarray(X_train, dtype=float)
        # Flatten the labels: an (n, 1) column would otherwise broadcast
        # against the (n,) predictions into an (n, n) matrix.
        y_train = np.asarray(y_train, dtype=float).ravel()

        if X_train.ndim != 2:
            raise ValueError("X_train must be 2-D: (n_samples, n_features)")
        number_of_samples, number_of_features = X_train.shape
        if number_of_samples == 0:
            raise ValueError("X_train must contain at least one sample")
        if y_train.shape[0] != number_of_samples:
            raise ValueError(
                "X_train and y_train must contain the same number of samples"
            )

        # Parameters start at zero and are refined in place below.
        self.weights = np.zeros(number_of_features)
        self.bias = 0.0
        for _ in range(self.iterations):
            # Difference between predicted probability and true label; its
            # feature-weighted mean is the gradient of the cross-entropy loss.
            error = self.predict_proba(X_train) - y_train
            dw = (1 / number_of_samples) * np.dot(X_train.T, error)
            db = (1 / number_of_samples) * np.sum(error)

            self.weights -= self.learning_rate * dw
            self.bias -= self.learning_rate * db

    def predict_proba(self, X_test):
        """Return the predicted probability of class 1 for every row of ``X_test``."""
        X_test = np.asarray(X_test, dtype=float)
        linear_pred = np.dot(X_test, self.weights) + self.bias
        return sigmoid(linear_pred)

    def predict(self, X_test):
        """Return the predicted class (0 or 1) for every row of ``X_test``.

        A sample is assigned class 1 only when its predicted probability is
        strictly greater than 0.5. The result is a plain Python list of ints.
        """
        y_pred = self.predict_proba(X_test)
        return np.where(y_pred <= 0.5, 0, 1).tolist()



In [27]:
# Results
# Train with the hyperparameters configured above instead of silently falling
# back to the constructor defaults.
model = Logistic_Regression(learning_rate=learning_rate, iterations=iterations)
model.fit(X_train, y_train)
result = model.predict(X_test)

# Predicted classes first, true labels second, for a side-by-side comparison.
print(result)
print(y_test)

[0, 0, 1, 1]
[0. 0. 1. 1.]


## Predict the values for the test data

In [25]:
# Prediction