In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from gradient_ascent_functions import * # the gradient ascent functions

## Processing the dataset

The values of `Sex` and `Embarked` (port of embarkation) are transformed into integers with the following correspondence:
- Sex:
    - `male` -> `0`
    - `female` -> `1`
- Embarked:
    - `C` -> `-1`
    - `Q` -> `0`
    - `S` -> `1`

In [2]:
# Import the data and drop the features treated as uncorrelated with the survival outcome
titanic_dataset = pd.read_csv("train.csv")
training_dataset = titanic_dataset.drop(columns=["Ticket", 'Cabin', 'Name', 'PassengerId'])

# Centre Age and Fare on their mean and scale them by their maximum.
# The statistics are computed before rows with missing values are dropped.
training_dataset['Age'] = ((training_dataset['Age'] - training_dataset['Age'].mean()) / training_dataset['Age'].max())
training_dataset['Fare'] = ((training_dataset['Fare'] - training_dataset['Fare'].mean()) / training_dataset['Fare'].max())

# Dropping passengers with missing values
training_dataset = training_dataset.dropna(subset=["Embarked", "Age"])

# Encode `Sex` and `Embarked` as integers with an explicit mapping.
# Series.replace(list_of_str, list_of_int) relies on implicit downcasting of the
# object column, which recent pandas versions deprecate; .map + .astype keeps the
# columns numeric and fails loudly if an unexpected category shows up.
training_dataset['Sex'] = training_dataset['Sex'].map({"male": 0, "female": 1}).astype("int64")
training_dataset['Embarked'] = training_dataset['Embarked'].map({"C": -1, "Q": 0, "S": 1}).astype("int64")

# Convert the dataset to numpy arrays to implement the gradient ascent.
# dtype=float guarantees a numeric (non-object) matrix for np.exp / matrix products.
data_matrix = training_dataset.drop(columns='Survived').to_numpy(dtype=float)
target_array = training_dataset['Survived'].to_numpy().reshape((-1, 1))

# Prepend a column of ones to the data matrix: the conventional x0 (intercept) feature
column_of_ones = np.ones(shape=(data_matrix.shape[0], 1))
data_matrix = np.hstack((column_of_ones, data_matrix))

# Visualizing the first elements of the training_dataset
training_dataset.head()

Unnamed: 0,Survived,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked
0,0,3,0,-0.096239,1,0,-0.048707,1
1,1,1,1,0.103761,1,0,0.076277,-1
2,1,3,1,-0.046239,0,0,-0.04739,1
3,1,1,1,0.066261,1,0,0.040786,1
4,0,3,0,0.066261,0,0,-0.047146,1


In [10]:
def sigmoid(x):
    '''
    Function to compute the element-wise sigmoid 1 / (1 + exp(-x)) of the input x.

    The exponential is only ever evaluated at -|x|, so it cannot overflow
    (and emit a RuntimeWarning) for inputs of large magnitude.

    Input:
    x: scalar or array-like of real numbers, e.g. the product features @ theta

    Output:
    g: The sigmoid of the input x, with the same shape as x
    '''
    x = np.asarray(x)
    decay = np.exp(-np.abs(x))  # always in (0, 1], never overflows
    # For x >= 0 this is the textbook formula; for x < 0 the algebraically
    # equal form exp(x) / (1 + exp(x)) is used instead.
    g = np.where(x >= 0, 1 / (1 + decay), decay / (1 + decay))
    # Indexing with () turns a 0-d result back into a numpy scalar and
    # leaves arrays of higher rank unchanged.
    return g[()]

def log_likelihood(theta,features,target):
    '''
    Function to compute the average log likelihood of theta given the data
    matrix and the labels.

    The two log terms are evaluated with np.logaddexp instead of
    log(sigmoid(.)) so that a saturated sigmoid (exactly 0 or 1) does not
    produce log(0) = -inf and, through 0 * -inf, a nan result.

    Input:
    theta: it's the model parameter matrix.
    features: it's the input data matrix. The shape is (N, H)
    target: the label array, shaped like features @ theta

    Output:
    log_l: the log likelihood of theta, summed over the samples and divided
           by len(features)
    '''

    logits = features @ theta
    # log(sigmoid(z))     = -log(1 + exp(-z)) = -logaddexp(0, -z)
    # log(1 - sigmoid(z)) = -log(1 + exp(z))  = -logaddexp(0, z)
    log_prob_positive = -np.logaddexp(0, -logits)
    log_prob_negative = -np.logaddexp(0, logits)

    log_l = (target * log_prob_positive + (1 - target) * log_prob_negative).sum() / len(features)

    return log_l


def predictions(features, theta):
    '''
    Function to turn the model's sigmoid outputs into hard 0/1 class labels.

    Input:
    features: it's the input data matrix. The shape is (N, H)
    theta: it's the model parameter matrix.

    Output:
    preds: array with the same shape as features @ theta, holding 1 where
           sigmoid(features @ theta) is at least 0.5 and 0 elsewhere
    '''

    probabilities = sigmoid(features @ theta)
    # Threshold at 0.5; the boundary value itself is labelled 1
    is_positive = probabilities >= 0.5
    return np.where(is_positive, 1, 0)


def update_theta(theta, target, preds, features, lr):
    '''
    Function to take one gradient ascent step: compute the gradient of the
    log likelihood (averaged over the samples) and return the updated weights.

    Input:
    theta: the model parameter matrix.
    target: the label array
    preds: accepted as part of the signature but never read by this function
    features: it's the input data matrix. The shape is (N, H)
    lr: the learning rate

    Output:
    theta: the updated model parameter matrix.
    '''

    # Difference between the labels and the current sigmoid outputs
    residual = target - sigmoid(features @ theta)

    # With target of shape (N, 1): (1, N) @ (N, H) -> (1, H), transposed back to (H, 1)
    summed_gradient = (residual.T @ features).T
    mean_gradient = summed_gradient / len(features)

    # Step uphill along the gradient
    return theta + lr * mean_gradient

def gradient_ascent(theta, features, target, lr, num_steps):
    '''
    Function to run num_steps iterations of the gradient ascent algorithm.

    Input:
    theta: the initial model parameter matrix.
    features: the input data matrix. The shape is (N, H)
    target: the label array
    lr: the learning rate
    num_steps: the number of iterations

    Output:
    theta: the final model parameter matrix.
    log_likelihood_history: array of length num_steps; entry i is the log
                            likelihood measured BEFORE the i-th update
    '''

    history = np.zeros(num_steps)
    current_theta = theta

    for iteration in range(num_steps):
        # Record the objective for the parameters we are about to update
        history[iteration] = log_likelihood(current_theta, features, target)

        # Hard labels are computed and handed to update_theta as its `preds` argument
        hard_labels = predictions(features, current_theta)
        current_theta = update_theta(current_theta, target, hard_labels, features, lr)

    return current_theta, history


## Implementing the method

In [3]:
# Initialize theta0: an all-zero column vector with one row per column of data_matrix
n_parameters = data_matrix.shape[1]
theta0 = np.zeros((n_parameters, 1))

# Run the Gradient Ascent method
n_iter = 2000
theta_final, log_l_history = gradient_ascent(
    theta0,
    data_matrix,
    target_array,
    lr=.05,
    num_steps=n_iter,
)

# Plot the log likelihood recorded at each iteration
fig, ax = plt.subplots(num=2)
ax.set_ylabel('l(Theta)')
ax.set_xlabel('Iterations')
_ = ax.plot(range(len(log_l_history)), log_l_history, 'b.')

NameError: name 'np' is not defined