In [2]:
import numpy as np
import pandas as pd

In [3]:
dataset = pd.read_csv('titanic.csv')

# Delete the columns that are not useful
dataset = dataset.drop(['PassengerId', 'Name', 'Ticket', 'Cabin'], axis=1)

# Fill the missing values
dataset['Age'] = dataset['Age'].fillna(dataset['Age'].mean())
dataset['Embarked'] = dataset['Embarked'].fillna(dataset['Embarked'].mode()[0])

# Convert the categorical variables into dummy variables
dataset = pd.get_dummies(dataset, columns=['Embarked', 'Sex'])

# True to 1, False to 0
dataset['Embarked_C'] = dataset['Embarked_C'].astype(np.int64)
dataset['Embarked_Q'] = dataset['Embarked_Q'].astype(np.int64)
dataset['Embarked_S'] = dataset['Embarked_S'].astype(np.int64)
dataset['Sex_female'] = dataset['Sex_female'].astype(np.int64)
dataset['Sex_male'] = dataset['Sex_male'].astype(np.int64)

# Split the dataset into the input and output variables
X = dataset.drop('Survived', axis=1)
y = dataset['Survived']

# Split the dataset into the training and test sets
len_train = int(len(dataset) * 0.8)
X_train, X_test = X[:len_train], X[len_train:]
y_train, y_test = y[:len_train], y[len_train:]

X_train.head(5), y_train.head(5)

(   Pclass   Age  SibSp  Parch     Fare  Embarked_C  Embarked_Q  Embarked_S  \
 0       3  22.0      1      0   7.2500           0           0           1   
 1       1  38.0      1      0  71.2833           1           0           0   
 2       3  26.0      0      0   7.9250           0           0           1   
 3       1  35.0      1      0  53.1000           0           0           1   
 4       3  35.0      0      0   8.0500           0           0           1   
 
    Sex_female  Sex_male  
 0           0         1  
 1           1         0  
 2           1         0  
 3           1         0  
 4           0         1  ,
 0    0
 1    1
 2    1
 3    1
 4    0
 Name: Survived, dtype: int64)

$f(\vec{x}) = \vec{x} \cdot \vec{w} + b$

In [6]:
def scaler(X):
    return (X - np.mean(X, axis=0)) / np.std(X, axis=0)

class LinearModel:
    def __init__(self):
        self.w = None
        self.b = None

    def fit(self, X, y, learning_rate=0.01, epochs=1000):
        self.w = np.zeros(X.shape[1])
        self.b = 0

        X = scaler(X)

        for _ in range(epochs):
            z = np.dot(X, self.w) + self.b
            y_pred = 1 / (1 + np.exp(-z))
            error = y - y_pred
            self.w += learning_rate * np.dot(X.T, error)
            self.b += learning_rate * np.sum(error)

    def predict(self, X):
        X = scaler(X)
        z = np.dot(X, self.w) + self.b
        return 1 / (1 + np.exp(-z))

model = LinearModel()
model.fit(X_train, y_train)

y_pred_prob = model.predict(X_test)
y_pred = (y_pred_prob >= 0.5).astype(int)

accuracy = np.mean(y_pred == y_test)
precision = np.sum((y_pred == 1) & (y_test == 1)) / np.sum(y_pred == 1)
recall = np.sum((y_pred == 1) & (y_test == 1)) / np.sum(y_test == 1)
f1 = 2 * precision * recall / (precision + recall)

print(f'Accuracy: {accuracy}')
print(f'Precision: {precision}')
print(f'Recall: {recall}')
print(f'F1: {f1}')

Accuracy: 0.7821229050279329
Precision: 0.711864406779661
Recall: 0.65625
F1: 0.6829268292682927
