In [1]:
import numpy as np
import pandas as pd

titanic_data = pd.read_csv("titanic_dataset.csv")

titanic_data = titanic_data[['Survived', 'Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare']]
titanic_data['Sex'] = titanic_data['Sex'].map({'male': 0, 'female': 1})

titanic_data.fillna(titanic_data.median(), inplace=True)

def sigmoid(z):
    return 1 / (1 + np.exp(-z))

def cost_function(X, y, theta):
    m = len(y)
    h = sigmoid(np.dot(X, theta))
    cost = (1 / m) * np.sum(-y * np.log(h) - (1 - y) * np.log(1 - h))
    return cost

def stochastic_gradient_descent(X, y, theta, alpha, iterations):
    m = len(y)
    for _ in range(iterations):
        for i in range(m):
            h = sigmoid(np.dot(X[i], theta))
            gradient = X[i].reshape(-1, 1) * (h - y[i])
            theta -= alpha * gradient
    return theta

X = titanic_data.drop('Survived', axis=1)
y = titanic_data['Survived']
X = (X - X.mean()) / X.std()
X.insert(0, 'Intercept', 1)
X = X.values
y = y.values.reshape(-1, 1)

theta = np.zeros((X.shape[1], 1))

alpha = 0.01
iterations = 1000

theta = stochastic_gradient_descent(X, y, theta, alpha, iterations)

def predict_survival(passenger):
    passenger = (passenger - titanic_data.drop('Survived', axis=1).mean()) / titanic_data.drop('Survived', axis=1).std()
    passenger = np.insert(passenger.values, 0, 1)
    prediction = sigmoid(np.dot(passenger, theta))
    return prediction

random_passenger = np.random.randn(6)
prediction = predict_survival(random_passenger)
print("Survival probability:", prediction)

from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

logistic_reg_model = LogisticRegression()
logistic_reg_model.fit(X_train, y_train.ravel())
logistic_reg_predictions = logistic_reg_model.predict(X_test)
logistic_reg_accuracy = accuracy_score(y_test, logistic_reg_predictions)
print("Logistic Regression Accuracy:", logistic_reg_accuracy)

from sklearn.svm import SVC

svm_model = SVC(kernel='linear')
svm_model.fit(X_train, y_train.ravel())
svm_predictions = svm_model.predict(X_test)
svm_accuracy = accuracy_score(y_test, svm_predictions)
print("SVM Accuracy:", svm_accuracy)

from sklearn.tree import DecisionTreeClassifier

decision_tree_model = DecisionTreeClassifier()
decision_tree_model.fit(X_train, y_train)
decision_tree_predictions = decision_tree_model.predict(X_test)
decision_tree_accuracy = accuracy_score(y_test, decision_tree_predictions)
print("Decision Tree Accuracy:", decision_tree_accuracy)


Survival probability: [0.8409551]
Logistic Regression Accuracy: 0.7988826815642458
SVM Accuracy: 0.7821229050279329
Decision Tree Accuracy: 0.7653631284916201
