# Import libraries


In [50]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load and preprocess the training dataset


In [51]:
data = pd.read_csv('/content/train.csv')

# Fill missing values


In [52]:
data["Age"].fillna(data["Age"].mean(), inplace=True)
data["Embarked"].fillna(data['Embarked'].value_counts().idxmax(), inplace=True)


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  data["Age"].fillna(data["Age"].mean(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  data["Embarked"].fillna(data['Embarked'].value_counts().idxmax(), inplace=True)


# Drop unnecessary columns


In [53]:
data.drop(['Cabin', 'PassengerId', 'Name', 'Ticket'], axis=1, inplace=True)

# One-hot encode categorical variables


In [54]:
data = pd.get_dummies(data, columns=["Pclass", "Embarked", "Sex"])
data.drop('Sex_female', axis=1, inplace=True)

# Separate features and target variable


In [55]:
x_train = data.drop(['Survived'], axis=1)
y_train = data['Survived']


# Add intercept column for logistic regression


In [56]:
x_train = pd.concat([pd.Series(1, index=x_train.index, name="Intercept"), x_train], axis=1)

# Convert to NumPy arrays


In [57]:
numpy_x_train = x_train.values
numpy_y_train = y_train.values

# Define sigmoid function


In [58]:
def sigmoid(z):
    print("Type of z:", type(z))  # بررسی نوع ورودی
    print("Value of z:", z)  # بررسی مقدار ورودی
    return 1 / (1 + np.exp(-z))

# Define gradient function


In [59]:
def gradient(w, x, y):
    m = len(y)
    y_pred = sigmoid(x.dot(w))
    return x.T.dot(y_pred - y) / m

In [60]:
initial_w = np.random.randn(x_train.shape[1])

In [61]:
print("Shape of x:", x_train.shape)
print("Shape of w:", initial_w.shape)

Shape of x: (891, 12)
Shape of w: (12,)


In [62]:
print("Type of x:", type(numpy_x_train))
print("Type of w:", type(initial_w))

Type of x: <class 'numpy.ndarray'>
Type of w: <class 'numpy.ndarray'>


In [63]:
print("Data type of x elements:", numpy_x_train.dtype)
print("Data type of w elements:", initial_w.dtype)

Data type of x elements: object
Data type of w elements: float64


In [64]:
numpy_x_train = np.array(numpy_x_train, dtype=np.float64)

In [65]:
grad = gradient(initial_w, numpy_x_train, numpy_y_train)
print("Gradient shape:", grad.shape)
print("Gradient values:", grad)


Type of z: <class 'numpy.ndarray'>
Value of z: [ -31.88157202  -99.44716042  -40.11879246  -85.05574289  -49.74316997
  -43.09532955 -107.13221809  -15.43885583  -43.33860986  -37.95397681
  -18.39378333  -93.7646714   -31.20101639  -70.44448863  -25.22937677
  -82.35029257  -20.19236046  -47.11566183  -53.06229046  -40.52040506
  -63.89439638  -52.43216991  -26.18681271  -62.12138663  -24.45495962
  -70.89608237  -38.92116271 -225.92519811  -44.23903924  -43.06922125
  -67.21542883 -148.36953283  -44.13740771  -90.02220982  -94.05082676
  -91.24422161  -38.92446652  -32.43715996  -35.82998992  -23.11123122
  -57.4816321   -50.50894725  -39.44882858  -32.86911062  -31.01339373
  -43.19051833  -47.4720499   -44.13740771  -47.96628276  -36.83509967
  -35.09813712  -32.24050452 -117.32860317  -56.9143432  -124.7973987
  -64.22173999  -35.99499142  -37.44218495  -28.08344077  -44.28376437
  -29.40725172 -111.0867331  -119.71157226  -23.00981411  -54.48205932
  -43.79804204  -45.88414     -

# Define gradient descent function


In [66]:
def gradient_decent(w, alpha, iterations, X, y):
    cost_history = np.zeros(iterations)
    w = w.copy()
    for i in range(iterations):
        grad = gradient(w, X, y)
        w -= alpha * grad
    return w


# Initialize weights and train the model


In [67]:
iterations = 1000
alpha = 0.01
w = gradient_decent(initial_w, alpha, iterations, numpy_x_train, numpy_y_train)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
 -3.52190281e+00 -3.21155950e+00 -3.95829168e+00 -2.06165414e+00
 -3.81728471e+00 -3.62251875e+00 -1.10879999e+00 -2.67276520e+00
 -1.96461262e-01 -2.71032946e+00  3.27840640e-01 -2.96413560e+00
 -2.82829237e+00 -4.35800228e+00 -4.47674465e+00 -1.15526959e+00
 -3.31965803e+00 -3.21183462e+00 -5.74982518e+00 -5.11456157e+00
  8.03646614e-01 -2.66053917e+00 -2.28670833e+00 -3.17283492e+00
 -3.74662630e+00 -2.80434977e+00 -4.24130739e+00 -3.48727744e+00
 -2.89289404e+00  1.61400705e+00 -1.78801746e+00 -2.78254124e+00
  1.31915521e+00  3.86125922e-01 -2.82096716e+00 -8.23779496e-01
 -3.58888571e+00 -3.51040236e+00 -3.58318892e+00 -2.29127662e+00
 -2.95459537e+00 -3.60902694e+00 -2.79334262e+00 -1.57089830e+00
 -3.58318892e+00 -3.68142079e+00 -2.41785496e+00  3.23821713e-01
 -1.98351727e+00 -1.20245055e+00 -3.47154648e+00 -4.03866326e+00
 -2.93097408e+00 -2.14083073e+00 -2.56033807e+00 -3.58517901e+00
 -4.52822411e+00 -2.24189

# Define accuracy function


In [68]:
def accuracy_score(y_true, y_pred):
    correct_predictions = np.sum(y_true == y_pred)
    return correct_predictions / len(y_true)

# Make predictions on the training set


In [69]:
y_pred = sigmoid(numpy_x_train.dot(w)) > 0.5
accuracy = accuracy_score(numpy_y_train, y_pred)
print("Training Accuracy:", accuracy)


Type of z: <class 'numpy.ndarray'>
Value of z: [-3.11212278e+00  6.10923608e-01 -3.70426554e+00 -2.58940353e+00
 -3.96698746e+00 -3.21335344e+00 -3.53008821e+00 -1.56889369e+00
 -3.71552738e+00  1.18503152e+00 -2.04909624e+00 -4.60703734e+00
 -2.90316943e+00 -3.90322925e+00 -2.85438523e+00 -5.01378490e+00
 -1.11832313e+00 -2.89567027e+00 -3.94563757e+00 -8.66670768e-01
 -3.05603048e+00 -3.20069401e+00 -2.55148627e+00 -1.95748352e+00
 -2.36791687e+00 -4.20393838e+00 -4.93174799e-01  2.29846231e+00
 -3.59645288e+00 -3.59359968e+00  1.74005208e-01  2.44732881e+00
 -3.59859546e+00 -5.51163107e+00  1.87418381e+00 -2.73059775e+00
 -4.93105149e-01 -2.97409064e+00 -3.07750614e+00  2.59496039e-01
 -4.72530222e+00 -2.99891823e+00 -4.82050627e-01  2.16246833e+00
 -2.83765860e+00 -3.59104251e+00 -3.15042201e+00 -3.59859546e+00
 -3.61162804e-01 -3.02697864e+00 -1.66868459e+00 -2.97823650e+00
 -7.88978479e-02 -3.05784343e+00 -1.02767532e+00 -2.07798698e+00
 -2.69367296e+00 -4.08062284e-01 -1.3202588

# Load and preprocess the test dataset


In [70]:
data_test = pd.read_csv('/content/test.csv')
x_test = data_test.copy()


# Fill missing values


In [71]:
x_test["Age"].fillna(x_test["Age"].mean(), inplace=True)
x_test["Embarked"].fillna(x_test['Embarked'].value_counts().idxmax(), inplace=True)
x_test["Fare"].fillna(x_test["Fare"].mean(), inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  x_test["Age"].fillna(x_test["Age"].mean(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  x_test["Embarked"].fillna(x_test['Embarked'].value_counts().idxmax(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the int

# Drop unnecessary columns


In [72]:
x_test.drop(['Cabin', 'PassengerId', 'Name', 'Ticket'], axis=1, inplace=True)

# One-hot encode categorical variables


In [73]:
x_test = pd.get_dummies(x_test, columns=["Pclass", "Embarked", "Sex"])
x_test.drop('Sex_female', axis=1, inplace=True)

# Add intercept column for logistic regression


In [74]:
x_test = pd.concat([pd.Series(1, index=x_test.index, name="Intercept"), x_test], axis=1)

# Make predictions on the test set


In [76]:
p = sigmoid(x_test.values.astype(np.float64).dot(w)) > 0.5
p = p.astype(int)

Type of z: <class 'numpy.ndarray'>
Value of z: [-3.56427043e+00 -5.26279465e+00 -4.87050225e+00 -3.38946049e+00
 -3.39885231e+00 -2.45815668e+00 -3.62193768e+00 -2.41860617e+00
 -3.68856357e-02 -2.81478561e+00 -3.63427105e+00 -3.39160784e+00
 -1.25466485e+00 -5.09566832e+00 -3.30654666e+00  4.36848418e-01
 -2.91147640e+00  1.23777078e-01 -3.82903094e+00 -1.95182773e+00
 -4.18306858e-01 -2.20072278e+00 -2.55544835e+00  2.08290749e+00
  3.08035163e+00 -4.97768648e+00  1.64844039e+00  1.73952754e-02
 -2.96237635e+00 -4.01834178e-01 -4.17369270e+00 -2.29237672e+00
 -4.03832290e+00 -3.79721434e+00  1.32736051e+00  3.01149733e-01
 -4.00520986e+00 -3.33742925e+00 -3.23372945e+00 -2.82831581e+00
 -1.04689750e+00 -2.26708016e+00 -4.39583136e+00 -3.29050518e+00
 -3.30766678e+00 -3.25984837e+00 -1.47778853e-01 -3.26577086e+00
 -8.12442132e-01 -4.27476990e+00 -1.32134859e+00  4.41022562e-01
 -2.51991977e+00  1.28667552e+00  2.17979421e-01 -1.68569274e+00
 -3.96954463e+00 -3.26440882e+00 -3.5520613

# Prepare the results for submission


In [77]:
np_p = np.array(p)
np_id = data_test['PassengerId'].to_numpy()
res = pd.DataFrame({'PassengerId': np_id, 'Survived': np_p})

# Save the results to a CSV file

In [78]:
res.to_csv('/content/submission.csv', index=False)