In [1]:
# Restore the variable `df` from IPython's %store database into this kernel.
# NOTE(review): presumably `df` was saved with `%store df` by another notebook —
# confirm which one, since this cell fails on a machine where it was never stored.
%store -r df
# Preview the first rows to check which columns were restored.
print(df.head())

      Fare   Age  Sex_male  Pclass  Title_Mr  Title_Master  SibSp  \
0   7.2500  22.0      True       3      True         False      1   
1  71.2833  38.0     False       1     False         False      1   
2   7.9250  26.0     False       3     False         False      0   
3  53.1000  35.0     False       1     False         False      1   
4   8.0500  35.0      True       3      True         False      0   

   CabinLetter_U  Survived  
0           True         0  
1          False         1  
2           True         1  
3          False         1  
4           True         0  


In [3]:
# Random Forest Classifier
# Trains a random forest on every column except 'Survived' and reports
# accuracy, a per-class report and the confusion matrix on a 20% hold-out set.
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.ensemble import RandomForestClassifier

# Test - Train Split
X = df.drop(columns=['Survived']) # All features
y = df['Survived'] # Target

# Fixed seed so the same rows land in the hold-out set on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Model Training
# Seed the forest as well as the split: an unseeded forest draws different
# bootstrap samples / feature subsets each run, so the metrics printed below
# would change between executions of this cell.
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

# Model Evaluation
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

# Per-class precision / recall / F1, then the confusion matrix
# (rows = true label, columns = predicted label).
print(classification_report(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))

Accuracy: 0.8212290502793296
              precision    recall  f1-score   support

           0       0.84      0.86      0.85       105
           1       0.79      0.77      0.78        74

    accuracy                           0.82       179
   macro avg       0.82      0.81      0.81       179
weighted avg       0.82      0.82      0.82       179

[[90 15]
 [17 57]]


In [4]:
# Logistic Regression with L1 Regularization
# Fits an L1-penalised logistic regression on the same 80/20 split and prints
# accuracy, the classification report and the confusion matrix.
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Separate the target column from the predictors
y = df['Survived']
X = df.drop(columns=['Survived'])

# 80/20 hold-out split with a fixed seed.
# `train_test_split` is not imported in this cell; it must already be in the
# kernel namespace from an earlier cell.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Build and fit in one chained call (`fit` returns the estimator itself)
lr = LogisticRegression(
    penalty='l1',
    solver='liblinear',
    random_state=42,
).fit(X_train, y_train)

# Score the held-out rows
y_pred_lr = lr.predict(X_test)
acc_lr = accuracy_score(y_test, y_pred_lr)

# Report
print(f"Logistic Regression Accuracy: {acc_lr:.4f}")
print("Classification Report:", classification_report(y_test, y_pred_lr), sep="\n")
print(confusion_matrix(y_test, y_pred_lr))

Logistic Regression Accuracy: 0.8101
Classification Report:
              precision    recall  f1-score   support

           0       0.83      0.85      0.84       105
           1       0.78      0.76      0.77        74

    accuracy                           0.81       179
   macro avg       0.80      0.80      0.80       179
weighted avg       0.81      0.81      0.81       179

[[89 16]
 [18 56]]


In [5]:
# Gradient Boosting Classifier
# Fits a seeded gradient-boosting model on the same 80/20 split and prints
# accuracy, the classification report and the confusion matrix.
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Predictors and target
y = df['Survived']
X = df.drop(columns=['Survived'])

# Same seeded 80/20 split as the other model cells.
# `train_test_split` is expected to be in the kernel namespace already
# (this cell does not import it).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Configure the booster, then fit it
gb = GradientBoostingClassifier(n_estimators=300, learning_rate=0.05, max_depth=3, random_state=42)
gb.fit(X_train, y_train)

# Hold-out predictions
y_pred_gb = gb.predict(X_test)

# Metrics
acc_gb = accuracy_score(y_test, y_pred_gb)
print(f"GradientBoosting Accuracy: {acc_gb:.4f}")
print("\nClassification Report:")
print(classification_report(y_test, y_pred_gb))
print(confusion_matrix(y_test, y_pred_gb))


GradientBoosting Accuracy: 0.8380

Classification Report:
              precision    recall  f1-score   support

           0       0.84      0.90      0.87       105
           1       0.84      0.76      0.79        74

    accuracy                           0.84       179
   macro avg       0.84      0.83      0.83       179
weighted avg       0.84      0.84      0.84       179

[[94 11]
 [18 56]]


In [6]:
# Serialise the `gb` estimator to 'gb.joblib' in the working directory.
# The call is left as the cell's last expression so the list of written
# file names is echoed as the cell output.
from joblib import dump

dump(value=gb, filename='gb.joblib')



['gb.joblib']