In [29]:
#Imports
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, StackingClassifier, VotingClassifier, BaggingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC




In [30]:
# Load data
train = pd.read_csv("train.csv")
test_df = pd.read_csv("test.csv")
gender_submission = pd.read_csv("gender_submission.csv")



In [31]:
# Fill missing age with median or mean
train['Age'].fillna(value = round(train['Age'].mean()), inplace = True)
test_df['Age'].fillna(value = round(test_df['Age'].mean()), inplace = True)
test_df['Fare'] = test_df['Fare'].fillna(round(test_df['Fare'].mean()))

# Convert 'Sex' into binary (0 for male and 1 for female)
train['Sex_binary'] = train['Sex'].map({"male": 0, "female": 1})
test_df['Sex_binary'] = test_df['Sex'].map({"male": 0, "female": 1})


# Assigning the features (x) and Labels (y) to standardized nameing scheme
X_train = train[["Pclass","Age","SibSp","Parch","Fare","Sex_binary"]]
y_train = train["Survived"]
X_test = test_df[["Pclass","Age","SibSp","Parch","Fare","Sex_binary"]]
y_test = gender_submission["Survived"]

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  train['Age'].fillna(value = round(train['Age'].mean()), inplace = True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  test_df['Age'].fillna(value = round(test_df['Age'].mean()), inplace = True)


In [32]:
# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)



In [33]:
# Define the models
bagging_model = RandomForestClassifier(n_estimators=100, random_state=42)
boosting_model = GradientBoostingClassifier(n_estimators=100, learning_rate=1.0, max_depth=1, random_state=42)
stacking_model = StackingClassifier(estimators=[
    ('lr', LogisticRegression()), 
    ('svc', SVC(probability=True))
], final_estimator=RandomForestClassifier())

voting_model = VotingClassifier(estimators=[
    ('lr', LogisticRegression()), 
    ('rf', RandomForestClassifier()), 
    ('svc', SVC(probability=True))
], voting='soft')

# Train and evaluate each model
models = {
    "Bagging": bagging_model,
    "Boosting": boosting_model,
    "Stacking": stacking_model,
    "Voting": voting_model
}

for name, model in models.items():
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)
    accuracy = accuracy_score(y_test, predictions)
    print(f"{name} Model Accuracy: {accuracy:.4f}")




Bagging Model Accuracy: 0.8254
Boosting Model Accuracy: 0.9211
Stacking Model Accuracy: 0.8325
Voting Model Accuracy: 0.9402


In [34]:
# Streamlit integration (example)
#import streamlit as st
#
#st.title("Titanic Survival Prediction")
#
#model_choice = st.selectbox("SVC", ["Bagging", "Boosting", "Stacking", "Voting"])
#
#if model_choice:
#    model = models[model_choice]
#    predictions = model.predict(test_features_norm)
#    accuracy = accuracy_score(test_labels, predictions)
#    st.write(f"{model_choice} Model Accuracy: {accuracy:.4f}")
#
#    st.write("Predictions for the first 10 passengers in the test set:")
#    st.write(predictions[:10])