In [23]:
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB


# Load the Titanic dataset
df = pd.read_csv('Titanic-Dataset.csv')

# 1. Prepare Data
# Select features and target
features = ['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare', 'Embarked']
target = 'Survived'

# Create a copy to avoid SettingWithCopyWarning
X = df[features].copy()
y = df[target]

# 2. Handle Missing Values and Encode Categorical Features
# Fill missing Age values with the median (without inplace=True)
X['Age'] = X['Age'].fillna(X['Age'].median())

# Fill missing Embarked values (if any)
X['Embarked'] = X['Embarked'].fillna(X['Embarked'].mode()[0])

# One-hot encode categorical features
X = pd.get_dummies(X, columns=['Pclass', 'Sex', 'Embarked'], drop_first=True)

# 3. Split Data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 4. Scale Data
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)



model1 = LogisticRegression(max_iter=1000)
model2 = MLPClassifier(max_iter=2000)
model3 = SVC()
model4 = DecisionTreeClassifier()
model5= RandomForestClassifier()
model6 = GaussianNB() 

print("---------------------------------------------------MODEL1-LOGISTICREGRESSION---------------------------------------------------------------------------")

#FOR THE MODEL 1--->LOGISTIC REGRESSION

# 6. Cross-Validation
cv_scores = cross_val_score(model1, X_train_scaled, y_train, cv=5, scoring='accuracy')
print("Cross-validation scores:", cv_scores)
print("Mean cross-validation score:", cv_scores.mean())

# 7. Train the Model on the Entire Training Set
model1.fit(X_train_scaled, y_train)

# 8. Evaluate on the Test Set
y_pred = model1.predict(X_test_scaled)
test_accuracy = accuracy_score(y_test, y_pred)
print("Test set accuracy:", test_accuracy)


print("---------------------------------------------------MODEL2-MLP---------------------------------------------------------------------------")

#FOR THE MODEL 2--->MLP 

# 6. Cross-Validation
cv_scores = cross_val_score(model2, X_train_scaled, y_train, cv=5, scoring='accuracy')
print("Cross-validation scores:", cv_scores)
print("Mean cross-validation score:", cv_scores.mean())

# 7. Train the Model on the Entire Training Set
model2.fit(X_train_scaled, y_train)

# 8. Evaluate on the Test Set
y_pred = model2.predict(X_test_scaled)
test_accuracy = accuracy_score(y_test, y_pred)
print("Test set accuracy:", test_accuracy)

print("---------------------------------------------------MODEL3-SVC---------------------------------------------------------------------------")

#FOR THE MODEL 3--->SVC

# 6. Cross-Validation
cv_scores = cross_val_score(model3, X_train_scaled, y_train, cv=5, scoring='accuracy')
print("Cross-validation scores:", cv_scores)
print("Mean cross-validation score:", cv_scores.mean())

# 7. Train the Model on the Entire Training Set
model3.fit(X_train_scaled, y_train)

# 8. Evaluate on the Test Set
y_pred = model3.predict(X_test_scaled)
test_accuracy = accuracy_score(y_test, y_pred)
print("Test set accuracy:", test_accuracy)

print("---------------------------------------------------MODEL4-DecisionTrees---------------------------------------------------------------------------")

#FOR THE MODEL 4--->DecisionTrees

# 6. Cross-Validation
cv_scores = cross_val_score(model4, X_train_scaled, y_train, cv=5, scoring='accuracy')
print("Cross-validation scores:", cv_scores)
print("Mean cross-validation score:", cv_scores.mean())

# 7. Train the Model on the Entire Training Set
model4.fit(X_train_scaled, y_train)

# 8. Evaluate on the Test Set
y_pred = model4.predict(X_test_scaled)
test_accuracy = accuracy_score(y_test, y_pred)
print("Test set accuracy:", test_accuracy)


print("---------------------------------------------------MODEL5-RandomForest---------------------------------------------------------------------------")

#FOR THE MODEL 5--->RandomForest

# 6. Cross-Validation
cv_scores = cross_val_score(model5, X_train_scaled, y_train, cv=5, scoring='accuracy')
print("Cross-validation scores:", cv_scores)
print("Mean cross-validation score:", cv_scores.mean())

# 7. Train the Model on the Entire Training Set
model5.fit(X_train_scaled, y_train)

# 8. Evaluate on the Test Set
y_pred = model5.predict(X_test_scaled)
test_accuracy = accuracy_score(y_test, y_pred)
print("Test set accuracy:", test_accuracy)

print("---------------------------------------------------MODEL6-Naive-Bayes---------------------------------------------------------------------------")

#FOR THE MODEL 6--->Naive-Bayes

# 6. Cross-Validation
cv_scores = cross_val_score(model6, X_train_scaled, y_train, cv=5, scoring='accuracy')
print("Cross-validation scores:", cv_scores)
print("Mean cross-validation score:", cv_scores.mean())

# 7. Train the Model on the Entire Training Set
model6.fit(X_train_scaled, y_train)

# 8. Evaluate on the Test Set
y_pred = model6.predict(X_test_scaled)
test_accuracy = accuracy_score(y_test, y_pred)
print("Test set accuracy:", test_accuracy)





---------------------------------------------------MODEL1-LOGISTICREGRESSION---------------------------------------------------------------------------
Cross-validation scores: [0.8041958  0.81118881 0.78873239 0.75352113 0.81690141]
Mean cross-validation score: 0.794907908992416
Test set accuracy: 0.7988826815642458
---------------------------------------------------MODEL2-MLP---------------------------------------------------------------------------
Cross-validation scores: [0.81118881 0.85314685 0.81690141 0.78169014 0.81690141]
Mean cross-validation score: 0.8159657244164287
Test set accuracy: 0.8268156424581006
---------------------------------------------------MODEL3-SVC---------------------------------------------------------------------------
Cross-validation scores: [0.81818182 0.84615385 0.84507042 0.77464789 0.84507042]
Mean cross-validation score: 0.825824879346006
Test set accuracy: 0.8212290502793296
---------------------------------------------------MODEL4-DecisionTrees-