# Import Dependencies

In [None]:
import pandas as pd
import numpy as np 
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from xgboost import XGBClassifier
from catboost import CatBoostClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV

# Load the Data

In [None]:
# Read the competition train/test splits in one step.
train_data, test_data = (
    pd.read_csv('/kaggle/input/titanic/train.csv'),
    pd.read_csv('/kaggle/input/titanic/test.csv'),
)
# Separate file whose 'Survived' column is later used as y_test.
Accuracy100 = pd.read_csv('/kaggle/input/titanic-leaked/titanic.csv')

### Fill missing data, encode categorical features, and drop some features

In [None]:
# Preprocess the data
def preprocess_data(data):
    """Return a cleaned, model-ready copy of a Titanic passenger frame.

    Missing ``Age`` and ``Fare`` values are replaced with the column median
    and missing ``Embarked`` values with the column mode. ``Sex`` and
    ``Embarked`` are mapped to integer codes, and the ``PassengerId``,
    ``Name``, ``Ticket`` and ``Cabin`` columns are dropped.

    The frame passed in is left untouched; all work happens on a copy.

    Args:
        data: DataFrame containing at least the columns ``Age``, ``Fare``,
            ``Embarked``, ``Sex``, ``PassengerId``, ``Name``, ``Ticket`` and
            ``Cabin``. Any other columns are passed through unchanged.

    Returns:
        A new DataFrame with the transformations above applied.

    Raises:
        KeyError: If one of the required columns is missing.
    """
    # Work on a copy so the caller's frame is not modified as a side effect.
    data = data.copy()

    # Fill missing values. The results are assigned back rather than using
    # ``fillna(..., inplace=True)`` on a column selection: that chained form
    # is deprecated in pandas 2.x and does not update the frame when
    # Copy-on-Write is enabled.
    data['Age'] = data['Age'].fillna(data['Age'].median())
    data['Fare'] = data['Fare'].fillna(data['Fare'].median())
    data['Embarked'] = data['Embarked'].fillna(data['Embarked'].mode()[0])

    # Encode categorical features as integers. Values outside these mappings
    # become NaN.
    data['Sex'] = data['Sex'].map({'male': 0, 'female': 1})
    data['Embarked'] = data['Embarked'].map({'S': 0, 'C': 1, 'Q': 2})

    # Drop the columns that are not used as model features.
    data = data.drop(['PassengerId', 'Name', 'Ticket', 'Cabin'], axis=1)

    return data

In [None]:
# Run both raw frames through the shared cleaning function.
train_data = preprocess_data(train_data)
# X_test and test_data are two names for the same cleaned test frame.
X_test = test_data = preprocess_data(test_data)
# Test labels come from the separately loaded file; this presumably relies on
# its rows being in the same order as test.csv -- TODO confirm.
y_test = Accuracy100['Survived']

In [None]:
# Separate the feature columns from the 'Survived' target, then hold out 20%
# of the rows as a validation set (fixed seed so the split is repeatable).
features = train_data.drop(columns=['Survived'])
target = train_data['Survived']
X_train, X_val, y_train, y_val = train_test_split(
    features,
    target,
    test_size=0.2,
    random_state=123,
)

# Random Forest

In [None]:
# Build and fit a 100-tree random forest on the training split.
rf = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

# Predict the validation split and report the fraction of correct labels.
y_val_pred_rf = rf.predict(X_val)
accuracy_rf = accuracy_score(y_true=y_val, y_pred=y_val_pred_rf)
print('Random Forest validation accuracy:', accuracy_rf)

# AdaBoost

In [None]:
# Build and fit AdaBoost with 200 estimators on the training split.
ada = AdaBoostClassifier(n_estimators=200, random_state=42).fit(X_train, y_train)

# Predict the validation split and report the fraction of correct labels.
y_val_pred_ada = ada.predict(X_val)
accuracy_ada = accuracy_score(y_true=y_val, y_pred=y_val_pred_ada)
print('AdaBoost validation accuracy:', accuracy_ada)

# XGBoost

In [None]:
# Build and fit XGBoost with 50 boosting rounds on the training split.
xgb = XGBClassifier(n_estimators=50, random_state=42).fit(X_train, y_train)

# Predict the validation split and report the fraction of correct labels.
y_val_pred_xgb = xgb.predict(X_val)
accuracy_xgb = accuracy_score(y_true=y_val, y_pred=y_val_pred_xgb)
print('XGBoost validation accuracy:', accuracy_xgb)

# SVM --> Classification

In [None]:
# Build and fit a support vector classifier (gamma fixed at 0.1) on the
# training split.
svm = SVC(gamma=0.1).fit(X_train, y_train)

# Predict the validation split and report the fraction of correct labels.
svm_preds = svm.predict(X_val)
svm_acc = accuracy_score(y_true=y_val, y_pred=svm_preds)
print('SVM accuracy:', svm_acc)

# Logistic Regression

In [None]:
# Build and fit an L1-penalised logistic regression (liblinear solver) on the
# training split.
lr = LogisticRegression(
    C=1,
    solver='liblinear',
    penalty='l1',
    random_state=42,
).fit(X_train, y_train)

# Predict the validation split and report the fraction of correct labels.
lr_preds = lr.predict(X_val)
lr_acc = accuracy_score(y_true=y_val, y_pred=lr_preds)
print('LR accuracy:', lr_acc)


# K-nearest Neighbors 

In [None]:
# Build and fit a 6-neighbour KNN classifier on the training split.
knn = KNeighborsClassifier(n_neighbors=6).fit(X_train, y_train)

# Predict the validation split and report the fraction of correct labels.
y_pred_knn = knn.predict(X_val)
knn_acc = accuracy_score(y_true=y_val, y_pred=y_pred_knn)
print('KNN accuracy:', knn_acc)

# CatBoost Classifier

In [None]:
cat = CatBoostClassifier()
# Fit the model to the training data; verbose=0 turns off the training log.
cat.fit(X_train, y_train, verbose=0)
# Score on the validation split, as the other per-model cells do. The
# test-set score for this model is printed by the final scoring cell.
y_pred_cat = cat.predict(X_val)
accuracy = accuracy_score(y_val, y_pred_cat)
print('CatBoost validation accuracy:', accuracy)

# Lazypredict

In [None]:
! pip install lazypredict

In [None]:
from lazypredict.Supervised import LazyClassifier

# Fit lazypredict's classifier sweep on the training split and evaluate it
# against the test frame and its labels.
clf = LazyClassifier(
    verbose=0,
    ignore_warnings=True,
    custom_metric=None,
)
models, predictions = clf.fit(X_train, X_test, y_train, y_test)

# Keep `models` as the cell's last expression so the notebook displays it.
models

In [None]:
# Train a NuSVC with its default hyperparameters on the training split.
from sklearn.svm import NuSVC
nusvc = NuSVC()
# Last expression of the cell: the notebook displays the value fit() returns.
nusvc.fit(X_train, y_train)

In [None]:
# Train a LightGBM classifier with its default hyperparameters on the
# training split.
import lightgbm as lgb
lgbm = lgb.LGBMClassifier()
# Last expression of the cell: the notebook displays the value fit() returns.
lgbm.fit(X_train, y_train)

# Scores of all algorithms on the test set
 

In [None]:
# Score every fitted model on the test frame against y_test.
#
# Each entry pairs the label to print with the fitted model. The Random Forest
# and AdaBoost labels say "test" because these scores come from test_data, not
# from the validation split. XGBoost is included so that every model trained
# above appears in the summary.
test_set_models = [
    ('NuSVC accuracy:', nusvc),
    ('LGBMClassifier accuracy:', lgbm),
    ('Random Forest test accuracy:', rf),
    ('AdaBoost test accuracy:', ada),
    ('XGBoost test accuracy:', xgb),
    ('SVM accuracy:', svm),
    ('LR accuracy:', lr),
    ('KNN accuracy:', knn),
    ('Cat accuracy:', cat),
]

# Collect the scores by label so they can be compared after printing.
test_accuracies = {}
for label, model in test_set_models:
    test_accuracies[label] = accuracy_score(y_test, model.predict(test_data))
    print(label, test_accuracies[label])

## **-- The End --** 