<a href="https://www.kaggle.com/code/apinyacharoenchap/titanic-with-many-models?scriptVersionId=138040568" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

Thanks

[I have created my notebook from following this ](https://www.kaggle.com/code/vbmokin/titanic-0-83253-comparison-20-popular-models)


[I have used preprocessing from following this](https://www.kaggle.com/code/furduisorinoctavian/titanic-with-neural-networks-78)

# Table of Contents
1. Loading Data
2. Preprocessing Data
3. Train Model

In [1]:
import pandas as pd
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# models
from sklearn.linear_model import LogisticRegression, Perceptron, RidgeClassifier, SGDClassifier
from sklearn.svm import SVC, LinearSVC
from sklearn.neural_network import MLPClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import BaggingClassifier



## 1.Loading Data

In [2]:
data_train = pd.read_csv("/kaggle/input/titanic/train.csv")
data_test = pd.read_csv("/kaggle/input/titanic/test.csv")
submission = pd.read_csv('../input/titanic/gender_submission.csv')
data_train.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


## 2.Preprocessing Data 

Feature

1.PClass

2.Name

3.Sex

4.Age

5.Embarked

6. Parch

### Name

In [3]:
data_train["Title"] = data_train.Name.str.split(',').str[1].str.split('.').str[0].str.strip()
data_test["Title"] = data_test.Name.str.split(',').str[1].str.split('.').str[0].str.strip()

In [4]:
data_train['Title'].value_counts()

Mr              517
Miss            182
Mrs             125
Master           40
Dr                7
Rev               6
Mlle              2
Major             2
Col               2
the Countess      1
Capt              1
Ms                1
Sir               1
Lady              1
Mme               1
Don               1
Jonkheer          1
Name: Title, dtype: int64

In [5]:
data_train['Title'] = data_train['Title'].replace('Mlle','Miss')
data_train['Title'] = data_train['Title'].replace('Mme','Mrs')

data_test['Title'] = data_test['Title'].replace('Mlle','Miss')
data_test['Title'] = data_test['Title'].replace('Mme','Mrs')

In [6]:
le = preprocessing.LabelEncoder()
data_train["Title"] = le.fit_transform(data_train["Title"])
data_test["Title"] = le.fit_transform(data_test["Title"])

### SEX

In [7]:
data_train['Sex'].value_counts()

male      577
female    314
Name: Sex, dtype: int64

In [8]:
le = preprocessing.LabelEncoder()
data_train["Sex"] = le.fit_transform(data_train["Sex"])
data_test["Sex"] = le.fit_transform(data_test["Sex"])
data_train.head(10)
# sex = 1 => male
# sex = 0 => female

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,Title
0,1,0,3,"Braund, Mr. Owen Harris",1,22.0,1,0,A/5 21171,7.25,,S,9
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",0,38.0,1,0,PC 17599,71.2833,C85,C,10
2,3,1,3,"Heikkinen, Miss. Laina",0,26.0,0,0,STON/O2. 3101282,7.925,,S,8
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",0,35.0,1,0,113803,53.1,C123,S,10
4,5,0,3,"Allen, Mr. William Henry",1,35.0,0,0,373450,8.05,,S,9
5,6,0,3,"Moran, Mr. James",1,,0,0,330877,8.4583,,Q,9
6,7,0,1,"McCarthy, Mr. Timothy J",1,54.0,0,0,17463,51.8625,E46,S,9
7,8,0,3,"Palsson, Master. Gosta Leonard",1,2.0,3,1,349909,21.075,,S,7
8,9,1,3,"Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)",0,27.0,0,2,347742,11.1333,,S,10
9,10,1,2,"Nasser, Mrs. Nicholas (Adele Achem)",0,14.0,1,0,237736,30.0708,,C,10


### Age 

In [9]:
def ageCategory(age):
    if 0 <= age < 1:
        category = 0  # newborn 
    elif 1 <= age < 6:
        category = 1  # child 1
    elif 6 <= age < 12:
        category = 2  # child 2
    elif 12 <= age < 18:
        category = 3  # teenager
    elif 18 <= age < 30:
        category = 4  # adult 1
    elif 30 <= age < 45:
        category = 5  # adult 2
    elif 45 <= age < 65:
        category = 6  # adult 3
    else:
        category = 7  # old people
    return category


def addAgeCat(data):
    ageCategoryList = []
    for age in data["Age"]:
        ageCategoryList.append(ageCategory(age))
    return ageCategoryList


data_train["AgeCat"] = addAgeCat(data_train)
data_test["AgeCat"] = addAgeCat(data_test)
data_train.head(10)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,Title,AgeCat
0,1,0,3,"Braund, Mr. Owen Harris",1,22.0,1,0,A/5 21171,7.25,,S,9,4
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",0,38.0,1,0,PC 17599,71.2833,C85,C,10,5
2,3,1,3,"Heikkinen, Miss. Laina",0,26.0,0,0,STON/O2. 3101282,7.925,,S,8,4
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",0,35.0,1,0,113803,53.1,C123,S,10,5
4,5,0,3,"Allen, Mr. William Henry",1,35.0,0,0,373450,8.05,,S,9,5
5,6,0,3,"Moran, Mr. James",1,,0,0,330877,8.4583,,Q,9,7
6,7,0,1,"McCarthy, Mr. Timothy J",1,54.0,0,0,17463,51.8625,E46,S,9,6
7,8,0,3,"Palsson, Master. Gosta Leonard",1,2.0,3,1,349909,21.075,,S,7,1
8,9,1,3,"Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)",0,27.0,0,2,347742,11.1333,,S,10,4
9,10,1,2,"Nasser, Mrs. Nicholas (Adele Achem)",0,14.0,1,0,237736,30.0708,,C,10,3


### Embarked

In [10]:
data_train["Embarked"] = le.fit_transform(data_train["Embarked"])
data_test["Embarked"] = le.fit_transform(data_test["Embarked"])
data_train.head(10)
#s-->2
#c-->0
#Q-->1

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,Title,AgeCat
0,1,0,3,"Braund, Mr. Owen Harris",1,22.0,1,0,A/5 21171,7.25,,2,9,4
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",0,38.0,1,0,PC 17599,71.2833,C85,0,10,5
2,3,1,3,"Heikkinen, Miss. Laina",0,26.0,0,0,STON/O2. 3101282,7.925,,2,8,4
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",0,35.0,1,0,113803,53.1,C123,2,10,5
4,5,0,3,"Allen, Mr. William Henry",1,35.0,0,0,373450,8.05,,2,9,5
5,6,0,3,"Moran, Mr. James",1,,0,0,330877,8.4583,,1,9,7
6,7,0,1,"McCarthy, Mr. Timothy J",1,54.0,0,0,17463,51.8625,E46,2,9,6
7,8,0,3,"Palsson, Master. Gosta Leonard",1,2.0,3,1,349909,21.075,,2,7,1
8,9,1,3,"Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)",0,27.0,0,2,347742,11.1333,,2,10,4
9,10,1,2,"Nasser, Mrs. Nicholas (Adele Achem)",0,14.0,1,0,237736,30.0708,,0,10,3


In [11]:
data_train["Title"].value_counts()

9     517
8     184
10    126
7      40
3       7
12      6
6       2
1       2
2       1
11      1
5       1
13      1
0       1
14      1
4       1
Name: Title, dtype: int64

# 3. Prepaing to the modelling

In [12]:
X = data_train[["Sex", "Title", "AgeCat", "Pclass", "Parch", "Embarked"]]
y = data_train["Survived"]

X_test = data_test[["Sex", "Title", "AgeCat", "Pclass", "Parch", "Embarked"]]

In [13]:
X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.2, random_state=42)

# 3. Modelling

## 3.1 Logistic Regression 

In [14]:
logreg = LogisticRegression()
logreg.fit(X_train, y_train)
y_pred = logreg.predict(X_valid).astype(int)

accuracy = accuracy_score(y_valid, y_pred)
print(accuracy)

## 0.75598

0.8156424581005587


In [15]:
y_prediction = logreg.predict(X_test).astype(int)

In [16]:
submission = pd.DataFrame({"PassengerId": submission["PassengerId"],"Survived": y_prediction})
submission.to_csv('/kaggle/working/submission_logreg.csv', index=False)

print("Submission saved")
## Accuracy = 0.75598 

Submission saved


## 3.2 Support Vector Machines 

In [17]:
svc = SVC()
svc.fit(X_train, y_train)
y_pred = logreg.predict(X_valid).astype(int)

accuracy = accuracy_score(y_valid, y_pred)
print(accuracy)

0.8156424581005587


In [18]:
y_prediction = svc.predict(X_test).astype(int)

In [19]:
submission = pd.DataFrame({"PassengerId": submission["PassengerId"],"Survived": y_prediction})
submission.to_csv('/kaggle/working/submission_svm.csv', index=False)

print("Submission saved")

##0.77751

Submission saved


## 3.3 NN

In [20]:
clf = MLPClassifier(solver='lbfgs', alpha=1e-5,hidden_layer_sizes=(15, 2), random_state=1)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_valid).astype(int)

accuracy = accuracy_score(y_valid, y_pred)
print(accuracy)

0.8324022346368715


In [21]:
y_prediction = clf.predict(X_test).astype(int)

In [22]:
submission = pd.DataFrame({"PassengerId": submission["PassengerId"],"Survived": y_prediction})
submission.to_csv('/kaggle/working/submission_NN.csv', index=False)

print("Submission saved")

##0.74401

Submission saved


## 3.4 Decision Tree

In [23]:
decision_tree = DecisionTreeClassifier()
decision_tree.fit(X_train, y_train)
y_pred = decision_tree.predict(X_valid).astype(int)
accuracy = accuracy_score(y_valid, y_pred)
print(accuracy)

0.7877094972067039


In [24]:
y_prediction = decision_tree.predict(X_test).astype(int)

In [25]:
submission = pd.DataFrame({"PassengerId": submission["PassengerId"],"Survived": y_prediction})
submission.to_csv('/kaggle/working/submission_DC.csv', index=False)

print("Submission saved")
## 0.643

Submission saved


## 3.5 Random Forest

In [26]:
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)
# Train the model on the training data
rf_classifier.fit(X_train, y_train)

y_pred = rf_classifier.predict(X_valid).astype(int)
accuracy = accuracy_score(y_valid, y_pred)
print(accuracy)

0.7877094972067039


In [27]:
y_prediction = rf_classifier.predict(X_test).astype(int)

In [28]:
submission = pd.DataFrame({"PassengerId": submission["PassengerId"],"Survived": y_prediction})
submission.to_csv('/kaggle/working/submission_RF.csv', index=False)

print("Submission saved")

Submission saved


## 3.6 Gaussian Naive Bayes

In [29]:
gaussian = GaussianNB()
gaussian.fit(X_train, y_train)

y_pred = gaussian.predict(X_valid).astype(int)
accuracy = accuracy_score(y_valid, y_pred)
print(accuracy)

0.7653631284916201


In [30]:
y_prediction = gaussian.predict(X_test).astype(int)

In [31]:
submission = pd.DataFrame({"PassengerId": submission["PassengerId"],"Survived": y_prediction})
submission.to_csv('/kaggle/working/submission_NB.csv', index=False)

print("Submission saved")
## 0.39234

Submission saved


## 3.7 Ridge

In [32]:
ridge_classifier = RidgeClassifier()
ridge_classifier.fit(X_train, y_train)

y_pred = ridge_classifier.predict(X_valid).astype(int)
accuracy = accuracy_score(y_valid, y_pred)
print(accuracy)

0.7932960893854749


In [33]:
y_prediction = ridge_classifier.predict(X_test).astype(int)

In [34]:
submission = pd.DataFrame({"PassengerId": submission["PassengerId"],"Survived": y_prediction})
submission.to_csv('/kaggle/working/submission_Ridge.csv', index=False)

print("Submission saved")
## 0.77272

Submission saved


## 3.8 Bagging Classifier

In [35]:
# Bagging Classifier

bagging_classifier = BaggingClassifier()
bagging_classifier.fit(X_train, y_train)

y_pred = bagging_classifier.predict(X_valid).astype(int)
accuracy = accuracy_score(y_valid, y_pred)
print(accuracy)

0.8156424581005587


In [36]:
y_prediction = bagging_classifier.predict(X_test).astype(int)

In [37]:
submission = pd.DataFrame({"PassengerId": submission["PassengerId"],"Survived": y_prediction})
submission.to_csv('/kaggle/working/bagging.csv', index=False)

print("Submission saved")
## 0.77272

Submission saved


# Support vector is the best result (0.77751)