### Import Libraries & Data

In [None]:
import pandas as pd

In [None]:
# Read the training and test CSVs from the working directory, in that order.
train, test = (pd.read_csv(csv_path) for csv_path in ('train.csv', 'test.csv'))

In [None]:
# Preview the first five training rows.
train.head(n=5)

### Missing Value Treatment

In [None]:
# Count missing values per column in the training data.
train.isna().sum()

In [None]:
# Count missing values per column in the test data.
test.isna().sum()

In [None]:
# Stack train on top of test so the cleaning steps below are applied to both.
# sort=False keeps the columns in their original order instead of sorting them.
titanic = pd.concat(objs=[train, test], sort=False)
titanic.describe()

In [None]:
# Count missing values per column in the combined frame.
titanic.isna().sum()

In [None]:
# Impute missing Age and Fare with each column's median over the combined frame.
# Only these two columns are filled; every other column keeps its NaNs.
titanic = titanic.fillna({
    'Age': titanic['Age'].median(),
    'Fare': titanic['Fare'].median(),
})

In [None]:
# Drop the columns that are not used as model features.
titanic_all = titanic.drop(
    columns=['PassengerId', 'Name', 'Cabin', 'Embarked', 'Ticket'],
)

In [None]:
# Show column dtypes and non-null counts for the columns kept in titanic_all.
titanic_all.info()

In [None]:
# Encode Sex as a binary indicator: male -> 1, female -> 0.
# The labels are read from the untouched `titanic` frame rather than from
# `titanic_all` itself, so re-running this cell yields the same column instead
# of mapping the already-encoded 0/1 values to NaN.
# `.values` assigns by position: `titanic_all` was built from `titanic` by
# dropping columns only, so both frames hold the same rows in the same order,
# and positional assignment sidesteps the duplicate index labels left by concat.
titanic_all['Sex'] = titanic['Sex'].map({'male': 1, 'female': 0}).values
titanic_all.info()

In [None]:
# Labelled rows: keep the records where Survived is present.
t_train = titanic_all.loc[titanic_all['Survived'].notna()]
t_train.describe()

In [30]:
# Unlabelled rows: keep the records where Survived is missing.
s_test = titanic_all.loc[titanic_all['Survived'].isna()]
s_test.describe()

Unnamed: 0,Survived,Pclass,Sex,Age,SibSp,Parch,Fare
count,0.0,418.0,418.0,418.0,418.0,418.0,418.0
mean,,2.26555,0.636364,29.805024,0.447368,0.392344,35.576535
std,,0.841838,0.481622,12.667969,0.89676,0.981429,55.850103
min,,1.0,0.0,0.17,0.0,0.0,0.0
25%,,1.0,0.0,23.0,0.0,0.0,7.8958
50%,,3.0,1.0,28.0,0.0,0.0,14.4542
75%,,3.0,1.0,35.75,1.0,0.0,31.471875
max,,3.0,1.0,76.0,8.0,9.0,512.3292


### Train Test Split

In [None]:
# Feature matrix: every column of the labelled rows except the target.
X = t_train.drop(columns='Survived')

In [None]:
# Target vector: the Survived column of the labelled rows.
y = t_train.loc[:, 'Survived']

In [None]:
import sklearn.model_selection as model_selection
# Hold out 20% of the labelled rows for evaluation; the fixed random_state
# makes the split repeatable.
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=200,
)

### Logistic Regression Model

In [21]:
#Import libraries
from sklearn.linear_model import LogisticRegression

In [22]:
# Logistic regression with the L-BFGS solver; all other settings are defaults.
logreg = LogisticRegression(solver="lbfgs")

In [23]:
# Fit the logistic regression on the training split.
logreg.fit(X=X_train, y=y_train)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

In [32]:
# Report the logistic model's score on the training and held-out splits.
for split_name, features, target in (
    ('Train', X_train, y_train),
    ('Test', X_test, y_test),
):
    print(f'{split_name} : {logreg.score(features, target):.2f}')

Train : 0.80
Test : 0.78


In [33]:
# Strip the (all-missing) target column from the unlabelled rows, then predict
# with the fitted logistic model and cast the predictions to int.
sub_test = s_test.drop(columns=['Survived'])
sub_test_pred = logreg.predict(sub_test).astype(int)

In [36]:
# Pair each test PassengerId with its predicted label and write the result
# to CSV without the index column.
AllSub = pd.DataFrame(
    {
        'PassengerId': test['PassengerId'],
        'Survived': sub_test_pred,
    }
)
AllSub.to_csv("Basic_Log_model.csv", index=False)

### Bagging Classifier

In [59]:
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier

In [60]:
# Bagging ensemble of decision trees.
# The base learner is passed positionally on purpose: its keyword was renamed
# from `base_estimator` to `estimator` in scikit-learn 1.2 and the old name was
# removed in 1.4, but it is the first parameter under either name, so this
# call works on both old and current releases.
# random_state fixes the ensemble's resampling; n_jobs=-1 uses all CPU cores.
bclf = BaggingClassifier(
    DecisionTreeClassifier(),
    n_estimators=20,
    oob_score=False,
    n_jobs=-1,
    random_state=200,
)

In [61]:
from sklearn.model_selection import GridSearchCV

In [62]:
# Candidate ensemble sizes for the grid search: 5, 10, 15, 20, then steps of
# ten from 30 up to and including 100.
parameters = {'n_estimators': (5, 10, 15, 20, *range(30, 101, 10))}

In [63]:
# Grid search over `parameters` for the bagging classifier, using 3-fold CV.
Bag_grid = GridSearchCV(estimator=bclf, param_grid=parameters, cv=3)

In [64]:
# Run the grid search on the training split; fit() returns the search object.
Bag_model = Bag_grid.fit(X=X_train, y=y_train)

In [65]:
# Display the estimator configuration selected by the grid search.
Bag_grid.best_estimator_

BaggingClassifier(base_estimator=DecisionTreeClassifier(ccp_alpha=0.0,
                                                        class_weight=None,
                                                        criterion='gini',
                                                        max_depth=None,
                                                        max_features=None,
                                                        max_leaf_nodes=None,
                                                        min_impurity_decrease=0.0,
                                                        min_impurity_split=None,
                                                        min_samples_leaf=1,
                                                        min_samples_split=2,
                                                        min_weight_fraction_leaf=0.0,
                                                        presort='deprecated',
                                                        random_state=None,


In [66]:
# Report the tuned bagging model's score on the training and held-out splits.
for split_name, features, target in (
    ('Train', X_train, y_train),
    ('Test', X_test, y_test),
):
    print(f'{split_name} : {Bag_grid.score(features, target):.2f}')

Train : 0.97
Test : 0.73


In [46]:
# Predict on the unlabelled rows with the tuned bagging model.
# `bclf` itself is never fitted: GridSearchCV fits clones of the estimator it
# is given, so `bclf.predict` raises NotFittedError on a fresh kernel.
# `Bag_grid` refits the best configuration on the training split
# (refit=True is the default) and its `predict` delegates to that model.
sub_test = s_test.drop(['Survived'], axis = 1)
sub_test_pred = Bag_grid.predict(sub_test).astype(int)

In [47]:
# Pair each test PassengerId with its predicted label and write the result
# to CSV without the index column.
AllSub = pd.DataFrame(
    {
        'PassengerId': test['PassengerId'],
        'Survived': sub_test_pred,
    }
)
AllSub.to_csv("Basic_Bagging_model.csv", index=False)