In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Apply the 'bmh' matplotlib style sheet to every figure drawn below.
plt.style.use('bmh')
import warnings
# NOTE(review): this silences ALL warnings for the rest of the session,
# including pandas/seaborn deprecation and chained-assignment warnings.
# Consider narrowing the filter to specific categories.
warnings.filterwarnings('ignore')

In [None]:
import os

# Only ask for an upload when the CSV is not already on disk. This keeps
# "Restart & Run All" from blocking on the interactive upload widget and lets
# the notebook run outside Google Colab when the file sits next to it.
uploaded = {}
if not os.path.exists('titanic_train.csv'):
  from google.colab import files
  uploaded = files.upload()

# Load the training data into a DataFrame and display it.
titanic = pd.read_csv('titanic_train.csv')
titanic

In [None]:
# Dimensions of the loaded frame as (rows, columns).
titanic.shape

In [None]:
# Column labels available in the loaded data.
titanic.columns

In [None]:
# First five rows, as a quick look at the raw values.
titanic.head()

In [None]:
# Information about the dataset: per-column dtype and non-null count.
titanic.info()

In [None]:
# Exploration of the dataset: summary statistics of the numeric columns.
titanic.describe()

In [None]:
import seaborn as sns

# Bar chart of the number of passengers of each sex.
sns.countplot(data=titanic, x='Sex')
plt.show()

In [None]:
# Bar chart of the number of passengers in each passenger class.
sns.countplot(data=titanic, x='Pclass')
plt.show()

In [None]:
# Passenger counts per class, with each class split into one bar per sex.
sns.countplot(
    data=titanic,
    x='Pclass',
    hue='Sex',
)
plt.show()

In [None]:
# Age distribution as a density-normalised histogram with a KDE overlay.
# sns.distplot is deprecated and scheduled for removal from seaborn; histplot
# with these arguments is its documented replacement. Missing ages are dropped
# explicitly (distplot did this internally).
sns.histplot(titanic['Age'].dropna(), kde=True, stat='density', kde_kws=dict(cut=3))
plt.show()

In [None]:
# Age distribution as a plain count histogram with 20 bins (no KDE curve).
# sns.distplot is deprecated and scheduled for removal from seaborn; histplot
# is its replacement and shows raw counts by default, as
# distplot(kde=False) did. Missing ages are dropped explicitly.
sns.histplot(titanic['Age'].dropna(), bins=20)
plt.show()

In [None]:
# New column 'Along': element-wise sum of the SibSp and Parch columns.
titanic['Along'] = titanic['SibSp'].add(titanic['Parch'])

In [None]:
# Last five rows, to check the newly added 'Along' column.
titanic.tail()

In [None]:
# Collapse 'Along' into a binary flag: 1 wherever SibSp + Parch is positive,
# 0 otherwise.
# One .loc[rows, column] call writes to the frame itself. The previous chained
# form (titanic['Along'].loc[...] = 1) assigned through an intermediate Series,
# which triggers SettingWithCopyWarning and leaves the frame unchanged under
# pandas Copy-on-Write.
titanic.loc[titanic['Along'] > 0, 'Along'] = 1

In [None]:
# Last five rows again, to check that 'Along' now holds only 0/1 values.
titanic.tail()

In [None]:
# Survival factors: for passenger class and then sex, draw the passenger
# counts split by the 'Survived' outcome, one figure per column.
for column in ('Pclass', 'Sex'):
  sns.countplot(data=titanic, x=column, hue='Survived')
  plt.show()

In [None]:
# List the current columns (including the added 'Along') before pruning them.
print(titanic.columns)

In [None]:
# Data preparation: remove the columns that are not used as model features.
# errors='ignore' skips columns that are already gone, so re-running the cell
# does not raise.
titanic = titanic.drop(
    columns=['PassengerId', 'Name', 'Ticket', 'Cabin', 'Embarked'],
    errors='ignore',
)
titanic.head()

In [None]:
# Encode the gender column numerically: 'male' is denoted by 1 and every other
# value (i.e. 'female') by 0.
# The dtype guard keeps this cell idempotent. Without it, a second run compares
# the already-encoded integers to 'male' and overwrites the column with zeros.
if not pd.api.types.is_numeric_dtype(titanic['Sex']):
  titanic['Sex'] = np.where(titanic['Sex'] == 'male', 1, 0)

In [None]:
# First five rows, to check the numeric encoding of 'Sex'.
titanic.head()

In [None]:
# Annotated heatmap of the pairwise correlations between the numeric columns.
plt.figure(figsize=(10,8))
# numeric_only=True keeps corr() from failing if a non-numeric column is still
# present (errors='ignore' on the earlier drop means that is not guaranteed).
sns.heatmap(titanic.corr(numeric_only=True), annot=True)
# Explicit show, like the other plotting cells, so the Axes repr is not echoed.
plt.show()

In [None]:
# Handling of missing values: count the nulls in each column.
titanic.isna().sum()

In [None]:
# Box plot of the age distribution within each passenger class.
sns.boxplot(
    data=titanic,
    x='Pclass',
    y='Age',
)
plt.show()

In [None]:
# Mean age of the passengers in class 1, 2 and 3, printed one per line.
for pclass in (1, 2, 3):
  in_class = titanic['Pclass'] == pclass
  print(titanic.loc[in_class, 'Age'].mean())

In [None]:
def fill_age(row):
  """Return the passenger's age, imputing a per-class value when it is missing.

  Parameters
  ----------
  row : pandas.Series or sequence
      Two values in the order (age, passenger class), e.g. one row of
      ``titanic[['Age', 'Pclass']]`` as passed by ``DataFrame.apply(axis=1)``.

  Returns
  -------
  The original age when it is not null; otherwise 38.23 for class 1,
  29.87 for class 2 and 25.14 for any other class.
  """
  # Read the two fields by position explicitly. Plain integer keys on a
  # label-indexed Series (row[0]) are deprecated positional access in pandas 2.x
  # and are treated as labels (KeyError here) in pandas 3.
  if isinstance(row, pd.Series):
    age, pclass = row.iloc[0], row.iloc[1]
  else:
    age, pclass = row[0], row[1]

  # A known age is returned untouched.
  if not pd.isnull(age):
    return age

  # Hard-coded fill values, presumably the rounded per-class mean ages printed
  # by the preceding cell - TODO confirm they still match the data.
  if pclass == 1:
    return 38.23
  if pclass == 2:
    return 29.87
  return 25.14

In [None]:
# Replace 'Age' with the row-wise result of fill_age over (Age, Pclass), so
# missing ages are filled from the passenger's class.
titanic['Age'] = titanic.loc[:, ['Age', 'Pclass']].apply(fill_age, axis=1)

In [None]:
# Re-count the nulls per column to check the result of the age fill.
titanic.isna().sum()

In [None]:
#Model Building

In [None]:
# Target vector: the 'Survived' column.
y = titanic['Survived']
# Feature matrix: every remaining column.
X = titanic.drop(columns=['Survived'])

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Hold out 20% of the rows for testing. The split is done on the underlying
# NumPy arrays and uses a fixed seed so it is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X.values,
    y.values,
    test_size=0.2,
    random_state=0,
)

In [None]:
# Shapes of the train/test feature matrices and label vectors.
X_train.shape, X_test.shape, y_train.shape, y_test.shape

In [None]:
# Training feature matrix (a plain NumPy array, since X.values was split).
X_train

In [None]:
# Training labels.
y_train

In [None]:
from sklearn.tree import DecisionTreeClassifier

In [None]:
# Shallow decision tree (at most 3 levels) that picks splits by entropy.
# random_state is fixed because scikit-learn permutes the features at every
# split, so ties between equally good splits would otherwise be broken
# differently from run to run.
model = DecisionTreeClassifier(criterion='entropy', max_depth=3, random_state=0)

In [None]:
# Fit the decision tree on the training split.
model.fit(X_train, y_train)

In [None]:
# Predict labels for the held-out test split.
y_pred = model.predict(X_test)

In [None]:
# Predicted labels for the test split.
y_pred

In [None]:
# True labels for the test split, for comparison with y_pred.
y_test

In [None]:
# Mean accuracy on the test split.
model.score(X_test, y_test)

In [None]:
# Mean accuracy on the training split; a large gap to the test accuracy would
# point to overfitting.
model.score(X_train, y_train)

In [None]:
from sklearn.metrics import classification_report, f1_score

In [None]:
# Per-class precision, recall, F1 and support on the test split.
print(classification_report(y_test, y_pred))

In [None]:
# F1 score on the test split, using f1_score's default settings.
f1_score(y_test, y_pred)

In [None]:
# Visualization of the tree.
# Keep the feature column labels so the plotted tree can name its split features.
features = X.columns
from sklearn import tree

In [None]:
# Feature names, in the same column order as the arrays the model was fit on.
features

In [None]:
# Draw the fitted decision tree on a large canvas so the node text is legible.
plt.figure(figsize=(15,15))
# feature_names is passed as a plain list: scikit-learn releases that validate
# this parameter as a list reject a pandas Index with InvalidParameterError.
result = tree.plot_tree(model, feature_names=list(features))
plt.show()