# **Building a Decision Tree Model to predict survival based on certain parameters**

In [19]:
import pandas as pd
df = pd.read_csv("Titanic.csv")


In [20]:
df.shape

(891, 12)

In [21]:
df.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


## **Dividing the targeted variable and independant variable**

In [25]:
target = df['Survived'] # dependant variable

In [26]:
target.head()

0    0
1    1
2    1
3    1
4    0
Name: Survived, dtype: int64

In [28]:
inputs = df.drop(['PassengerId','Name','SibSp','Parch','Ticket','Cabin','Embarked','Survived'],axis='columns')
inputs

Unnamed: 0,Pclass,Sex,Age,Fare
0,3,male,22.0,7.2500
1,1,female,38.0,71.2833
2,3,female,26.0,7.9250
3,1,female,35.0,53.1000
4,3,male,35.0,8.0500
...,...,...,...,...
886,2,male,27.0,13.0000
887,1,female,19.0,30.0000
888,3,female,,23.4500
889,1,male,26.0,30.0000


### Converting catogorical values into numerical values

# **Encoding**

In [31]:
from sklearn.preprocessing import LabelEncoder
le_Sex = LabelEncoder()
inputs['Sex_n'] = le_Sex.fit_transform(inputs['Sex'])
inputs

Unnamed: 0,Pclass,Sex,Age,Fare,Sex_n
0,3,male,22.0,7.2500,1
1,1,female,38.0,71.2833,0
2,3,female,26.0,7.9250,0
3,1,female,35.0,53.1000,0
4,3,male,35.0,8.0500,1
...,...,...,...,...,...
886,2,male,27.0,13.0000,1
887,1,female,19.0,30.0000,0
888,3,female,,23.4500,0
889,1,male,26.0,30.0000,1


In [33]:
inputs_n = inputs.drop('Sex',axis='columns')
inputs_n

Unnamed: 0,Pclass,Age,Fare,Sex_n
0,3,22.0,7.2500,1
1,1,38.0,71.2833,0
2,3,26.0,7.9250,0
3,1,35.0,53.1000,0
4,3,35.0,8.0500,1
...,...,...,...,...
886,2,27.0,13.0000,1
887,1,19.0,30.0000,0
888,3,,23.4500,0
889,1,26.0,30.0000,1


In [34]:
target

0      0
1      1
2      1
3      1
4      0
      ..
886    0
887    1
888    0
889    1
890    0
Name: Survived, Length: 891, dtype: int64

## **By Using Decision Tree** 

The passenger will survive or not.

In [35]:
from sklearn import tree
model = tree.DecisionTreeClassifier()

## **Data cleaning**

In [38]:
inputs_n.fillna(0, inplace = True)
inputs_n

Unnamed: 0,Pclass,Age,Fare,Sex_n
0,3,22.0,7.2500,1
1,1,38.0,71.2833,0
2,3,26.0,7.9250,0
3,1,35.0,53.1000,0
4,3,35.0,8.0500,1
...,...,...,...,...
886,2,27.0,13.0000,1
887,1,19.0,30.0000,0
888,3,0.0,23.4500,0
889,1,26.0,30.0000,1


### **Model train**

In [39]:
model.fit(inputs_n,target)

DecisionTreeClassifier()

## **Accuracy for trained data**

In [40]:
model.score(inputs_n, target)

0.9797979797979798

# **Predictions**

### **when pclass is 3 , Age = 22.0 , if the fare is  7.2500 and it is male , will he/she would likely to survive?**

In [42]:
model.predict([[3	,22.0, 7.2500,1]])

  "X does not have valid feature names, but"


array([0])

 ### **when pclass is 1 , Age is 38.0 , if the fare is 71.2833 and it is female , will he/she would likely to survive?**

In [45]:
model.predict([[1,38.0,71.2833,0]])

  "X does not have valid feature names, but"


array([1])

### **when pclass is 1 , Age = 19.0, if the fare is 30.000 and it is male , will he/she would likely to survive?**

In [47]:
model.predict([[1,19.0,30.000,0]])

  "X does not have valid feature names, but"


array([1])

### **when pclass is 1 , Age = 26.0, if the fare is 30.000 and it is male , will he/she would likely to survive?**




In [48]:
model.predict([[1,26.0,30.0000,1]])

  "X does not have valid feature names, but"


array([1])

### **when pclass is 1 , Age = 26.0, if the fare is 30.000 and it is male , will he/she would likely to survive?**

In [49]:
model.predict([[1,26.0,30.000,1]])

  "X does not have valid feature names, but"


array([1])

In [50]:
model.score(inputs_n, target)

0.9797979797979798