# Titanic

참고 링크 : https://www.kaggle.com/alexisbcook/titanic-tutorial

In [1]:
import numpy as np
import pandas as pd

### OS 모듈
- 운영체제 기능(디렉터리 탐색, 파일 경로 결합 등)을 파이썬에서 사용할 수 있게 해주는 표준 라이브러리 모듈
- 참고 : https://docs.python.org/ko/3/library/os.html
- 이 파일에 있는 import os.ipynb 읽어보기 

In [2]:
import os
# List every file available under the Kaggle input directory, one full path per line.
for folder, _subdirs, file_names in os.walk('/kaggle/input'):
    for file_name in file_names:
        full_path = os.path.join(folder, file_name)
        print(full_path)

## 데이터 로딩하기

In [3]:
# Load the training set from the working directory and preview the first five rows.
# NOTE(review): the recorded preview shows an "Unnamed: 0" column, which suggests the
# CSV was written together with its index -- confirm before relying on that column.
train_data = pd.read_csv(filepath_or_buffer="train.csv")
train_data.head(n=5)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


### Index 파헤치기
- SibSp : Siblings / Spouses - 함께 탑승한 형제자매 및 배우자의 수
- Parch : Parents / Children - 함께 탑승한 부모 및 자녀의 수

- 볼 수 있는 곳 : Data Dictionary

## Explore a pattern

여자 중 살아남은 사람

In [5]:
# Survival flags (0 = died, 1 = survived) restricted to female passengers.
# Row filter and column selection are done in a single .loc lookup.
women = train_data.loc[train_data["Sex"] == 'female', "Survived"]
print(women)

1      1
2      1
3      1
8      1
9      1
      ..
880    1
882    0
885    0
887    1
888    0
Name: Survived, Length: 314, dtype: int64


In [8]:
# Survival rate among women: number of survivors (sum of the 0/1 flags)
# divided by the number of female passengers.
rate_women = women.sum() / len(women)
print("%f of women who survived" % rate_women)

0.742038 of women who survived


남자 중 살아남은 사람

In [9]:
# Survival flags (0 = died, 1 = survived) restricted to male passengers.
# Row filter and column selection are done in a single .loc lookup.
men = train_data.loc[train_data["Sex"] == 'male', "Survived"]
print(men)

0      0
4      0
5      0
6      0
7      0
      ..
883    0
884    0
886    0
889    1
890    0
Name: Survived, Length: 577, dtype: int64


In [10]:
# Survival rate among men: number of survivors (sum of the 0/1 flags)
# divided by the number of male passengers.
rate_men = sum(men)/len(men)
# The message now says "men"; it previously said "women" (copy-paste slip),
# which mislabeled the male survival rate.
print("%f of men who survived" %rate_men)

0.188908 of women who survived


## Random Forest Model

- 여러 개의 결정 트리(decision tree)를 학습시킨 뒤, 각 트리의 예측을 다수결로 합쳐 최종 예측을 내는 머신러닝(ML) 모델

In [11]:
from sklearn.ensemble import RandomForestClassifier

In [12]:
# Target vector: the 0/1 survival label for every training passenger.
y = train_data.loc[:, "Survived"]
print(y)

0      0
1      1
2      1
3      1
4      0
      ..
886    0
887    1
888    0
889    1
890    0
Name: Survived, Length: 891, dtype: int64


In [13]:
# Predictor columns. "Sex" holds strings, so get_dummies expands it into
# Sex_female / Sex_male indicator columns; the numeric columns pass through unchanged.
features = ["Pclass", "Sex", "SibSp", "Parch"]
x = pd.get_dummies(train_data.loc[:, features])
print(x)

     Pclass  SibSp  Parch  Sex_female  Sex_male
0         3      1      0           0         1
1         1      1      0           1         0
2         3      0      0           1         0
3         1      1      0           1         0
4         3      0      0           0         1
..      ...    ...    ...         ...       ...
886       2      0      0           0         1
887       1      0      0           1         0
888       3      1      2           1         0
889       1      0      0           0         1
890       3      0      0           0         1

[891 rows x 5 columns]


In [15]:
# Load the unlabeled test set from the working directory.
test_data = pd.read_csv(filepath_or_buffer="test.csv")

In [16]:
# One-hot encode the test features the same way as the training features.
# get_dummies only creates columns for categories present in the frame it is given,
# so the result is aligned to the training columns: same set, same order, with any
# indicator column absent from the test data filled with 0. This keeps the input
# to model.predict consistent with what the model was fitted on.
x_test = pd.get_dummies(test_data[features]).reindex(columns=x.columns, fill_value=0)
print(x_test)

     Pclass  SibSp  Parch  Sex_female  Sex_male
0         3      0      0           0         1
1         3      1      0           1         0
2         2      0      0           0         1
3         3      0      0           0         1
4         3      1      1           1         0
..      ...    ...    ...         ...       ...
413       3      0      0           0         1
414       1      0      0           1         0
415       3      0      0           0         1
416       3      0      0           0         1
417       3      1      1           0         1

[418 rows x 5 columns]


In [17]:
# Random forest of 100 trees, each limited to depth 5; random_state is fixed
# so that repeated runs build the same forest.
model = RandomForestClassifier(
    n_estimators=100,
    max_depth=5,
    random_state=1,
)
print(model)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
                       max_depth=5, max_features='auto', max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=100,
                       n_jobs=None, oob_score=False, random_state=1, verbose=0,
                       warm_start=False)


In [19]:
# Train the forest on the encoded training features and the survival labels.
# Left as the last expression so the fitted estimator is displayed as the cell output.
model.fit(X=x, y=y)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
                       max_depth=5, max_features='auto', max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=100,
                       n_jobs=None, oob_score=False, random_state=1, verbose=0,
                       warm_start=False)

In [21]:
# Predicted survival label (0 or 1) for each row of the encoded test features.
predictions = model.predict(X=x_test)
print(predictions)

[0 1 0 0 1 0 1 0 1 0 0 0 1 0 1 1 0 0 1 1 0 0 1 0 1 0 1 0 0 0 0 0 0 0 0 0 1
 1 0 0 0 0 0 1 1 0 0 0 1 1 0 0 1 1 0 0 0 0 0 1 0 0 0 1 1 1 1 0 0 1 1 0 1 0
 1 0 0 1 0 1 0 0 0 0 0 0 1 1 1 0 1 0 1 0 0 0 1 0 1 0 1 0 0 0 1 0 0 0 0 0 0
 1 1 1 1 0 0 1 0 1 1 0 1 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 1 1 0 0 0 0 0
 0 0 1 0 0 1 0 0 1 1 0 1 1 0 1 0 0 1 0 0 1 1 0 0 0 0 0 1 1 0 1 1 0 0 1 0 1
 0 1 0 0 0 0 0 0 0 1 0 0 1 0 1 1 0 0 1 0 0 1 0 1 0 0 0 0 1 0 0 1 0 1 0 1 0
 1 0 1 1 0 1 0 0 0 1 0 0 0 0 0 0 1 1 1 1 0 0 0 0 1 0 1 1 1 0 0 0 0 0 0 0 1
 0 0 0 1 1 0 0 0 0 1 0 0 0 1 1 0 1 0 0 0 0 1 0 1 1 1 0 0 0 0 0 0 1 0 0 0 0
 1 0 0 0 0 0 0 0 1 1 1 0 0 1 0 0 0 1 1 1 0 0 0 0 0 0 0 0 1 0 1 0 0 0 1 0 0
 1 0 0 0 0 0 0 0 0 0 1 0 1 0 1 0 1 1 0 0 0 0 0 1 0 0 1 0 1 1 0 1 0 0 1 1 0
 0 1 0 0 1 1 1 0 0 0 0 0 1 1 0 1 0 0 0 0 0 1 0 0 0 1 0 1 0 0 1 0 1 0 0 0 0
 0 1 1 1 1 1 0 1 0 0 0]


In [23]:
# Submission table: one row per test passenger, pairing the passenger id
# with the predicted survival label.
output = pd.DataFrame(
    {
        'PassengerId': test_data["PassengerId"],
        'Survived': predictions,
    }
)
print(output)

     PassengerId  Survived
0            892         0
1            893         1
2            894         0
3            895         0
4            896         1
..           ...       ...
413         1305         0
414         1306         1
415         1307         0
416         1308         0
417         1309         0

[418 rows x 2 columns]


In [24]:
# Write the submission table to disk; index=False keeps the row index out of the file
# so it contains only the PassengerId and Survived columns.
output.to_csv(path_or_buf='my_submission.csv', index=False)

In [25]:
# Confirmation message printed after the cell that writes the submission file.
print("Your submission was successfully saved!", end="\n")

Your submission was successfully saved!
