In [1]:
import numpy as np
import pandas as pd

In [13]:
titanic = pd.read_csv("./data/titanic.csv")
titanic.columns

Index(['PassengerId', 'Survived', 'Pclass', 'Name', 'Sex', 'Age', 'SibSp',
       'Parch', 'Ticket', 'Fare', 'Cabin', 'Embarked'],
      dtype='str')

In [15]:
# 01. 클래스별 생존률
print(titanic.groupby("Pclass")["Survived"].mean().round(2))

Pclass
1    0.63
2    0.47
3    0.24
Name: Survived, dtype: float64


In [16]:
# 02. 성별에 따른 평균 나이와 평균 요금
print(titanic.groupby("Sex")[["Age", "Fare"]].mean().round(2))

          Age   Fare
Sex                 
female  27.92  44.48
male    30.73  25.52


In [35]:
# 03. 출항지별 승객 수와 생존자 수
df1 = titanic.groupby("Embarked")["PassengerId"].count().rename("total_passengers")
df2 = titanic[titanic["Survived"] == 1].groupby("Embarked")["Survived"].sum().rename("survived_count")

df3 = pd.merge(df1, df2, on="Embarked", how="inner")
print(df3)

          total_passengers  survived_count
Embarked                                  
C                      168              93
Q                       77              30
S                      644             217


In [29]:
# 04. 나이 구간별 생존률
ser = titanic["Age"]
sub = pd.cut(ser, bins=[0, 10, 20, 30, 40, 50, np.inf], labels=["0~10", "10~20", "20~30", "30~40", "40~50", "50+"], include_lowest=True)
titanic.loc[:, "AgeGroup"] = sub
res = titanic.groupby("AgeGroup")["Survived"].mean().round(2)
print(res)

AgeGroup
0~10     0.59
10~20    0.38
20~30    0.37
30~40    0.45
40~50    0.38
50+      0.34
Name: Survived, dtype: float64


In [33]:
# 05. 동반 가족 수에 따른 평균 생존률
pt = pd.pivot_table(
    titanic,
    index="SibSp",
    columns="Parch",
    values="Survived",
    aggfunc="mean",
    fill_value=0.00
).round(2)

print(pt)

Parch     0     1     2     3    4     5    6
SibSp                                        
0      0.30  0.66  0.72  1.00  0.0  0.00  0.0
1      0.52  0.60  0.63  0.33  0.0  0.33  0.0
2      0.25  0.86  0.50  1.00  0.0  0.00  0.0
3      1.00  0.00  0.29  0.00  0.0  0.00  0.0
4      0.00  0.00  0.33  0.00  0.0  0.00  0.0
5      0.00  0.00  0.00  0.00  0.0  0.00  0.0
8      0.00  0.00  0.00  0.00  0.0  0.00  0.0
