In [10]:
import pandas as pd
import seaborn as sns

# Titanic survival report: load the dataset, do minimal cleanup, aggregate
# survival statistics per (sex, class), and export the result as CSV.

# 1) Load
df = sns.load_dataset("titanic")

# 2) Get an overview
print(df.head())
# DataFrame.info() prints its summary itself and returns None, so wrapping it
# in print() would emit a stray "None" line after the summary.
df.info()

# 3) Minimal cleanup of missing values
df["age"] = df["age"].fillna(df["age"].median())
df["embarked"] = df["embarked"].fillna("Unknown")

# 4) Convenient derived column (example: child flag)
df["is_child"] = df["age"] < 16

# 5) Aggregate with groupby: compute the survival rate (a very common pattern)
report = (
    # observed=False is spelled out so the result stays the same as before
    # while no longer relying on the implicit default, which pandas deprecates
    # for categorical grouping keys (presumably what triggered the warning on
    # this line -- "class" is expected to be categorical here; confirm).
    df.groupby(["sex", "class"], dropna=False, observed=False)
      .agg(
          passengers=("survived", "size"),
          survived=("survived", "sum"),
          survival_rate=("survived", "mean"),
          avg_age=("age", "mean"),
      )
      .reset_index()
      .sort_values("survival_rate", ascending=False)
)

print(report.head(10))

# 6) Output
report.to_csv("titanic_report.csv", index=False)


   survived  pclass     sex   age  sibsp  parch     fare embarked  class  \
0         0       3    male  22.0      1      0   7.2500        S  Third   
1         1       1  female  38.0      1      0  71.2833        C  First   
2         1       3  female  26.0      0      0   7.9250        S  Third   
3         1       1  female  35.0      1      0  53.1000        S  First   
4         0       3    male  35.0      0      0   8.0500        S  Third   

     who  adult_male deck  embark_town alive  alone  
0    man        True  NaN  Southampton    no  False  
1  woman       False    C    Cherbourg   yes  False  
2  woman       False  NaN  Southampton   yes   True  
3  woman       False    C  Southampton   yes  False  
4    man        True  NaN  Southampton    no   True  
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 15 columns):
 #   Column       Non-Null Count  Dtype   
---  ------       --------------  -----   
 0   survived     891 non-nu

  df.groupby(["sex", "class"], dropna=False)
