# タイタニックデータを用いた数量化理論

## ライブラリのインポート

In [2]:
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
import pandas as pd
import statsmodels.api as sm
import numpy as np
import matplotlib.pyplot as plt

## データ読み込み

In [4]:
df = pd.read_csv("dataset/titanic/train.csv")
df = df.dropna() # 今回欠損値は削除する
df.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
6,7,0,1,"McCarthy, Mr. Timothy J",male,54.0,0,0,17463,51.8625,E46,S
10,11,1,3,"Sandstrom, Miss. Marguerite Rut",female,4.0,1,1,PP 9549,16.7,G6,S
11,12,1,1,"Bonnell, Miss. Elizabeth",female,58.0,0,0,113783,26.55,C103,S


In [28]:
# 今回はカテゴリカラムを使用するので、量的変数は削除
x1 = pd.get_dummies(df.drop(["PassengerId", "Survived","Pclass","Name", "Age", "Fare", "Ticket", "Cabin"], axis=1), drop_first = True)
x1.head()

Unnamed: 0,SibSp,Parch,Sex_male,Embarked_Q,Embarked_S
1,1,0,False,False,False
3,1,0,False,False,True
6,0,0,True,False,True
10,1,1,False,False,True
11,0,0,False,False,True


In [31]:
x1[["Sex_male", "Embarked_Q", "Embarked_S"]] = x1[["Sex_male", "Embarked_Q", "Embarked_S"]].astype(int)
x1.head()

Unnamed: 0,SibSp,Parch,Sex_male,Embarked_Q,Embarked_S
1,1,0,0,0,0
3,1,0,0,0,1
6,0,0,1,0,1
10,1,1,0,0,1
11,0,0,0,0,1


## 数量化Ⅰ類

### 年齢を分析

In [32]:
y1 = df["Age"]
model1 = sm.OLS(y1, x1).fit() # Ordinary Least Squares (最小二乗法)
model1.summary()

0,1,2,3
Dep. Variable:,Age,R-squared (uncentered):,0.649
Model:,OLS,Adj. R-squared (uncentered):,0.639
Method:,Least Squares,F-statistic:,65.74
Date:,"Tue, 11 Feb 2025",Prob (F-statistic):,1.25e-38
Time:,08:40:19,Log-Likelihood:,-834.08
No. Observations:,183,AIC:,1678.0
Df Residuals:,178,BIC:,1694.0
Df Model:,5,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
SibSp,6.6226,2.725,2.430,0.016,1.245,12.000
Parch,0.4254,2.352,0.181,0.857,-4.216,5.066
Sex_male,23.0024,3.029,7.593,0.000,17.024,28.981
Embarked_Q,17.0649,17.084,0.999,0.319,-16.649,50.778
Embarked_S,18.7679,3.008,6.240,0.000,12.833,24.703

0,1,2,3
Omnibus:,2.392,Durbin-Watson:,1.733
Prob(Omnibus):,0.302,Jarque-Bera (JB):,2.002
Skew:,-0.237,Prob(JB):,0.368
Kurtosis:,3.197,Cond. No.,12.2
