In [1]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.model_selection import LeaveOneOut
from sklearn.metrics import mean_squared_error, mean_absolute_error
from scipy.stats import pearsonr
import warnings
warnings.filterwarnings('ignore')

# Prepare the data: ten hand-entered samples, one list per column.
# The first column is the regression target; the other four are the features.
data = {
    'A-GPT-AGREE': [2, 2, 2, 2, 2, 2, 4, 2, 4, 2],
    '1-反論の遠さ(正規化)': [
        0.2916666667, 0.391025641, 0.4886363636, 0.6306818182, 0.5357142857,
        0.1666666667, 0.6370967742, 0.45, 0.84375, 0.375,
    ],
    '反論のラリー(正規化)': [
        0.5149038462, 0.4903846154, 0.6346153846, 0.4903846154, 0.6183110368,
        0.2064777328, 0.6937148218, 0.3064903846, 0.6538461538, 0.450928382,
    ],
    '1-反論の間隔v1(正規化)': [
        0.0714786917, 0.180349489, 0.6554566436, 0.2443560396, 0.5482078093,
        0.7245380183, 0.9208704253, 0.1673810309, 0.8694362018, 0.9274645565,
    ],
    '反論の順序の対応度(正規化)': [
        0.4048442907, 0.613003096, 0.6274509804, 0.2301790281, 0.5173796791,
        0.7537556561, 0.4191176471, 0.9653979239, 0.9264705882, 0.5294117647,
    ],
}

df = pd.DataFrame(data)

# Target vector and feature matrix. Dropping the target column leaves the
# four feature columns in their original (dict insertion) order.
y = df['A-GPT-AGREE'].to_numpy()
X = df.drop(columns='A-GPT-AGREE').to_numpy()

# Candidate regressors, keyed by the label used in the printed reports.
models = {
    '重回帰': LinearRegression(),
    'Ridge回帰': Ridge(alpha=1.0),
    'Lasso回帰': Lasso(alpha=0.1, max_iter=1000),
}

# Leave-one-out cross-validation: each sample is held out exactly once.
loo = LeaveOneOut()

results = {}

for model_name, model in models.items():
    held_out_targets = []
    held_out_predictions = []

    for fit_rows, eval_rows in loo.split(X):
        # Refit the same estimator object on every sample except the held-out one.
        model.fit(X[fit_rows], y[fit_rows])

        # Each fold holds out a single row, so keep only the first element.
        held_out_predictions.append(model.predict(X[eval_rows])[0])
        held_out_targets.append(y[eval_rows][0])

    predictions = np.array(held_out_predictions)
    true_values = np.array(held_out_targets)

    # Metrics computed over the pooled held-out predictions.
    rmse = np.sqrt(mean_squared_error(true_values, predictions))
    mae = mean_absolute_error(true_values, predictions)
    correlation, _ = pearsonr(true_values, predictions)

    results[model_name] = {
        'RMSE': rmse,
        'MAE': mae,
        '相関': correlation,
        '予測値': predictions,
        '実測値': true_values,
    }

# Per-model summary of the leave-one-out metrics.
print("Leave-One-Out Cross-Validation 結果")
print("=" * 50)

for model_name, result in results.items():
    print(f"\n{model_name}:")
    print(f"  RMSE: {result['RMSE']:.4f}")
    print(f"  MAE:  {result['MAE']:.4f}")
    print(f"  相関:  {result['相関']:.4f}")

# Per-sample listing: the held-out target followed by every model's prediction.
print("\n" + "=" * 50)
print("詳細な予測結果")
print("=" * 50)

# Read the held-out targets back from `results` rather than from whatever
# `true_values` happened to be left bound by the last pass of the evaluation
# loop. Every model is evaluated on the same leave-one-out splits, so the
# first entry's '実測値' array is representative; an empty `results` simply
# prints no rows instead of raising NameError.
observed_values = next(iter(results.values()))['実測値'] if results else []

for i, observed in enumerate(observed_values):
    print(f"データ点 {i+1}: 実測値 = {observed}")
    # Iterate `results` (not `models`) so only evaluated models are indexed.
    for model_name, result in results.items():
        pred = result['予測値'][i]
        print(f"  {model_name}: {pred:.4f}")
    print()

# Compact summary table, one row per model.
print("=" * 50)
print("結果要約表")
print("=" * 50)
print(f"{'モデル':<10} {'RMSE':<8} {'MAE':<8} {'相関':<8}")
print("-" * 40)
for model_name, result in results.items():
    print(f"{model_name:<10} {result['RMSE']:<8.4f} {result['MAE']:<8.4f} {result['相関']:<8.4f}")

# Coefficients of each model (for reference), after refitting on all samples.
print("\n" + "=" * 50)
print("各モデルの係数（全データで学習）")
print("=" * 50)

# Labels for the coefficients, in the same order as the columns of X.
feature_names = [
    '1-反論の遠さ(正規化)',
    '反論のラリー(正規化)',
    '1-反論の間隔v1(正規化)',
    '反論の順序の対応度(正規化)',
]

for model_name, model in models.items():
    # Refitting overwrites whatever state the estimator kept from cross-validation.
    model.fit(X, y)
    print(f"\n{model_name}:")
    print(f"  切片: {model.intercept_:.4f}")
    for name, coef in zip(feature_names, model.coef_):
        print(f"  {name}: {coef:.4f}")

Leave-One-Out Cross-Validation 結果

重回帰:
  RMSE: 1.1392
  MAE:  0.9288
  相関:  0.2212

Ridge回帰:
  RMSE: 0.7854
  MAE:  0.5955
  相関:  0.2053

Lasso回帰:
  RMSE: 0.9043
  MAE:  0.7091
  相関:  -0.4560

詳細な予測結果
データ点 1: 実測値 = 2
  重回帰: 0.9488
  Ridge回帰: 1.9457
  Lasso回帰: 2.2050

データ点 2: 実測値 = 2
  重回帰: 1.8496
  Ridge回帰: 2.1414
  Lasso回帰: 2.2638

データ点 3: 実測値 = 2
  重回帰: 2.9548
  Ridge回帰: 2.6127
  Lasso回帰: 2.5266

データ点 4: 実測値 = 2
  重回帰: 4.5683
  Ridge回帰: 2.3247
  Lasso回帰: 2.3941

データ点 5: 実測値 = 2
  重回帰: 2.7073
  Ridge回帰: 2.5371
  Lasso回帰: 2.4680

データ点 6: 実測値 = 2
  重回帰: 1.1179
  Ridge回帰: 2.3179
  Lasso回帰: 2.5850

データ点 7: 実測値 = 4
  重回帰: 2.6050
  Ridge回帰: 2.4292
  Lasso回帰: 2.2222

データ点 8: 実測値 = 2
  重回帰: 2.0876
  Ridge回帰: 2.1928
  Lasso回帰: 2.2693

データ点 9: 実測値 = 4
  重回帰: 3.2475
  Ridge回帰: 2.4836
  Lasso回帰: 2.2222

データ点 10: 実測値 = 2
  重回帰: 2.7384
  Ridge回帰: 2.6868
  Lasso回帰: 2.8240

結果要約表
モデル        RMSE     MAE      相関      
----------------------------------------
重回帰        1.1392   0.9288   0.2212  
Ridg

In [4]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.model_selection import LeaveOneOut
from sklearn.metrics import mean_squared_error, mean_absolute_error
from scipy.stats import pearsonr
import warnings
warnings.filterwarnings('ignore')

# Load the data set from a CSV file in the working directory.
df = pd.read_csv('motion_title_content_with_graphs-gpt-a.csv')

# Name of the target column and of the feature columns used for regression.
target = 'A-GPT-AVG'
features = [
    '1-反論の遠さ(正規化)', '反論のラリー(正規化)',
    '1-反論の間隔v1(正規化)', '反論の順序の対応度(正規化)',
]

# Basic descriptive statistics for the target and the features.
print("データの基本統計:", f"サンプル数: {len(df)}", f"目的変数 {target} の統計:", sep="\n")
print(df[target].describe())
print("\n特徴量の統計:")
print(df[features].describe())

# Feature matrix and target vector as plain arrays for scikit-learn.
X = df.loc[:, features].values
y = df.loc[:, target].values

print(f"\nX shape: {X.shape}", f"y shape: {y.shape}", sep="\n")

# Leave-one-out cross-validation: each sample is held out exactly once.
loo = LeaveOneOut()

# Regression models to compare, keyed by display name.
models = {
    'Linear Regression': LinearRegression(),
    'Ridge Regression': Ridge(alpha=1.0),
    'Lasso Regression': Lasso(alpha=0.01, max_iter=10000),
}

# Metrics per model, filled in by the loop below.
results = {}

for model_name, model in models.items():
    print(f"\n{model_name} の評価:")

    # Pooled held-out targets and predictions across all folds.
    held_out_truth, held_out_pred = [], []

    for fit_rows, eval_rows in loo.split(X):
        # Train on everything except the held-out row, then predict that row.
        model.fit(X[fit_rows], y[fit_rows])
        held_out_pred.extend(model.predict(X[eval_rows]))
        held_out_truth.extend(y[eval_rows])

    truth = np.array(held_out_truth)
    pred = np.array(held_out_pred)

    # Error metrics and Pearson correlation over the pooled predictions.
    rmse = np.sqrt(mean_squared_error(truth, pred))
    mae = mean_absolute_error(truth, pred)
    correlation, _ = pearsonr(truth, pred)

    # Store the metrics for the comparison table.
    results[model_name] = dict(RMSE=rmse, MAE=mae, Correlation=correlation)

    print(f"RMSE: {rmse:.4f}", f"MAE: {mae:.4f}", f"相関係数: {correlation:.4f}", sep="\n")

# Side-by-side comparison table of all models.
print("\n" + "=" * 60)
print("全モデルの性能比較")
print("=" * 60)
print(f"{'モデル':<20} {'RMSE':<10} {'MAE':<10} {'相関係数':<10}")
print("-" * 60)

for name, scores in results.items():
    print(f"{name:<20} {scores['RMSE']:<10.4f} {scores['MAE']:<10.4f} {scores['Correlation']:<10.4f}")

# Pick the winner per metric: lowest error, highest correlation.
# On ties, min()/max() keep the first model in insertion order.
best_rmse_model = min(results, key=lambda name: results[name]['RMSE'])
best_mae_model = min(results, key=lambda name: results[name]['MAE'])
best_corr_model = max(results, key=lambda name: results[name]['Correlation'])

print("\n最良の性能:")
print(f"最小RMSE: {best_rmse_model} ({results[best_rmse_model]['RMSE']:.4f})")
print(f"最小MAE: {best_mae_model} ({results[best_mae_model]['MAE']:.4f})")
print(f"最大相関: {best_corr_model} ({results[best_corr_model]['Correlation']:.4f})")

# Refit every model on the full data set and list its coefficients.
print("\n" + "=" * 60)
print("特徴量の重要度（全データで学習）")
print("=" * 60)

for model_name, model in models.items():
    model.fit(X, y)

    print(f"\n{model_name}:")
    # Skip estimators that expose no coefficients after fitting.
    if not hasattr(model, 'coef_'):
        continue
    # Coefficients are paired with the feature names in column order.
    for feature, weight in zip(features, model.coef_):
        print(f"  {feature}: {weight:.4f}")
    if hasattr(model, 'intercept_'):
        print(f"  切片: {model.intercept_:.4f}")

# Actual vs. predicted values for the model with the highest correlation.
# NOTE: the model is fit on all of X, y and then predicts the same X, so the
# values listed here are in-sample fitted values, not held-out predictions.
print(f"\n{best_corr_model}での予測値と実際値:")
best_model = models[best_corr_model]
best_model.fit(X, y)
y_pred_all = best_model.predict(X)

print("実際値 vs 予測値:")
for row_number, (actual, fitted) in enumerate(zip(y, y_pred_all), start=1):
    print(f"{row_number:2d}: 実際={actual:.2f}, 予測={fitted:.2f}, 差={abs(actual - fitted):.2f}")

データの基本統計:
サンプル数: 20
目的変数 A-GPT-AVG の統計:
count    20.000000
mean      2.525000
std       0.638151
min       2.000000
25%       2.000000
50%       2.500000
75%       2.500000
max       4.000000
Name: A-GPT-AVG, dtype: float64

特徴量の統計:
       1-反論の遠さ(正規化)  反論のラリー(正規化)  1-反論の間隔v1(正規化)  反論の順序の対応度(正規化)
count     20.000000    20.000000       20.000000       20.000000
mean       0.437041     0.579683        0.573269        0.604745
std        0.182180     0.189495        0.325688        0.219043
min        0.000000     0.206478        0.000000        0.230179
25%        0.357639     0.480521        0.247405        0.418330
50%        0.447222     0.556381        0.595089        0.571207
75%        0.532366     0.653846        0.882295        0.775363
max        0.843750     1.000000        1.000000        1.000000

X shape: (20, 4)
y shape: (20,)

Linear Regression の評価:
RMSE: 0.4295
MAE: 0.3068
相関係数: 0.7284

Ridge Regression の評価:
RMSE: 0.5229
MAE: 0.3673
相関係数: 0.6123

Lasso Regression の評価:
RMS