In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.model_selection import cross_val_score

# Build a synthetic data frame of random values.
np.random.seed(0)  # seed the global NumPy RNG so every run produces the same values
data = np.random.rand(834, 100)  # 834 rows x 100 columns of random floats
df = pd.DataFrame(
    data,
    # Column labels col1 .. col100, one per column of `data`.
    columns=['col' + str(idx) for idx in range(1, data.shape[1] + 1)],
)
# Append a random binary (0/1) value per row as the target variable.
df['target'] = np.random.randint(low=0, high=2, size=len(df))

# Separate the label column from the features, then hold out 20% of the rows
# as a test set (the split itself is seeded via random_state=42).
y = df['target']
X = df.drop(columns='target')
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Candidate models to compare. Each entry pairs a display name with an
# unfitted estimator instance.
# The tree-based estimators are stochastic; random_state is pinned (matching
# the seed used for the train/test split) so each model's scores do not depend
# on how much of the global NumPy random stream was consumed before it ran,
# e.g. when models are reordered, added, or removed.
models = [
    {"name": "Logistic Regression", "model": LogisticRegression(max_iter=1000)},
    {"name": "Decision Tree", "model": DecisionTreeClassifier(random_state=42)},
    {"name": "Random Forest", "model": RandomForestClassifier(random_state=42)},
    {"name": "Gradient Boosting", "model": GradientBoostingClassifier(random_state=42)},
]

# For every candidate model: report 5-fold cross-validation accuracy on the
# training split, then fit on the whole training split and report accuracy,
# confusion matrix, and classification report on the held-out test split.
for m in models:
    model = m["model"]
    cv_scores = cross_val_score(model, X_train, y_train, cv=5)  # 5-fold cross-validation
    print(f"Model: {m['name']}")
    print(f"Cross-validation scores: {cv_scores}")
    print(f"Mean cross-validation score: {np.mean(cv_scores)}")

    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    print(f"Test Accuracy: {accuracy_score(y_test, y_pred)}")
    # No space after "\n": a leading space would shift only the first line of
    # the multi-line matrix/report and misalign it against the remaining rows.
    print(f"Confusion Matrix:\n{confusion_matrix(y_test, y_pred)}")
    print(f"Classification Report:\n{classification_report(y_test, y_pred)}")

    print("\n-----------------------------------\n")


Model: Logistic Regression
Cross-validation scores: [0.52985075 0.5        0.4962406  0.36842105 0.5112782 ]
Mean cross-validation score: 0.4811581191785434
Test Accuracy: 0.5449101796407185
Confusion Matrix:
 [[46 44]
 [32 45]]
Classification Report:
               precision    recall  f1-score   support

           0       0.59      0.51      0.55        90
           1       0.51      0.58      0.54        77

    accuracy                           0.54       167
   macro avg       0.55      0.55      0.54       167
weighted avg       0.55      0.54      0.55       167


-----------------------------------

Model: Decision Tree
Cross-validation scores: [0.44776119 0.47014925 0.4962406  0.45864662 0.54135338]
Mean cross-validation score: 0.4828302098529907
Test Accuracy: 0.5029940119760479
Confusion Matrix:
 [[50 40]
 [43 34]]
Classification Report:
               precision    recall  f1-score   support

           0       0.54      0.56      0.55        90
           1       0.46   