In [5]:
# 1회
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report, make_scorer

# Load the dataset; the "날짜" (date) column becomes the row index.
file_path = 'real_ks200.csv'
df = pd.read_csv(file_path, index_col="날짜", encoding='utf-8')

# 1. Label encoding: replace the string classes in '레이블' with integer codes.
encoder = LabelEncoder()
df['레이블'] = encoder.fit_transform(df['레이블'])

# Sanity check of the encoding: class order and per-class row counts.
encoding_check = {
    "classes": encoder.classes_,
    "encoded_labels": df['레이블'].value_counts()
}
print("Encoding Check: ", encoding_check)

# 2. Train/test split: 30% held out, stratified on the label, fixed seed.
# NOTE(review): rows are indexed by "날짜" and train_test_split shuffles by default;
# if the features are time-dependent this may leak future information into
# training -- confirm that a shuffled split is intended.
X = df.drop(columns='레이블')
y = df['레이블']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, stratify=y, random_state=42)

# 3. GridSearchCV setup and fit
param_grid = {
    'n_estimators': [50, 51, 52, 53, 54, 55],
    'max_depth': [None, 1, 2, 3, 4, 5],
    'min_samples_split': [2, 3, 4, 5],
    'min_samples_leaf': [1, 2, 3, 4]
}

model = RandomForestClassifier(random_state=42)
# 'accuracy' is scikit-learn's predefined scorer name; it selects the same
# scorer as make_scorer(accuracy_score) without wrapping the metric by hand.
grid_search = GridSearchCV(model, param_grid, cv=5, scoring='accuracy', verbose=2, n_jobs=-1)
grid_search.fit(X_train, y_train)

# 4. Best hyperparameters and their mean cross-validated accuracy
best_params = grid_search.best_params_
best_score = grid_search.best_score_
print("Best Parameters: ", best_params)
print("Best Score: ", best_score)

# 5. Evaluate predictions and print the results as a table
def evaluate_and_display_model(y_test, y_pred):
    """Score predictions against the true labels, print the results, return them.

    Prints scikit-learn's classification report followed by a one-column
    table holding accuracy and the weighted-average precision, recall and F1.

    Args:
        y_test: True labels.
        y_pred: Predicted labels, aligned with ``y_test``.

    Returns:
        pandas.DataFrame with a single ``Score`` column, indexed by metric
        name ("Accuracy", "Precision", "Recall", "F1 Score").
    """
    scores = {"Accuracy": accuracy_score(y_test, y_pred)}
    weighted_scorers = (
        ("Precision", precision_score),
        ("Recall", recall_score),
        ("F1 Score", f1_score),
    )
    for metric_name, scorer in weighted_scorers:
        scores[metric_name] = scorer(y_test, y_pred, average='weighted')

    report_text = classification_report(y_test, y_pred)
    print("\nClassification Report:\n", report_text)

    # One row per metric, in the order computed above.
    metrics_df = pd.DataFrame({'Score': list(scores.values())}, index=list(scores))
    print("\nScores:\n", metrics_df)

    return metrics_df

# 6. Predict on the held-out test set with the best estimator and evaluate.
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test)
# Bind the returned frame to a name: the function already prints the scores,
# and a bare call as the cell's last expression makes the notebook echo the
# same table a second time.
test_metrics = evaluate_and_display_model(y_test, y_pred)


Encoding Check:  {'classes': array(['down', 'neutral', 'up'], dtype=object), 'encoded_labels': 레이블
0    1357
2     847
1     719
Name: count, dtype: int64}
Fitting 5 folds for each of 576 candidates, totalling 2880 fits


  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():


Best Parameters:  {'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 52}
Best Score:  0.8778150158029698

Classification Report:
               precision    recall  f1-score   support

           0       0.96      0.91      0.94       407
           1       0.78      0.82      0.80       216
           2       0.89      0.91      0.90       254

    accuracy                           0.89       877
   macro avg       0.87      0.88      0.88       877
weighted avg       0.89      0.89      0.89       877


Scores:
               Score
Accuracy   0.890536
Precision  0.893450
Recall     0.890536
F1 Score   0.891561


Unnamed: 0,Score
Accuracy,0.890536
Precision,0.89345
Recall,0.890536
F1 Score,0.891561


In [6]:
# 2회
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report, make_scorer

# Load the dataset; the "날짜" (date) column becomes the row index.
file_path = 'real_ks200.csv'
df = pd.read_csv(file_path, index_col="날짜", encoding='utf-8')

# 1. Label encoding: replace the string classes in '레이블' with integer codes.
encoder = LabelEncoder()
df['레이블'] = encoder.fit_transform(df['레이블'])

# Sanity check of the encoding: class order and per-class row counts.
encoding_check = {
    "classes": encoder.classes_,
    "encoded_labels": df['레이블'].value_counts()
}
print("Encoding Check: ", encoding_check)

# 2. Train/test split: 30% held out, stratified on the label, fixed seed.
# NOTE(review): rows are indexed by "날짜" and train_test_split shuffles by default;
# if the features are time-dependent this may leak future information into
# training -- confirm that a shuffled split is intended.
X = df.drop(columns='레이블')
y = df['레이블']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, stratify=y, random_state=42)

# 3. GridSearchCV setup and fit (coarser grid than the first run)
param_grid = {
    'n_estimators': [30, 40, 50, 60, 70],
    'max_depth': [None, 10, 20, 30, 40],
    'min_samples_split': [2, 3, 4],
    'min_samples_leaf': [1, 2, 3]
}

model = RandomForestClassifier(random_state=42)
# 'accuracy' is scikit-learn's predefined scorer name; it selects the same
# scorer as make_scorer(accuracy_score) without wrapping the metric by hand.
grid_search = GridSearchCV(model, param_grid, cv=5, scoring='accuracy', verbose=2, n_jobs=-1)
grid_search.fit(X_train, y_train)

# 4. Best hyperparameters and their mean cross-validated accuracy
best_params = grid_search.best_params_
best_score = grid_search.best_score_
print("Best Parameters: ", best_params)
print("Best Score: ", best_score)

# 5. Evaluate predictions and print the results as a table
def evaluate_and_display_model(y_test, y_pred):
    """Score predictions against the true labels, print the results, return them.

    Prints scikit-learn's classification report followed by a one-column
    table holding accuracy and the weighted-average precision, recall and F1.

    Args:
        y_test: True labels.
        y_pred: Predicted labels, aligned with ``y_test``.

    Returns:
        pandas.DataFrame with a single ``Score`` column, indexed by metric
        name ("Accuracy", "Precision", "Recall", "F1 Score").
    """
    scores = {"Accuracy": accuracy_score(y_test, y_pred)}
    weighted_scorers = (
        ("Precision", precision_score),
        ("Recall", recall_score),
        ("F1 Score", f1_score),
    )
    for metric_name, scorer in weighted_scorers:
        scores[metric_name] = scorer(y_test, y_pred, average='weighted')

    report_text = classification_report(y_test, y_pred)
    print("\nClassification Report:\n", report_text)

    # One row per metric, in the order computed above.
    metrics_df = pd.DataFrame({'Score': list(scores.values())}, index=list(scores))
    print("\nScores:\n", metrics_df)

    return metrics_df

# 6. Predict on the held-out test set with the best estimator and evaluate.
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test)
# Bind the returned frame to a name: the function already prints the scores,
# and a bare call as the cell's last expression makes the notebook echo the
# same table a second time.
test_metrics = evaluate_and_display_model(y_test, y_pred)


Encoding Check:  {'classes': array(['down', 'neutral', 'up'], dtype=object), 'encoded_labels': 레이블
0    1357
2     847
1     719
Name: count, dtype: int64}
Fitting 5 folds for each of 225 candidates, totalling 1125 fits
Best Parameters:  {'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 50}
Best Score:  0.8778138231260064

Classification Report:
               precision    recall  f1-score   support

           0       0.96      0.91      0.94       407
           1       0.78      0.82      0.80       216
           2       0.89      0.92      0.90       254

    accuracy                           0.89       877
   macro avg       0.88      0.89      0.88       877
weighted avg       0.90      0.89      0.89       877


Scores:
               Score
Accuracy   0.892816
Precision  0.895811
Recall     0.892816
F1 Score   0.893836


  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():


Unnamed: 0,Score
Accuracy,0.892816
Precision,0.895811
Recall,0.892816
F1 Score,0.893836


In [7]:
# 3회
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report, make_scorer

# Load the dataset; the "날짜" (date) column becomes the row index.
file_path = 'real_ks200.csv'
df = pd.read_csv(file_path, index_col="날짜", encoding='utf-8')

# 1. Label encoding: replace the string classes in '레이블' with integer codes.
encoder = LabelEncoder()
df['레이블'] = encoder.fit_transform(df['레이블'])

# Sanity check of the encoding: class order and per-class row counts.
encoding_check = {
    "classes": encoder.classes_,
    "encoded_labels": df['레이블'].value_counts()
}
print("Encoding Check: ", encoding_check)

# 2. Train/test split: 30% held out, stratified on the label, fixed seed.
# NOTE(review): rows are indexed by "날짜" and train_test_split shuffles by default;
# if the features are time-dependent this may leak future information into
# training -- confirm that a shuffled split is intended.
X = df.drop(columns='레이블')
y = df['레이블']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, stratify=y, random_state=42)

# 3. GridSearchCV setup and fit (only n_estimators and max_depth are varied)
param_grid = {
    'n_estimators': [45, 50, 55],
    'max_depth': [None, 40, 50, 60],
    'min_samples_split': [2],
    'min_samples_leaf': [1]
}

model = RandomForestClassifier(random_state=42)
# 'accuracy' is scikit-learn's predefined scorer name; it selects the same
# scorer as make_scorer(accuracy_score) without wrapping the metric by hand.
grid_search = GridSearchCV(model, param_grid, cv=5, scoring='accuracy', verbose=2, n_jobs=-1)
grid_search.fit(X_train, y_train)

# 4. Best hyperparameters and their mean cross-validated accuracy
best_params = grid_search.best_params_
best_score = grid_search.best_score_
print("Best Parameters: ", best_params)
print("Best Score: ", best_score)

# 5. Evaluate predictions and print the results as a table
def evaluate_and_display_model(y_test, y_pred):
    """Score predictions against the true labels, print the results, return them.

    Prints scikit-learn's classification report followed by a one-column
    table holding accuracy and the weighted-average precision, recall and F1.

    Args:
        y_test: True labels.
        y_pred: Predicted labels, aligned with ``y_test``.

    Returns:
        pandas.DataFrame with a single ``Score`` column, indexed by metric
        name ("Accuracy", "Precision", "Recall", "F1 Score").
    """
    scores = {"Accuracy": accuracy_score(y_test, y_pred)}
    weighted_scorers = (
        ("Precision", precision_score),
        ("Recall", recall_score),
        ("F1 Score", f1_score),
    )
    for metric_name, scorer in weighted_scorers:
        scores[metric_name] = scorer(y_test, y_pred, average='weighted')

    report_text = classification_report(y_test, y_pred)
    print("\nClassification Report:\n", report_text)

    # One row per metric, in the order computed above.
    metrics_df = pd.DataFrame({'Score': list(scores.values())}, index=list(scores))
    print("\nScores:\n", metrics_df)

    return metrics_df

# 6. Predict on the held-out test set with the best estimator and evaluate.
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test)
# Bind the returned frame to a name: the function already prints the scores,
# and a bare call as the cell's last expression makes the notebook echo the
# same table a second time.
test_metrics = evaluate_and_display_model(y_test, y_pred)


Encoding Check:  {'classes': array(['down', 'neutral', 'up'], dtype=object), 'encoded_labels': 레이블
0    1357
2     847
1     719
Name: count, dtype: int64}
Fitting 5 folds for each of 12 candidates, totalling 60 fits
Best Parameters:  {'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 55}
Best Score:  0.8778150158029698

Classification Report:
               precision    recall  f1-score   support

           0       0.96      0.91      0.94       407
           1       0.78      0.82      0.80       216
           2       0.88      0.91      0.90       254

    accuracy                           0.89       877
   macro avg       0.87      0.88      0.88       877
weighted avg       0.89      0.89      0.89       877


Scores:
               Score
Accuracy   0.889396
Precision  0.892652
Recall     0.889396
F1 Score   0.890542


  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():


Unnamed: 0,Score
Accuracy,0.889396
Precision,0.892652
Recall,0.889396
F1 Score,0.890542
