**Hồi Quy Logistic**

1. Tải dữ liệu
2. Tạo biến Y=0 nếu giá CK giảm và biến Y=1 nếu giá CK tăng.
3. Sử dụng hàm LogisticRegression từ thư viện sklearn

**CHẠY TẤT CẢ CHỨNG KHOÁN**

In [1]:
import os
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report,precision_score, recall_score, f1_score
from sklearn.preprocessing import StandardScaler

In [2]:
# Đường dẫn đến thư mục chứa các file CSV
folder_path = r'D:\DeTaiNam2024\SoLieu\CK_HOSE_VNSTOCK'

# Lấy danh sách tất cả các file CSV trong thư mục
file_list = [f for f in os.listdir(folder_path) if f.endswith('.csv')]
results=[]
# Vòng lặp qua từng file CSV
for file_name in file_list:
    # Đọc dữ liệu từ file CSV
    file_path = os.path.join(folder_path, file_name)
    df = pd.read_csv(file_path)
    
    # Sắp xếp theo ngày
    df = df.sort_values('time')

    # Tạo cột Y: nếu giá đóng cửa hôm nay thấp hơn hôm qua, Y=0, nếu ngược lại Y=1
    df['Y'] = (df['close'].diff() > 0).astype(int)

    # Hiển thị dữ liệu với cột Y
    print(df[['time', 'close', 'Y']])
    # Shift dữ liệu Close để lấy giá của ngày trước làm đặc trưng
    df['Close_lag1'] = df['close'].shift(1)
    df['Volume_lag1'] = df['volume'].shift(1)

    # Loại bỏ các hàng chứa giá trị NaN
    df.dropna(inplace=True)

    # Chọn các đặc trưng (X) và mục tiêu (Y)
    X = df[['Close_lag1', 'Volume_lag1']]
    Y = df['Y']
    
    # Chia dữ liệu thành tập huấn luyện và kiểm tra
    X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=42)
    
    # Khởi tạo mô hình Hồi quy Logistic
    model = LogisticRegression()
    
    # Huấn luyện mô hình
    model.fit(X_train, y_train)
    
    # Dự đoán trên tập kiểm tra
    y_pred = model.predict(X_test)
    
    # Tính độ chính xác
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='weighted')
    recall = recall_score(y_test, y_pred, average='weighted')
    f1 = f1_score(y_test, y_pred, average='weighted')
    
      # Lưu kết quả vào danh sách
    results.append({
        'file': file_name,
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1_score': f1
    })
# Ghi kết quả ra file CSV
results_df = pd.DataFrame(results)
results_df.to_csv(r'D:\DeTaiNam2024\SoLieu\CK_HOSE_VNSTOCK\output\logistic_regression_results_HOSE.csv', index=False)

            time  close  Y
0     2015-01-05   5.60  0
1     2015-01-06   5.68  1
2     2015-01-07   5.88  1
3     2015-01-08   5.76  0
4     2015-01-09   5.64  0
...          ...    ... ..
2416  2024-09-13   9.77  1
2417  2024-09-16   9.85  1
2418  2024-09-17   9.85  0
2419  2024-09-18   9.80  0
2420  2024-09-19   9.81  1

[2421 rows x 3 columns]
            time  close  Y
0     2015-01-05   7.18  0
1     2015-01-06   7.13  0
2     2015-01-07   7.18  1
3     2015-01-08   7.23  1
4     2015-01-09   6.78  0
...          ...    ... ..
2420  2024-09-13   7.40  1
2421  2024-09-16   7.45  1
2422  2024-09-17   7.45  0
2423  2024-09-18   7.30  0
2424  2024-09-19   7.60  1

[2425 rows x 3 columns]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


           time  close  Y
0    2021-03-24   9.51  0
1    2021-03-25   9.13  0
2    2021-03-26   8.50  0
3    2021-03-29   8.24  0
4    2021-03-30   8.80  1
..          ...    ... ..
868  2024-09-13   3.86  0
869  2024-09-16   3.79  0
870  2024-09-17   3.71  0
871  2024-09-18   3.60  0
872  2024-09-19   3.64  1

[873 rows x 3 columns]
            time  close  Y
0     2018-06-12   0.00  0
1     2018-06-13   9.26  1
2     2018-06-14   9.26  0
3     2018-06-15   9.26  0
4     2018-06-18   9.26  0
...          ...    ... ..
1547  2024-09-12  12.40  1
1548  2024-09-16  12.75  1
1549  2024-09-17  13.30  1
1550  2024-09-18  14.00  1
1551  2024-09-19  13.50  0

[1552 rows x 3 columns]
            time  close  Y
0     2020-03-18   9.25  0
1     2020-03-19   9.89  1
2     2020-03-20  10.57  1
3     2020-03-23  11.28  1
4     2020-03-24  12.07  1
...          ...    ... ..
1123  2024-09-13   4.30  0
1124  2024-09-16   4.25  0
1125  2024-09-17   4.24  0
1126  2024-09-18   4.36  1
1127  2024-09-19  

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


            time  close  Y
0     2015-01-05  26.35  0
1     2015-01-06  26.35  0
2     2015-01-07  26.10  0
3     2015-01-08  27.09  1
4     2015-01-09  28.56  1
...          ...    ... ..
2407  2024-09-11  41.80  0
2408  2024-09-13  41.80  0
2409  2024-09-16  43.50  1
2410  2024-09-17  41.70  0
2411  2024-09-19  40.50  0

[2412 rows x 3 columns]
            time  close  Y
0     2015-01-05   2.80  0
1     2015-01-06   2.84  1
2     2015-01-07   2.87  1
3     2015-01-08   2.89  1
4     2015-01-09   2.91  1
...          ...    ... ..
2416  2024-09-13  24.40  0
2417  2024-09-16  24.15  0
2418  2024-09-17  24.50  1
2419  2024-09-18  24.65  1
2420  2024-09-19  24.80  1

[2421 rows x 3 columns]
            time  close  Y
0     2015-01-05   6.20  0
1     2015-01-06   5.99  0
2     2015-01-07   5.82  0
3     2015-01-08   5.82  0
4     2015-01-09   6.13  1
...          ...    ... ..
2421  2024-09-13  13.80  0
2422  2024-09-16  13.85  1
2423  2024-09-17  13.75  0
2424  2024-09-18  13.75  0
2425 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


            time  close  Y
0     2015-01-05   2.51  0
1     2015-01-06   2.51  0
2     2015-01-07   2.33  0
3     2015-01-08   2.33  0
4     2015-01-09   2.40  1
...          ...    ... ..
2412  2024-09-13  29.95  0
2413  2024-09-16  30.00  1
2414  2024-09-17  30.00  0
2415  2024-09-18  30.25  1
2416  2024-09-19  30.20  0

[2417 rows x 3 columns]
            time  close  Y
0     2016-06-29   4.35  0
1     2016-06-30   4.64  1
2     2016-07-01   4.96  1
3     2016-07-04   4.64  0
4     2016-07-05   4.32  0
...          ...    ... ..
2054  2024-09-13   9.87  0
2055  2024-09-16   9.88  1
2056  2024-09-17   9.95  1
2057  2024-09-18   9.94  0
2058  2024-09-19  10.00  1

[2059 rows x 3 columns]
            time  close  Y
0     2020-01-09  21.02  0
1     2020-01-10  15.49  0
2     2020-01-13  15.36  0
3     2020-01-14  15.03  0
4     2020-01-15  15.11  1
...          ...    ... ..
1167  2024-09-13  16.90  1
1168  2024-09-16  16.50  0
1169  2024-09-17  16.70  1
1170  2024-09-18  16.75  1
1171 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


            time  close  Y
0     2015-01-05   7.63  0
1     2015-01-06   7.63  0
2     2015-01-07   7.63  0
3     2015-01-08   7.30  0
4     2015-01-09   7.56  1
...          ...    ... ..
2421  2024-09-13   3.71  1
2422  2024-09-16   3.96  1
2423  2024-09-17   4.23  1
2424  2024-09-18   4.52  1
2425  2024-09-19   4.83  1

[2426 rows x 3 columns]
            time  close  Y
0     2015-01-05   5.88  0
1     2015-01-06   5.88  0
2     2015-01-07   5.98  1
3     2015-01-08   5.98  0
4     2015-01-09   6.07  1
...          ...    ... ..
2421  2024-09-13  17.75  1
2422  2024-09-16  17.65  0
2423  2024-09-17  18.20  1
2424  2024-09-18  18.35  1
2425  2024-09-19  18.40  1

[2426 rows x 3 columns]
            time  close  Y
0     2015-01-05   3.05  0
1     2015-01-06   3.08  1
2     2015-01-07   3.27  1
3     2015-01-08   3.40  1
4     2015-01-09   3.36  0
...          ...    ... ..
2421  2024-09-13  31.55  0
2422  2024-09-16  31.10  0
2423  2024-09-17  31.50  1
2424  2024-09-18  31.65  1
2425 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


            time  close  Y
0     2020-07-28  30.17  0
1     2020-07-29  32.23  1
2     2020-07-30  34.47  1
3     2020-07-31  36.53  1
4     2020-08-03  34.53  0
...          ...    ... ..
1032  2024-09-13   6.24  1
1033  2024-09-16   6.41  1
1034  2024-09-17   6.23  0
1035  2024-09-18   6.28  1
1036  2024-09-19   6.27  0

[1037 rows x 3 columns]
           time  close  Y
0    2020-09-24  27.27  0
1    2020-09-25  29.17  1
2    2020-09-28  31.17  1
3    2020-09-29  33.33  1
4    2020-09-30  35.64  1
..          ...    ... ..
979  2024-09-13  19.20  1
980  2024-09-16  19.15  0
981  2024-09-17  19.30  1
982  2024-09-18  19.15  0
983  2024-09-19  19.25  1

[984 rows x 3 columns]
            time  close  Y
0     2015-01-05   3.94  0
1     2015-01-06   3.99  1
2     2015-01-07   3.99  0
3     2015-01-08   3.94  0
4     2015-01-09   3.99  1
...          ...    ... ..
2421  2024-09-13   9.02  1
2422  2024-09-16   8.95  0
2423  2024-09-17   9.03  1
2424  2024-09-18   9.11  1
2425  2024-09-19  

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


            time  close  Y
0     2018-01-04  38.21  0
1     2018-01-05  40.83  1
2     2018-01-08  43.66  1
3     2018-01-09  46.70  1
4     2018-01-10  49.96  1
...          ...    ... ..
1670  2024-09-13  56.60  0
1671  2024-09-16  56.10  0
1672  2024-09-17  56.10  0
1673  2024-09-18  55.90  0
1674  2024-09-19  56.50  1

[1675 rows x 3 columns]
           time  close  Y
0    2021-12-03   9.21  0
1    2021-12-06   9.84  1
2    2021-12-07  10.51  1
3    2021-12-08  11.24  1
4    2021-12-09  12.03  1
..          ...    ... ..
691  2024-09-13  20.00  1
692  2024-09-16  19.60  0
693  2024-09-17  19.85  1
694  2024-09-18  20.00  1
695  2024-09-19  20.65  1

[696 rows x 3 columns]
            time  close  Y
0     2015-01-05  46.69  0
1     2015-01-06  49.61  1
2     2015-01-07  48.36  0
3     2015-01-08  48.36  0
4     2015-01-09  48.36  0
...          ...    ... ..
2414  2024-09-13  51.00  0
2415  2024-09-16  51.00  0
2416  2024-09-17  50.50  0
2417  2024-09-18  50.50  0
2418  2024-09-19  

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


            time  close  Y
0     2015-07-16   8.47  0
1     2015-07-17   8.99  1
2     2015-07-20   9.47  1
3     2015-07-21   8.80  0
4     2015-07-22   8.19  0
...          ...    ... ..
2293  2024-09-13   6.40  1
2294  2024-09-16   6.36  0
2295  2024-09-17   6.44  1
2296  2024-09-18   6.43  0
2297  2024-09-19   6.44  1

[2298 rows x 3 columns]
            time  close  Y
0     2018-02-21  23.21  0
1     2018-02-22  23.21  0
2     2018-02-23  21.35  0
3     2018-02-26  21.26  0
4     2018-02-27  22.28  1
...          ...    ... ..
1634  2024-09-13  70.90  0
1635  2024-09-16  70.30  0
1636  2024-09-17  71.40  1
1637  2024-09-18  71.40  0
1638  2024-09-19  71.90  1

[1639 rows x 3 columns]
            time  close  Y
0     2015-10-07   9.52  0
1     2015-10-08   9.41  0
2     2015-10-09   9.38  0
3     2015-10-12   8.75  0
4     2015-10-13   8.30  0
...          ...    ... ..
2235  2024-09-13  45.05  1
2236  2024-09-16  44.60  0
2237  2024-09-17  44.80  1
2238  2024-09-18  44.30  0
2239 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


            time  close  Y
0     2015-01-05   6.85  0
1     2015-01-06   6.81  0
2     2015-01-07   6.81  0
3     2015-01-08   6.89  1
4     2015-01-09   6.89  0
...          ...    ... ..
2421  2024-09-13  22.85  1
2422  2024-09-16  22.50  0
2423  2024-09-17  22.60  1
2424  2024-09-18  22.75  1
2425  2024-09-19  22.80  1

[2426 rows x 3 columns]
            time   close  Y
0     2015-01-05   20.01  0
1     2015-01-06   20.56  1
2     2015-01-07   20.97  1
3     2015-01-08   20.97  0
4     2015-01-09   21.10  1
...          ...     ... ..
2421  2024-09-13  111.90  1
2422  2024-09-16  115.10  1
2423  2024-09-17  115.70  1
2424  2024-09-18  113.70  0
2425  2024-09-19  121.60  1

[2426 rows x 3 columns]
            time  close  Y
0     2015-01-05   4.57  0
1     2015-01-06   4.35  0
2     2015-01-07   4.48  1
3     2015-01-08   4.48  0
4     2015-01-09   4.31  0
...          ...    ... ..
2421  2024-09-13  12.60  0
2422  2024-09-16  12.55  0
2423  2024-09-17  12.50  0
2424  2024-09-18  12

  _warn_prf(average, modifier, msg_start, len(result))


            time  close  Y
0     2015-01-05   5.55  0
1     2015-01-06   5.66  1
2     2015-01-07   5.60  0
3     2015-01-08   5.45  0
4     2015-01-09   5.55  1
...          ...    ... ..
2415  2024-09-13  47.45  1
2416  2024-09-16  46.70  0
2417  2024-09-17  47.70  1
2418  2024-09-18  48.40  1
2419  2024-09-19  48.95  1

[2420 rows x 3 columns]
            time  close  Y
0     2015-01-05   5.46  0
1     2015-01-06   5.54  1
2     2015-01-07   5.77  1
3     2015-01-08   5.73  0
4     2015-01-09   5.70  0
...          ...    ... ..
2421  2024-09-13  12.95  1
2422  2024-09-16  12.90  0
2423  2024-09-17  12.95  1
2424  2024-09-18  12.80  0
2425  2024-09-19  12.95  1

[2426 rows x 3 columns]
            time  close  Y
0     2015-01-05  20.65  0
1     2015-01-06  20.65  0
2     2015-01-07  20.65  0
3     2015-01-08  20.65  0
4     2015-01-09  20.65  0
...          ...    ... ..
2418  2024-09-10  38.90  0
2419  2024-09-11  38.80  0
2420  2024-09-12  38.80  0
2421  2024-09-13  38.80  0
2422 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


            time  close  Y
0     2017-07-20  12.44  0
1     2017-07-21  13.31  1
2     2017-07-24  14.22  1
3     2017-07-25  15.20  1
4     2017-07-26  16.25  1
...          ...    ... ..
1788  2024-09-13  46.10  0
1789  2024-09-16  45.60  0
1790  2024-09-17  45.20  0
1791  2024-09-18  44.90  0
1792  2024-09-19  47.05  1

[1793 rows x 3 columns]
            time  close  Y
0     2015-01-05  12.26  0
1     2015-01-06  12.34  1
2     2015-01-07  12.42  1
3     2015-01-08  12.58  1
4     2015-01-09  12.62  1
...          ...    ... ..
2420  2024-09-13  17.30  0
2421  2024-09-16  17.30  0
2422  2024-09-17  17.30  0
2423  2024-09-18  17.45  1
2424  2024-09-19  17.50  1

[2425 rows x 3 columns]
            time  close  Y
0     2015-01-05   5.53  0
1     2015-01-06   5.36  0
2     2015-01-07   5.36  0
3     2015-01-08   5.36  0
4     2015-01-09   5.43  1
...          ...    ... ..
2421  2024-09-13   6.00  1
2422  2024-09-16   6.02  1
2423  2024-09-17   6.03  1
2424  2024-09-18   6.05  1
2425 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


            time  close  Y
0     2015-01-05   1.96  0
1     2015-01-06   1.96  0
2     2015-01-07   1.96  0
3     2015-01-08   1.92  0
4     2015-01-09   1.92  0
...          ...    ... ..
2421  2024-09-13   9.05  1
2422  2024-09-16   8.98  0
2423  2024-09-17   9.02  1
2424  2024-09-18   9.23  1
2425  2024-09-19   9.18  0

[2426 rows x 3 columns]
            time  close  Y
0     2015-01-05   3.89  0
1     2015-01-06   3.89  0
2     2015-01-07   4.01  1
3     2015-01-08   3.93  0
4     2015-01-09   3.89  0
...          ...    ... ..
2421  2024-09-13  17.80  0
2422  2024-09-16  17.70  0
2423  2024-09-17  17.70  0
2424  2024-09-18  17.75  1
2425  2024-09-19  17.50  0

[2426 rows x 3 columns]
            time  close  Y
0     2015-01-05   6.06  0
1     2015-01-06   5.98  0
2     2015-01-07   6.06  1
3     2015-01-08   6.15  1
4     2015-01-09   6.23  1
...          ...    ... ..
2417  2024-09-13  33.20  0
2418  2024-09-16  32.70  0
2419  2024-09-17  32.65  0
2420  2024-09-18  33.00  1
2421 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


            time  close  Y
0     2015-01-05  14.60  0
1     2015-01-06  14.67  1
2     2015-01-07  14.31  0
3     2015-01-08  14.02  0
4     2015-01-09  13.95  0
...          ...    ... ..
2421  2024-09-13  15.00  1
2422  2024-09-16  14.85  0
2423  2024-09-17  15.20  1
2424  2024-09-18  15.15  0
2425  2024-09-19  15.25  1

[2426 rows x 3 columns]
            time  close  Y
0     2020-03-25   5.65  0
1     2020-03-26   5.65  0
2     2020-03-27   5.78  1
3     2020-03-30   5.37  0
4     2020-03-31   5.00  0
...          ...    ... ..
1118  2024-09-13  25.00  0
1119  2024-09-16  24.90  0
1120  2024-09-17  25.65  1
1121  2024-09-18  25.65  0
1122  2024-09-19  26.45  1

[1123 rows x 3 columns]
            time  close  Y
0     2015-01-05   8.01  0
1     2015-01-06   8.06  1
2     2015-01-07   8.27  1
3     2015-01-08   8.24  0
4     2015-01-09   7.72  0
...          ...    ... ..
2420  2024-09-13  43.50  0
2421  2024-09-16  43.80  1
2422  2024-09-17  43.90  1
2423  2024-09-18  44.00  1
2424 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


            time  close  Y
0     2015-01-05  17.74  0
1     2015-01-06  17.79  1
2     2015-01-07  16.75  0
3     2015-01-08  17.01  1
4     2015-01-09  17.43  1
...          ...    ... ..
2421  2024-09-13  38.10  1
2422  2024-09-16  38.80  1
2423  2024-09-17  38.80  0
2424  2024-09-18  38.65  0
2425  2024-09-19  38.40  0

[2426 rows x 3 columns]
            time  close  Y
0     2015-01-05  10.57  0
1     2015-01-06  10.57  0
2     2015-01-07  10.31  0
3     2015-01-08  10.31  0
4     2015-01-09  10.31  0
...          ...    ... ..
2387  2024-07-25  46.00  0
2388  2024-07-30  49.20  1
2389  2024-08-22  45.90  0
2390  2024-08-23  45.90  0
2391  2024-08-29  45.90  0

[2392 rows x 3 columns]
            time  close  Y
0     2015-01-05   3.29  0
1     2015-01-06   3.29  0
2     2015-01-07   3.18  0
3     2015-01-08   3.22  1
4     2015-01-09   3.26  1
...          ...    ... ..
2421  2024-09-13  51.10  1
2422  2024-09-16  50.40  0
2423  2024-09-17  50.90  1
2424  2024-09-18  52.50  1
2425 

  _warn_prf(average, modifier, msg_start, len(result))


            time  close  Y
0     2015-01-05   6.12  0
1     2015-01-06   6.12  0
2     2015-01-07   6.12  0
3     2015-01-08   6.08  0
4     2015-01-09   6.08  0
...          ...    ... ..
2396  2024-08-28   9.89  0
2397  2024-09-04   9.89  0
2398  2024-09-05   9.89  0
2399  2024-09-09   9.89  0
2400  2024-09-12   9.80  0

[2401 rows x 3 columns]
            time  close  Y
0     2015-01-05   4.76  0
1     2015-01-06   4.76  0
2     2015-01-07   4.82  1
3     2015-01-08   4.70  0
4     2015-01-09   4.76  1
...          ...    ... ..
2421  2024-09-13   8.13  0
2422  2024-09-16   8.08  0
2423  2024-09-17   8.12  1
2424  2024-09-18   8.14  1
2425  2024-09-19   8.20  1

[2426 rows x 3 columns]
            time  close  Y
0     2015-01-05  11.77  0
1     2015-01-06  11.66  0
2     2015-01-07  11.81  1
3     2015-01-08  11.77  0
4     2015-01-09  11.88  1
...          ...    ... ..
2421  2024-09-13  35.15  1
2422  2024-09-16  34.80  0
2423  2024-09-17  35.10  1
2424  2024-09-18  35.20  1
2425 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


            time  close  Y
0     2015-01-05  15.64  0
1     2015-01-06  15.64  0
2     2015-01-07  15.64  0
3     2015-01-08  15.64  0
4     2015-01-09  15.64  0
...          ...    ... ..
2406  2024-09-06  28.90  1
2407  2024-09-11  28.90  0
2408  2024-09-12  26.90  0
2409  2024-09-17  27.00  1
2410  2024-09-18  26.95  0

[2411 rows x 3 columns]
            time  close  Y
0     2018-08-20  13.15  0
1     2018-08-21  14.05  1
2     2018-08-22  15.01  1
3     2018-08-23  16.04  1
4     2018-08-24  16.43  1
...          ...    ... ..
1516  2024-09-13   6.65  0
1517  2024-09-16   6.59  0
1518  2024-09-17   6.60  1
1519  2024-09-18   6.56  0
1520  2024-09-19   6.58  1

[1521 rows x 3 columns]
            time  close  Y
0     2018-09-05   7.86  0
1     2018-09-06   7.86  0
2     2018-09-07   7.89  1
3     2018-09-10   7.77  0
4     2018-09-11   7.82  1
...          ...    ... ..
1505  2024-09-13   6.94  0
1506  2024-09-16   7.02  1
1507  2024-09-17   6.95  0
1508  2024-09-18   7.01  1
1509 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


            time  close  Y
0     2015-01-05  17.27  0
1     2015-01-06  17.27  0
2     2015-01-07  17.60  1
3     2015-01-08  17.56  0
4     2015-01-09  17.52  0
...          ...    ... ..
2421  2024-09-13  12.70  1
2422  2024-09-16  12.65  0
2423  2024-09-17  12.60  0
2424  2024-09-18  12.60  0
2425  2024-09-19  12.55  0

[2426 rows x 3 columns]
            time  close  Y
0     2015-03-18   3.78  0
1     2015-03-19   3.83  1
2     2015-03-20   3.74  0
3     2015-03-23   3.49  0
4     2015-03-24   3.48  0
...          ...    ... ..
2375  2024-09-13  39.15  0
2376  2024-09-16  38.20  0
2377  2024-09-17  39.00  1
2378  2024-09-18  38.80  0
2379  2024-09-19  38.95  1

[2380 rows x 3 columns]
            time  close  Y
0     2015-01-05  25.28  0
1     2015-01-06  26.16  1
2     2015-01-07  25.94  0
3     2015-01-08  25.94  0
4     2015-01-09  27.27  1
...          ...    ... ..
2421  2024-09-13  61.60  0
2422  2024-09-16  59.00  0
2423  2024-09-17  60.30  1
2424  2024-09-18  60.60  1
2425 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


            time  close  Y
0     2015-01-05   8.17  0
1     2015-01-06   8.23  1
2     2015-01-07   8.47  1
3     2015-01-08   8.29  0
4     2015-01-09   8.53  1
...          ...    ... ..
2421  2024-09-13  34.90  0
2422  2024-09-16  34.65  0
2423  2024-09-17  34.95  1
2424  2024-09-18  35.70  1
2425  2024-09-19  36.00  1

[2426 rows x 3 columns]
            time  close  Y
0     2015-01-05   6.94  0
1     2015-01-06   6.87  0
2     2015-01-07   6.94  1
3     2015-01-08   6.55  0
4     2015-01-09   6.62  1
...          ...    ... ..
2421  2024-09-13  15.10  0
2422  2024-09-16  15.00  0
2423  2024-09-17  15.25  1
2424  2024-09-18  15.30  1
2425  2024-09-19  15.45  1

[2426 rows x 3 columns]
            time   close  Y
0     2017-10-31   10.15  0
1     2017-11-01    9.54  0
2     2017-11-02    8.52  0
3     2017-11-03    7.83  0
4     2017-11-06    8.99  1
...          ...     ... ..
1715  2024-09-13  125.00  1
1716  2024-09-16  124.00  0
1717  2024-09-17  124.60  1
1718  2024-09-18  133.

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


            time  close  Y
0     2017-01-16  15.74  0
1     2017-01-17  18.08  1
2     2017-01-18  18.21  1
3     2017-01-19  17.54  0
4     2017-01-20  17.54  0
...          ...    ... ..
1908  2024-09-13  42.85  0
1909  2024-09-16  41.65  0
1910  2024-09-17  42.00  1
1911  2024-09-18  41.70  0
1912  2024-09-19  41.70  0

[1913 rows x 3 columns]
            time  close  Y
0     2015-01-05   8.42  0
1     2015-01-06   8.15  0
2     2015-01-07   8.27  1
3     2015-01-08   8.23  0
4     2015-01-09   8.23  0
...          ...    ... ..
2415  2024-09-13  13.10  0
2416  2024-09-16  13.00  0
2417  2024-09-17  13.00  0
2418  2024-09-18  13.00  0
2419  2024-09-19  13.00  0

[2420 rows x 3 columns]
            time  close  Y
0     2015-01-05   3.83  0
1     2015-01-06   3.79  0
2     2015-01-07   3.79  0
3     2015-01-08   3.79  0
4     2015-01-09   3.79  0
...          ...    ... ..
2415  2024-09-13  11.00  1
2416  2024-09-16  11.00  0
2417  2024-09-17  11.10  1
2418  2024-09-18  11.10  0
2419 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


            time  close  Y
0     2015-01-05   5.48  0
1     2015-01-06   5.48  0
2     2015-01-07   5.48  0
3     2015-01-08   5.48  0
4     2015-01-09   5.40  0
...          ...    ... ..
2421  2024-09-13  40.25  1
2422  2024-09-16  40.00  0
2423  2024-09-17  40.50  1
2424  2024-09-18  40.50  0
2425  2024-09-19  40.25  0

[2426 rows x 3 columns]
            time  close  Y
0     2015-01-05   4.72  0
1     2015-01-06   4.58  0
2     2015-01-07   4.67  1
3     2015-01-08   4.78  1
4     2015-01-09   4.69  0
...          ...    ... ..
2421  2024-09-13  37.10  0
2422  2024-09-16  36.95  0
2423  2024-09-17  37.00  1
2424  2024-09-18  37.15  1
2425  2024-09-19  37.60  1

[2426 rows x 3 columns]
            time   close  Y
0     2015-01-05   43.60  0
1     2015-01-06   43.60  0
2     2015-01-07   43.14  0
3     2015-01-08   43.14  0
4     2015-01-09   43.14  0
...          ...     ... ..
2421  2024-09-13  106.60  0
2422  2024-09-16  106.60  0
2423  2024-09-17  107.30  1
2424  2024-09-18  107.

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


            time  close  Y
0     2015-01-05   4.86  0
1     2015-01-06   4.95  1
2     2015-01-07   4.86  0
3     2015-01-08   4.86  0
4     2015-01-09   4.86  0
...          ...    ... ..
2421  2024-09-13   8.70  1
2422  2024-09-16   8.70  0
2423  2024-09-17   8.72  1
2424  2024-09-18   8.73  1
2425  2024-09-19   8.70  0

[2426 rows x 3 columns]
            time  close  Y
0     2015-01-05   5.75  0
1     2015-01-06   5.79  1
2     2015-01-07   5.75  0
3     2015-01-08   5.75  0
4     2015-01-09   5.79  1
...          ...    ... ..
2421  2024-09-13  22.00  1
2422  2024-09-16  21.90  0
2423  2024-09-17  22.50  1
2424  2024-09-18  22.65  1
2425  2024-09-19  22.90  1

[2426 rows x 3 columns]
            time  close  Y
0     2015-01-05   9.44  0
1     2015-01-06   9.71  1
2     2015-01-07   9.80  1
3     2015-01-08   9.62  0
4     2015-01-09   9.80  1
...          ...    ... ..
2421  2024-09-13   1.82  0
2422  2024-09-16   1.84  1
2423  2024-09-17   1.78  0
2424  2024-09-18   1.66  0
2425 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


            time  close  Y
0     2017-01-12   4.37  0
1     2017-01-13   4.37  0
2     2017-01-16   5.03  1
3     2017-01-17   5.77  1
4     2017-01-18   6.63  1
...          ...    ... ..
1911  2024-09-13  53.70  1
1912  2024-09-16  54.00  1
1913  2024-09-17  56.80  1
1914  2024-09-18  56.80  0
1915  2024-09-19  56.50  0

[1916 rows x 3 columns]
            time  close  Y
0     2015-01-05  12.15  0
1     2015-01-06  12.50  1
2     2015-01-07  12.58  1
3     2015-01-08  12.50  0
4     2015-01-09  12.73  1
...          ...    ... ..
2421  2024-09-13  35.50  1
2422  2024-09-16  35.70  1
2423  2024-09-17  35.80  1
2424  2024-09-18  35.55  0
2425  2024-09-19  35.60  1

[2426 rows x 3 columns]
            time  close  Y
0     2015-01-05   7.64  0
1     2015-01-06   7.56  0
2     2015-01-07   7.62  1
3     2015-01-08   7.56  0
4     2015-01-09   7.58  1
...          ...    ... ..
2421  2024-09-13  40.85  0
2422  2024-09-16  40.75  0
2423  2024-09-17  41.40  1
2424  2024-09-18  41.85  1
2425 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


            time  close  Y
0     2015-01-05  20.61  0
1     2015-01-06  20.78  1
2     2015-01-07  20.98  1
3     2015-01-08  20.64  0
4     2015-01-09  20.64  0
...          ...    ... ..
2421  2024-09-13  55.20  0
2422  2024-09-16  55.40  1
2423  2024-09-17  55.30  0
2424  2024-09-18  55.40  1
2425  2024-09-19  55.20  0

[2426 rows x 3 columns]
            time  close  Y
0     2015-01-05   5.62  0
1     2015-01-06   5.62  0
2     2015-01-07   5.62  0
3     2015-01-08   5.62  0
4     2015-01-09   5.62  0
...          ...    ... ..
2418  2024-09-12   3.74  1
2419  2024-09-13   3.76  1
2420  2024-09-16   3.71  0
2421  2024-09-17   3.71  0
2422  2024-09-19   3.86  1

[2423 rows x 3 columns]
            time  close  Y
0     2015-01-05   9.30  0
1     2015-01-06   9.30  0
2     2015-01-07   8.70  0
3     2015-01-08   8.40  0
4     2015-01-09   7.90  0
...          ...    ... ..
2412  2024-09-13  12.50  1
2413  2024-09-16  12.50  0
2414  2024-09-17  11.65  0
2415  2024-09-18  11.90  1
2416 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


            time  close  Y
0     2015-01-05   6.71  0
1     2015-01-06   6.71  0
2     2015-01-07   6.71  0
3     2015-01-08   6.26  0
4     2015-01-09   6.07  0
...          ...    ... ..
2399  2024-09-05  17.25  0
2400  2024-09-10  18.00  1
2401  2024-09-12  16.75  0
2402  2024-09-13  17.90  1
2403  2024-09-17  18.95  1

[2404 rows x 3 columns]
            time  close  Y
0     2015-01-05  20.26  0
1     2015-01-06  20.04  0
2     2015-01-07  20.21  1
3     2015-01-08  20.26  1
4     2015-01-09  20.26  0
...          ...    ... ..
2421  2024-09-13  73.80  1
2422  2024-09-16  73.80  0
2423  2024-09-17  74.60  1
2424  2024-09-18  74.40  0
2425  2024-09-19  74.10  0

[2426 rows x 3 columns]
            time  close  Y
0     2015-01-05   4.30  0
1     2015-01-06   4.40  1
2     2015-01-07   4.37  0
3     2015-01-08   4.30  0
4     2015-01-09   4.37  1
...          ...    ... ..
2421  2024-09-13  15.30  1
2422  2024-09-16  14.90  0
2423  2024-09-17  15.50  1
2424  2024-09-18  15.65  1
2425 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


           time  close  Y
0    2021-07-15  18.10  0
1    2021-07-16  18.72  1
2    2021-07-19  17.47  0
3    2021-07-20  17.47  0
4    2021-07-21  18.28  1
..          ...    ... ..
787  2024-09-13   5.78  1
788  2024-09-16   5.70  0
789  2024-09-17   5.79  1
790  2024-09-18   5.80  1
791  2024-09-19   5.98  1

[792 rows x 3 columns]
            time  close  Y
0     2015-01-05   4.52  0
1     2015-01-06   4.52  0
2     2015-01-07   4.24  0
3     2015-01-08   4.24  0
4     2015-01-09   4.15  0
...          ...    ... ..
2421  2024-09-13   3.77  1
2422  2024-09-16   3.77  0
2423  2024-09-17   3.77  0
2424  2024-09-18   3.84  1
2425  2024-09-19   3.85  1

[2426 rows x 3 columns]
            time  close  Y
0     2016-01-07   9.30  0
1     2016-01-08   9.10  0
2     2016-01-11   9.20  1
3     2016-01-12   9.20  0
4     2016-01-13   9.20  0
...          ...    ... ..
2166  2024-09-13  22.61  0
2167  2024-09-16  22.54  0
2168  2024-09-17  22.82  1
2169  2024-09-18  22.94  1
2170  2024-09-19  

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


            time  close  Y
0     2018-08-07   5.84  0
1     2018-08-08   6.38  1
2     2018-08-09   6.65  1
3     2018-08-10   5.91  0
4     2018-08-13   6.65  1
...          ...    ... ..
1517  2024-09-13  11.60  1
1518  2024-09-16  11.35  0
1519  2024-09-17  11.45  1
1520  2024-09-18  11.55  1
1521  2024-09-19  11.55  0

[1522 rows x 3 columns]
            time  close  Y
0     2017-06-08   8.77  0
1     2017-06-09   9.36  1
2     2017-06-12   9.95  1
3     2017-06-13  10.01  1
4     2017-06-14   9.61  0
...          ...    ... ..
1818  2024-09-13   6.70  1
1819  2024-09-16   6.90  1
1820  2024-09-17   6.91  1
1821  2024-09-18   7.00  1
1822  2024-09-19   7.07  1

[1823 rows x 3 columns]
            time  close  Y
0     2015-01-05   5.61  0
1     2015-01-06   5.78  1
2     2015-01-07   5.83  1
3     2015-01-08   5.72  0
4     2015-01-09   5.72  0
...          ...    ... ..
2421  2024-09-13   3.30  0
2422  2024-09-16   3.31  1
2423  2024-09-17   3.39  1
2424  2024-09-18   3.32  0
2425 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


            time  close  Y
0     2015-01-05  16.95  0
1     2015-01-06  16.95  0
2     2015-01-07  16.95  0
3     2015-01-08  17.20  1
4     2015-01-09  17.28  1
...          ...    ... ..
2401  2024-08-29  13.90  1
2402  2024-09-05  13.90  0
2403  2024-09-09  13.00  0
2404  2024-09-10  13.90  1
2405  2024-09-12  13.90  0

[2406 rows x 3 columns]
            time  close  Y
0     2018-10-18   3.30  0
1     2018-10-19   3.53  1
2     2018-10-22   3.77  1
3     2018-10-23   4.03  1
4     2018-10-24   4.31  1
...          ...    ... ..
1474  2024-09-13   6.40  0
1475  2024-09-16   6.38  0
1476  2024-09-17   6.40  1
1477  2024-09-18   6.45  1
1478  2024-09-19   6.43  0

[1479 rows x 3 columns]
            time  close  Y
0     2015-01-05  12.59  0
1     2015-01-06  13.08  1
2     2015-01-07  12.80  0
3     2015-01-08  12.32  0
4     2015-01-09  12.25  0
...          ...    ... ..
2417  2024-09-13   4.20  0
2418  2024-09-16   4.19  0
2419  2024-09-17   4.17  0
2420  2024-09-18   4.46  1
2421 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


            time   close  Y
0     2015-01-05    8.54  0
1     2015-01-06    8.59  1
2     2015-01-07    8.65  1
3     2015-01-08    8.56  0
4     2015-01-09    8.63  1
...          ...     ... ..
2421  2024-09-13  133.00  1
2422  2024-09-16  131.10  0
2423  2024-09-17  132.90  1
2424  2024-09-18  134.60  1
2425  2024-09-19  135.20  1

[2426 rows x 3 columns]
            time   close  Y
0     2018-04-26   41.39  0
1     2018-04-27   44.29  1
2     2018-05-02   46.36  1
3     2018-05-03   43.32  0
4     2018-05-04   41.11  0
...          ...     ... ..
1596  2024-09-13  179.00  1
1597  2024-09-16  177.00  0
1598  2024-09-17  177.20  1
1599  2024-09-18  179.80  1
1600  2024-09-19  180.50  1

[1601 rows x 3 columns]
            time  close  Y
0     2017-01-13   3.77  0
1     2017-01-16   3.51  0
2     2017-01-17   3.30  0
3     2017-01-18   3.38  1
4     2017-01-19   3.27  0
...          ...    ... ..
1914  2024-09-13  44.00  1
1915  2024-09-16  43.20  0
1916  2024-09-17  44.60  1
1917  20

  _warn_prf(average, modifier, msg_start, len(result))


         time  close  Y
0  2024-08-16   10.0  0
1  2024-08-22   10.0  0
2  2024-09-04   10.2  1


ValueError: This solver needs samples of at least 2 classes in the data, but the data contains only one class: 0

**Hồi quy Logistic sử dụng Thuật toán  Batch Gradient Descent cho Hồi quy Logistic**

In [None]:
# Hàm sigmoid
def sigmoid(z):
    return 1 / (1 + np.exp(-z))

# Hàm mất mát (binary cross-entropy)
def compute_loss(Y, Y_hat):
    m = Y.shape[0]
    return -(1/m) * np.sum(Y * np.log(Y_hat) + (1 - Y) * np.log(1 - Y_hat))

# Hàm Batch Gradient Descent
def batch_gradient_descent(X, Y, learning_rate=0.01, iterations=1000):
    m, n = X.shape
    # Khởi tạo tham số (weights và bias)
    weights = np.zeros(n)
    bias = 0
    losses = []

    for i in range(iterations):
        # Tính toán dự đoán Y_hat
        Z = np.dot(X, weights) + bias
        Y_hat = sigmoid(Z)

        # Tính gradient của hàm mất mát
        dw = (1/m) * np.dot(X.T, (Y_hat - Y))
        db = (1/m) * np.sum(Y_hat - Y)

        # Cập nhật trọng số
        weights -= learning_rate * dw
        bias -= learning_rate * db

        # Tính và lưu lại hàm mất mát
        loss = compute_loss(Y, Y_hat)
        losses.append(loss)

        # In ra mỗi 100 iterations
        if i % 100 == 0:
            print(f'Iteration {i}, Loss: {loss}')

    return weights, bias, losses

# Hàm dự đoán
def predict(X, weights, bias):
    Z = np.dot(X, weights) + bias
    Y_hat = sigmoid(Z)
    return np.where(Y_hat >= 0.5, 1, 0)

# Đọc dữ liệu
# Đường dẫn đến thư mục chứa các file CSV
folder_path = r'D:\DeTaiNam2024\SoLieu\CK_HOSE_VNSTOCK'

# Lấy danh sách tất cả các file CSV trong thư mục
file_list = [f for f in os.listdir(folder_path) if f.endswith('.csv')]
results=[]
# Vòng lặp qua từng file CSV
for file_name in file_list:
    # Đọc dữ liệu từ file CSV
    file_path = os.path.join(folder_path, file_name)
    df = pd.read_csv(file_path)
    
    # Sắp xếp theo ngày
    df = df.sort_values('time')

    # Tạo cột Y: nếu giá đóng cửa hôm nay thấp hơn hôm qua, Y=0, nếu ngược lại Y=1
    df['Y'] = (df['close'].diff() > 0).astype(int)

    # Shift dữ liệu Close để lấy giá của ngày trước làm đặc trưng
    df['Close_lag1'] = df['close'].shift(1)
    df['Volume_lag1'] = df['volume'].shift(1)

    # Loại bỏ các hàng chứa giá trị NaN
    df.dropna(inplace=True)

    # Chọn các đặc trưng (X) và mục tiêu (Y)
    X = df[['Close_lag1', 'Volume_lag1']]
    Y = df['Y']
    
   # Chuẩn hóa dữ liệu
    scaler = StandardScaler()
    X = scaler.fit_transform(X)

    # Chia dữ liệu thành tập huấn luyện và kiểm tra
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

    # Huấn luyện mô hình bằng Batch Gradient Descent
    weights, bias, losses = batch_gradient_descent(X_train, Y_train, learning_rate=0.01, iterations=1000)

    # Dự đoán trên tập kiểm tra
    Y_pred = predict(X_test, weights, bias)

    # Tính độ chính xác
    accuracy = accuracy_score(Y_test, Y_pred)
    precision = precision_score(Y_test, Y_pred, average='weighted')
    recall = recall_score(Y_test, Y_pred, average='weighted')
    f1 = f1_score(Y_test, Y_pred, average='weighted')
    
      # Lưu kết quả vào danh sách
    results.append({
        'file': file_name,
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1_score': f1
    })
# Ghi kết quả ra file CSV
results_df = pd.DataFrame(results)
results_df.to_csv(r'D:\DeTaiNam2024\SoLieu\CK_HOSE_VNSTOCK\output\logistic_regression_BGD_results_HOSE.csv', index=False)

Iteration 0, Loss: 0.6931471805599453
Iteration 100, Loss: 0.6918393500625402
Iteration 200, Loss: 0.6910465919148521
Iteration 300, Loss: 0.6905657290735908
Iteration 400, Loss: 0.6902738332745706
Iteration 500, Loss: 0.6900965105850567
Iteration 600, Loss: 0.689988710015919
Iteration 700, Loss: 0.6899231279765206
Iteration 800, Loss: 0.6898832030459343
Iteration 900, Loss: 0.6898588815129466


  _warn_prf(average, modifier, msg_start, len(result))


Iteration 0, Loss: 0.6931471805599454
Iteration 100, Loss: 0.6796093755235076
Iteration 200, Loss: 0.671389901916826
Iteration 300, Loss: 0.6663784799060483
Iteration 400, Loss: 0.6633079841132001
Iteration 500, Loss: 0.6614174675842123
Iteration 600, Loss: 0.6602481408173237
Iteration 700, Loss: 0.6595218791991777
Iteration 800, Loss: 0.6590691103811549
Iteration 900, Loss: 0.6587858811180242
Iteration 0, Loss: 0.6931471805599453
Iteration 100, Loss: 0.6830284221417529
Iteration 200, Loss: 0.676994819465165
Iteration 300, Loss: 0.6733739745021303
Iteration 400, Loss: 0.6711872070726282
Iteration 500, Loss: 0.6698591183308686
Iteration 600, Loss: 0.6690487415385802
Iteration 700, Loss: 0.668552368614605
Iteration 800, Loss: 0.6682473911865782
Iteration 900, Loss: 0.6680595464322187


  _warn_prf(average, modifier, msg_start, len(result))


Iteration 0, Loss: 0.6931471805599453
Iteration 100, Loss: 0.6501285774571038
Iteration 200, Loss: 0.6238877790582709
Iteration 300, Loss: 0.6076837388585613
Iteration 400, Loss: 0.597532474310386
Iteration 500, Loss: 0.5910830590175159
Iteration 600, Loss: 0.5869329729496883
Iteration 700, Loss: 0.5842324340035456
Iteration 800, Loss: 0.5824580856135255
Iteration 900, Loss: 0.5812825679967697
Iteration 0, Loss: 0.6931471805599454
Iteration 100, Loss: 0.690623956901945
Iteration 200, Loss: 0.6891062815212371
Iteration 300, Loss: 0.6881858727781257
Iteration 400, Loss: 0.6876227501916752
Iteration 500, Loss: 0.687275345016331
Iteration 600, Loss: 0.6870594211681926
Iteration 700, Loss: 0.6869243449964932
Iteration 800, Loss: 0.6868393732271462
Iteration 900, Loss: 0.6867856656961899
Iteration 0, Loss: 0.6931471805599453
Iteration 100, Loss: 0.6614395233122666
Iteration 200, Loss: 0.6421268446275473
Iteration 300, Loss: 0.6302543454291285
Iteration 400, Loss: 0.6228754339536984
Iteration

  _warn_prf(average, modifier, msg_start, len(result))


Iteration 0, Loss: 0.6931471805599453
Iteration 100, Loss: 0.6888107763507668
Iteration 200, Loss: 0.6865246108537955
Iteration 300, Loss: 0.6852770221220987
Iteration 400, Loss: 0.6845731665558457
Iteration 500, Loss: 0.6841635776134579
Iteration 600, Loss: 0.683918289847208
Iteration 700, Loss: 0.6837673848058671
Iteration 800, Loss: 0.6836721117063139
Iteration 900, Loss: 0.6836104116327664
Iteration 0, Loss: 0.6931471805599453
Iteration 100, Loss: 0.6762490727318803
Iteration 200, Loss: 0.6661394157384944
Iteration 300, Loss: 0.6600208752468708
Iteration 400, Loss: 0.6562754755763681
Iteration 500, Loss: 0.6539581577761381
Iteration 600, Loss: 0.6525098832816324
Iteration 700, Loss: 0.6515958145096664
Iteration 800, Loss: 0.6510131295649908
Iteration 900, Loss: 0.6506377540699989
Iteration 0, Loss: 0.6931471805599453
Iteration 100, Loss: 0.6853921992885089
Iteration 200, Loss: 0.6806704572794123
Iteration 300, Loss: 0.6777789901543548
Iteration 400, Loss: 0.6759960574336853
Iterati

  _warn_prf(average, modifier, msg_start, len(result))


Iteration 0, Loss: 0.6931471805599453
Iteration 100, Loss: 0.6743608217570491
Iteration 200, Loss: 0.6634575628878755
Iteration 300, Loss: 0.6570166332005372
Iteration 400, Loss: 0.6531500844352783
Iteration 500, Loss: 0.6507971121981088
Iteration 600, Loss: 0.6493488926248093
Iteration 700, Loss: 0.6484490089628872
Iteration 800, Loss: 0.6478852235572905
Iteration 900, Loss: 0.647529361090475
Iteration 0, Loss: 0.6931471805599453
Iteration 100, Loss: 0.6847406040091579
Iteration 200, Loss: 0.6798485059075431
Iteration 300, Loss: 0.676961736941368
Iteration 400, Loss: 0.6752376722475506
Iteration 500, Loss: 0.6741977783969632
Iteration 600, Loss: 0.6735654953132002
Iteration 700, Loss: 0.6731785016300618
Iteration 800, Loss: 0.6729403038011553
Iteration 900, Loss: 0.6727929524118151


  _warn_prf(average, modifier, msg_start, len(result))


Iteration 0, Loss: 0.6931471805599453
Iteration 100, Loss: 0.6370892324064454
Iteration 200, Loss: 0.6030707858154782
Iteration 300, Loss: 0.5820581135095878
Iteration 400, Loss: 0.5688165316960967
Iteration 500, Loss: 0.5603122094414066
Iteration 600, Loss: 0.5547576483806694
Iteration 700, Loss: 0.5510766572497328
Iteration 800, Loss: 0.5486068886424629
Iteration 900, Loss: 0.5469322677781356
Iteration 0, Loss: 0.6931471805599453
Iteration 100, Loss: 0.6866313570214306
Iteration 200, Loss: 0.6828523667415214
Iteration 300, Loss: 0.6806130222427432
Iteration 400, Loss: 0.67925872760946
Iteration 500, Loss: 0.6784236845691041
Iteration 600, Loss: 0.6778989680879777
Iteration 700, Loss: 0.6775628685075566
Iteration 800, Loss: 0.6773432457697216
Iteration 900, Loss: 0.6771966864281369
Iteration 0, Loss: 0.6931471805599453
Iteration 100, Loss: 0.6771211282724392
Iteration 200, Loss: 0.6671941478014198
Iteration 300, Loss: 0.6610008077566966
Iteration 400, Loss: 0.657103679197513
Iteration

  _warn_prf(average, modifier, msg_start, len(result))


Iteration 0, Loss: 0.6931471805599452
Iteration 100, Loss: 0.6688478634397709
Iteration 200, Loss: 0.6544864842436874
Iteration 300, Loss: 0.6458482865032651
Iteration 400, Loss: 0.6405684949355626
Iteration 500, Loss: 0.637296610750789
Iteration 600, Loss: 0.6352449773802993
Iteration 700, Loss: 0.6339451608119322
Iteration 800, Loss: 0.633113891286809
Iteration 900, Loss: 0.6325774833420706
Iteration 0, Loss: 0.6931471805599453
Iteration 100, Loss: 0.6855549551821756
Iteration 200, Loss: 0.6812453848859968
Iteration 300, Loss: 0.6787675527398702
Iteration 400, Loss: 0.6773253304907624
Iteration 500, Loss: 0.676476662433904
Iteration 600, Loss: 0.6759725285117253
Iteration 700, Loss: 0.6756706338190585
Iteration 800, Loss: 0.6754886018532575
Iteration 900, Loss: 0.6753781950249211
Iteration 0, Loss: 0.6931471805599453
Iteration 100, Loss: 0.6804868136295368
Iteration 200, Loss: 0.6729549617678307
Iteration 300, Loss: 0.6684224328825608
Iteration 400, Loss: 0.6656647719325317
Iteration

  _warn_prf(average, modifier, msg_start, len(result))


Iteration 0, Loss: 0.6931471805599453
Iteration 100, Loss: 0.6837598826558597
Iteration 200, Loss: 0.6781957121380711
Iteration 300, Loss: 0.674865634089666
Iteration 400, Loss: 0.6728562772775151
Iteration 500, Loss: 0.67163578389133
Iteration 600, Loss: 0.6708905303291586
Iteration 700, Loss: 0.6704335506906459
Iteration 800, Loss: 0.6701523825906819
Iteration 900, Loss: 0.6699788939037545
Iteration 0, Loss: 0.6931471805599452
Iteration 100, Loss: 0.6821489290416286
Iteration 200, Loss: 0.6757102353140237
Iteration 300, Loss: 0.6718977756610841
Iteration 400, Loss: 0.6696166398850629
Iteration 500, Loss: 0.6682395399687281
Iteration 600, Loss: 0.6674020987409582
Iteration 700, Loss: 0.6668898274755722
Iteration 800, Loss: 0.6665749870262024
Iteration 900, Loss: 0.6663807555606374
Iteration 0, Loss: 0.6931471805599453
Iteration 100, Loss: 0.6842954967518468
Iteration 200, Loss: 0.6788679260941861
Iteration 300, Loss: 0.6755159273002478
Iteration 400, Loss: 0.6734295676439167
Iteration

  _warn_prf(average, modifier, msg_start, len(result))


Iteration 0, Loss: 0.6931471805599453
Iteration 100, Loss: 0.6785476608323464
Iteration 200, Loss: 0.6696073419885342
Iteration 300, Loss: 0.6640186605688913
Iteration 400, Loss: 0.6604498244815573
Iteration 500, Loss: 0.6581206403520524
Iteration 600, Loss: 0.6565662297020016
Iteration 700, Loss: 0.6555049305077881
Iteration 800, Loss: 0.6547633489376486
Iteration 900, Loss: 0.6542330888322804
Iteration 0, Loss: 0.6931471805599453
Iteration 100, Loss: 0.6867741261479812
Iteration 200, Loss: 0.6829234873366888
Iteration 300, Loss: 0.6805906377109167
Iteration 400, Loss: 0.679173393919662
Iteration 500, Loss: 0.6783102005097217
Iteration 600, Loss: 0.6777832961244576
Iteration 700, Loss: 0.6774610674983045
Iteration 800, Loss: 0.6772637042898453
Iteration 900, Loss: 0.6771426670439614


  _warn_prf(average, modifier, msg_start, len(result))


Iteration 0, Loss: 0.6931471805599453
Iteration 100, Loss: 0.6896681545193358
Iteration 200, Loss: 0.6875540324520472
Iteration 300, Loss: 0.6862677722286636
Iteration 400, Loss: 0.6854840881141921
Iteration 500, Loss: 0.6850059342816938
Iteration 600, Loss: 0.6847138081678371
Iteration 700, Loss: 0.6845351194652267
Iteration 800, Loss: 0.6844256999629342
Iteration 900, Loss: 0.6843586319288684
Iteration 0, Loss: 0.6931471805599453
Iteration 100, Loss: 0.6538821419736085
Iteration 200, Loss: 0.6302211991310576
Iteration 300, Loss: 0.6157434183712364
Iteration 400, Loss: 0.6067379017463298
Iteration 500, Loss: 0.6010496132424318
Iteration 600, Loss: 0.5974074198239906
Iteration 700, Loss: 0.5950476129361957
Iteration 800, Loss: 0.5935029645865394
Iteration 900, Loss: 0.5924828607115229
Iteration 0, Loss: 0.6931471805599453
Iteration 100, Loss: 0.6859782067074564
Iteration 200, Loss: 0.6820768596153391
Iteration 300, Loss: 0.6798968391560168
Iteration 400, Loss: 0.6786456494046053
Iterat

  _warn_prf(average, modifier, msg_start, len(result))


Iteration 0, Loss: 0.6931471805599453
Iteration 100, Loss: 0.6418320017374145
Iteration 200, Loss: 0.6104373489763775
Iteration 300, Loss: 0.5909491996416079
Iteration 400, Loss: 0.5786432770365679
Iteration 500, Loss: 0.5707408500109086
Iteration 600, Loss: 0.56558733926497
Iteration 700, Loss: 0.5621800340397272
Iteration 800, Loss: 0.5598998161125024
Iteration 900, Loss: 0.5583575192641936
Iteration 0, Loss: 0.6931471805599453
Iteration 100, Loss: 0.691204032761154
Iteration 200, Loss: 0.69004976610784
Iteration 300, Loss: 0.689361161953759
Iteration 400, Loss: 0.688948473898394
Iteration 500, Loss: 0.6887000326115876
Iteration 600, Loss: 0.6885498324639526
Iteration 700, Loss: 0.6884586649051255
Iteration 800, Loss: 0.6884031232764123
Iteration 900, Loss: 0.6883691681394983
Iteration 0, Loss: 0.6931471805599452
Iteration 100, Loss: 0.6902501184988656
Iteration 200, Loss: 0.6884895428077522
Iteration 300, Loss: 0.6874180602731121
Iteration 400, Loss: 0.6867648466690123
Iteration 500

  _warn_prf(average, modifier, msg_start, len(result))


Iteration 0, Loss: 0.6931471805599453
Iteration 100, Loss: 0.6822950878511165
Iteration 200, Loss: 0.6756910010939704
Iteration 300, Loss: 0.6716479941355521
Iteration 400, Loss: 0.6691551712251308
Iteration 500, Loss: 0.6676058281842775
Iteration 600, Loss: 0.6666339867647953
Iteration 700, Loss: 0.6660176222450582
Iteration 800, Loss: 0.6656212959436019
Iteration 900, Loss: 0.6653619646839529


  _warn_prf(average, modifier, msg_start, len(result))


Iteration 0, Loss: 0.6931471805599453
Iteration 100, Loss: 0.6828489908239939
Iteration 200, Loss: 0.6768706785703121
Iteration 300, Loss: 0.6733297795763996
Iteration 400, Loss: 0.6711934361812952
Iteration 500, Loss: 0.669881410034053
Iteration 600, Loss: 0.6690605952924467
Iteration 700, Loss: 0.6685364416202352
Iteration 800, Loss: 0.6681937373190079
Iteration 900, Loss: 0.6679634607484325
Iteration 0, Loss: 0.6931471805599453
Iteration 100, Loss: 0.6899472085597042
Iteration 200, Loss: 0.6880102042679548
Iteration 300, Loss: 0.686835934691853
Iteration 400, Loss: 0.6861227964371405
Iteration 500, Loss: 0.6856889508903062
Iteration 600, Loss: 0.6854245995377214
Iteration 700, Loss: 0.685263303769554
Iteration 800, Loss: 0.6851647736583573
Iteration 900, Loss: 0.6851045264216727
Iteration 0, Loss: 0.6931471805599453
Iteration 100, Loss: 0.6888060137788693
Iteration 200, Loss: 0.6860700354268467
Iteration 300, Loss: 0.6843336154850718
Iteration 400, Loss: 0.683222861119546
Iteration 

  _warn_prf(average, modifier, msg_start, len(result))


Iteration 0, Loss: 0.6931471805599453
Iteration 100, Loss: 0.6896083231208086
Iteration 200, Loss: 0.6875523804654314
Iteration 300, Loss: 0.6863498394262645
Iteration 400, Loss: 0.6856417454758975
Iteration 500, Loss: 0.6852221813720203
Iteration 600, Loss: 0.6849721533152636
Iteration 700, Loss: 0.6848223820438945
Iteration 800, Loss: 0.6847322445399131
Iteration 900, Loss: 0.6846777644523743
Iteration 0, Loss: 0.6931471805599453
Iteration 100, Loss: 0.681343346935875
Iteration 200, Loss: 0.6741756576311704
Iteration 300, Loss: 0.6698083576826257
Iteration 400, Loss: 0.6671365765296137
Iteration 500, Loss: 0.6654954569415322
Iteration 600, Loss: 0.6644836336746966
Iteration 700, Loss: 0.663857703183562
Iteration 800, Loss: 0.6634693415970888
Iteration 900, Loss: 0.6632277476843508


  _warn_prf(average, modifier, msg_start, len(result))


Iteration 0, Loss: 0.6931471805599453
Iteration 100, Loss: 0.6874635298033883
Iteration 200, Loss: 0.6846663866615278
Iteration 300, Loss: 0.6832222551323992
Iteration 400, Loss: 0.6824465849676329
Iteration 500, Loss: 0.6820167304617015
Iteration 600, Loss: 0.6817726099198294
Iteration 700, Loss: 0.6816313020267424
Iteration 800, Loss: 0.6815482963896438
Iteration 900, Loss: 0.6814989894471086
Iteration 0, Loss: 0.6931471805599453
Iteration 100, Loss: 0.691892947297369
Iteration 200, Loss: 0.6911328991708829
Iteration 300, Loss: 0.6906720520744359
Iteration 400, Loss: 0.690392444674672
Iteration 500, Loss: 0.690222694465806
Iteration 600, Loss: 0.6901195792487625
Iteration 700, Loss: 0.6900569087845956
Iteration 800, Loss: 0.690018801477856
Iteration 900, Loss: 0.6899956201119168


  _warn_prf(average, modifier, msg_start, len(result))


Iteration 0, Loss: 0.6931471805599454
Iteration 100, Loss: 0.6826493401360584
Iteration 200, Loss: 0.6760407504140915
Iteration 300, Loss: 0.6718128117406558
Iteration 400, Loss: 0.6690560130951518
Iteration 500, Loss: 0.6672195996565862
Iteration 600, Loss: 0.6659674720479386
Iteration 700, Loss: 0.665092521799508
Iteration 800, Loss: 0.6644656840583398
Iteration 900, Loss: 0.6640054980771446
Iteration 0, Loss: 0.6931471805599453
Iteration 100, Loss: 0.6919844221691881
Iteration 200, Loss: 0.6912969110414817
Iteration 300, Loss: 0.690885094513418
Iteration 400, Loss: 0.6906353064538941
Iteration 500, Loss: 0.6904818661875219
Iteration 600, Loss: 0.6903863361931861
Iteration 700, Loss: 0.6903259726710188
Iteration 800, Loss: 0.6902871872177766
Iteration 900, Loss: 0.6902617900565626
Iteration 0, Loss: 0.6931471805599453
Iteration 100, Loss: 0.6895326409942151
Iteration 200, Loss: 0.6873286016997209
Iteration 300, Loss: 0.6859821779806213
Iteration 400, Loss: 0.6851578742205412
Iteratio

  _warn_prf(average, modifier, msg_start, len(result))


Iteration 0, Loss: 0.6931471805599453
Iteration 100, Loss: 0.6920966387801158
Iteration 200, Loss: 0.6914656761266542
Iteration 300, Loss: 0.691081526785553
Iteration 400, Loss: 0.6908444300715857
Iteration 500, Loss: 0.6906961268258406
Iteration 600, Loss: 0.6906021619957396
Iteration 700, Loss: 0.6905418906590262
Iteration 800, Loss: 0.6905027800342393
Iteration 900, Loss: 0.6904771233975605
Iteration 0, Loss: 0.6931471805599453
Iteration 100, Loss: 0.6770743986784117
Iteration 200, Loss: 0.6689257290278333
Iteration 300, Loss: 0.6645672928366391
Iteration 400, Loss: 0.6621058088543074
Iteration 500, Loss: 0.6606404667395803
Iteration 600, Loss: 0.6597232576431681
Iteration 700, Loss: 0.6591213045425067
Iteration 800, Loss: 0.6587083661956487
Iteration 900, Loss: 0.658413291007046
Iteration 0, Loss: 0.6931471805599454
Iteration 100, Loss: 0.6823509643978022
Iteration 200, Loss: 0.6757922773519516
Iteration 300, Loss: 0.6717881921613896
Iteration 400, Loss: 0.669329343211601
Iteration

  _warn_prf(average, modifier, msg_start, len(result))


Iteration 0, Loss: 0.6931471805599453
Iteration 100, Loss: 0.6658709173661822
Iteration 200, Loss: 0.6494009800114527
Iteration 300, Loss: 0.6393656063574409
Iteration 400, Loss: 0.6331895640948115
Iteration 500, Loss: 0.6293527739685953
Iteration 600, Loss: 0.6269494323411221
Iteration 700, Loss: 0.6254333738419219
Iteration 800, Loss: 0.6244713884721107
Iteration 900, Loss: 0.623857998015836
Iteration 0, Loss: 0.6931471805599453
Iteration 100, Loss: 0.6599618726852476
Iteration 200, Loss: 0.6401947459585032
Iteration 300, Loss: 0.6282446171478961
Iteration 400, Loss: 0.620910694474993
Iteration 500, Loss: 0.6163488305516528
Iteration 600, Loss: 0.6134787070607279
Iteration 700, Loss: 0.6116558414878944
Iteration 800, Loss: 0.6104891448985471
Iteration 900, Loss: 0.6097377162554926
Iteration 0, Loss: 0.6931471805599453
Iteration 100, Loss: 0.6809101447071659
Iteration 200, Loss: 0.6734019733857362
Iteration 300, Loss: 0.6687669221097573
Iteration 400, Loss: 0.6658842833828484
Iteratio

  _warn_prf(average, modifier, msg_start, len(result))


Iteration 0, Loss: 0.6931471805599452
Iteration 100, Loss: 0.6459761324504484
Iteration 200, Loss: 0.613726497651993
Iteration 300, Loss: 0.5909158239407505
Iteration 400, Loss: 0.5742140219394719
Iteration 500, Loss: 0.5615997584925142
Iteration 600, Loss: 0.5518178027743416
Iteration 700, Loss: 0.5440640035922575
Iteration 800, Loss: 0.5378058864245463
Iteration 900, Loss: 0.5326793730085325
Iteration 0, Loss: 0.6931471805599453
Iteration 100, Loss: 0.6846062952842208
Iteration 200, Loss: 0.6794244552515382
Iteration 300, Loss: 0.6762725707990391
Iteration 400, Loss: 0.6743496992782904
Iteration 500, Loss: 0.6731731937571763
Iteration 600, Loss: 0.6724514624474208
Iteration 700, Loss: 0.6720077135748924
Iteration 800, Loss: 0.6717343623107588
Iteration 900, Loss: 0.671565713644785
Iteration 0, Loss: 0.6931471805599453
Iteration 100, Loss: 0.6796338250489358
Iteration 200, Loss: 0.6713739563791807
Iteration 300, Loss: 0.6662609783334706
Iteration 400, Loss: 0.6630519461038497
Iteratio

  _warn_prf(average, modifier, msg_start, len(result))


Iteration 0, Loss: 0.6931471805599453
Iteration 100, Loss: 0.6055075148582609
Iteration 200, Loss: 0.5517620414552795
Iteration 300, Loss: 0.5179884950450749
Iteration 400, Loss: 0.496165740856457
Iteration 500, Loss: 0.4816829235625221
Iteration 600, Loss: 0.4718362628919395
Iteration 700, Loss: 0.46499687458951927
Iteration 800, Loss: 0.4601557342669759
Iteration 900, Loss: 0.456671172900638


  _warn_prf(average, modifier, msg_start, len(result))


Iteration 0, Loss: 0.6931471805599453
Iteration 100, Loss: 0.6809073010638185
Iteration 200, Loss: 0.6735317932490381
Iteration 300, Loss: 0.6690645951049684
Iteration 400, Loss: 0.6663434338191194
Iteration 500, Loss: 0.6646769387235159
Iteration 600, Loss: 0.6636515301865133
Iteration 700, Loss: 0.6630180746354216
Iteration 800, Loss: 0.6626254625644402
Iteration 900, Loss: 0.6623814693116207
Iteration 0, Loss: 0.6931471805599453
Iteration 100, Loss: 0.6915305835126238
Iteration 200, Loss: 0.6905635255057597
Iteration 300, Loss: 0.6899697004196674
Iteration 400, Loss: 0.6895953400375892
Iteration 500, Loss: 0.6893532230661343
Iteration 600, Loss: 0.6891928132633651
Iteration 700, Loss: 0.6890841599968577
Iteration 800, Loss: 0.6890090923022997
Iteration 900, Loss: 0.688956321527222
Iteration 0, Loss: 0.6931471805599454
Iteration 100, Loss: 0.6886570066621949
Iteration 200, Loss: 0.685997446242986
Iteration 300, Loss: 0.6844126041293915
Iteration 400, Loss: 0.6834626459906449
Iteratio

  _warn_prf(average, modifier, msg_start, len(result))


Iteration 0, Loss: 0.6931471805599453
Iteration 100, Loss: 0.6737464877754505
Iteration 200, Loss: 0.661916654116816
Iteration 300, Loss: 0.6546053414508267
Iteration 400, Loss: 0.6500195958490964
Iteration 500, Loss: 0.6470987628593635
Iteration 600, Loss: 0.6452083505196816
Iteration 700, Loss: 0.6439641247767746
Iteration 800, Loss: 0.6431305559643712
Iteration 900, Loss: 0.6425615609143579
Iteration 0, Loss: 0.6931471805599453
Iteration 100, Loss: 0.6928551241243154
Iteration 200, Loss: 0.6926966534773301
Iteration 300, Loss: 0.6926098327258713
Iteration 400, Loss: 0.6925617398307973
Iteration 500, Loss: 0.6925347685894899
Iteration 600, Loss: 0.6925194351480595
Iteration 700, Loss: 0.6925105881282717
Iteration 800, Loss: 0.6925054027909191
Iteration 900, Loss: 0.6925023135670877
Iteration 0, Loss: 0.6931471805599453
Iteration 100, Loss: 0.6831166884448852
Iteration 200, Loss: 0.6771255254311638
Iteration 300, Loss: 0.6735258379473585
Iteration 400, Loss: 0.671349873307377
Iteratio

**Hồi quy Logistic sử dụng Thuật toán Stochastic Gradient Descent cho Hồi quy Logistic**

In [4]:
#Dựa trên thư viện SGDClassifier trong Sklearnt 
from sklearn.linear_model import SGDClassifier
from sklearn.model_selection import train_test_split
# Đọc dữ liệu
# Đường dẫn đến thư mục chứa các file CSV
folder_path = r'D:\DeTaiNam2024\SoLieu\CK_HOSE_VNSTOCK'

# Lấy danh sách tất cả các file CSV trong thư mục
file_list = [f for f in os.listdir(folder_path) if f.endswith('.csv')]
results=[]
# Vòng lặp qua từng file CSV
for file_name in file_list:
    # Đọc dữ liệu từ file CSV
    file_path = os.path.join(folder_path, file_name)
    df = pd.read_csv(file_path)

    # Giả sử dữ liệu có cột: 'time', 'close', 'volume'
    df = df.sort_values('time')

    # Tạo biến Y: 1 nếu giá tăng, 0 nếu giá giảm
    df['Y'] = (df['close'].diff() > 0).astype(int)

    # Tạo các đặc trưng (ở đây chỉ sử dụng giá hôm trước và khối lượng giao dịch)
    df['Close_lag1'] = df['close'].shift(1)
    df['Volume_lag1'] = df['volume'].shift(1)
    df.dropna(inplace=True)

    # Chọn X (các đặc trưng) và Y (nhãn mục tiêu)
    X = df[['Close_lag1', 'Volume_lag1']].values
    Y = df['Y'].values

    # Chuẩn hóa dữ liệu
    scaler = StandardScaler()
    X = scaler.fit_transform(X)
    # Chia dữ liệu thành tập huấn luyện và kiểm tra
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)
    # Khởi tạo mô hình SGD Logistic Regression
    sgd_classifier = SGDClassifier(loss='huber', learning_rate='optimal', eta0=0.01, max_iter=1000)
    # Huấn luyện mô hình
    sgd_classifier.fit(X_train, Y_train)
    # Dự đoán
    Y_pred = sgd_classifier.predict(X_test)
    # Tính độ chính xác
    accuracy = accuracy_score(Y_test, Y_pred)
    precision = precision_score(Y_test, Y_pred, average='weighted')
    recall = recall_score(Y_test, Y_pred, average='weighted')
    f1 = f1_score(Y_test, Y_pred, average='weighted')
    
      # Lưu kết quả vào danh sách
    results.append({
        'file': file_name,
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1_score': f1
    })
# Ghi kết quả ra file CSV
results_df = pd.DataFrame(results)
results_df.to_csv(r'D:\DeTaiNam2024\SoLieu\CK_HOSE_VNSTOCK\output\logistic_regression_SGD_results_HOSE.csv', index=False)

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

ValueError: The number of classes has to be greater than one; got 1 class

In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report
# Hàm sigmoid
def sigmoid(z):
    return 1 / (1 + np.exp(-z))

# Hàm mất mát (binary cross-entropy)
def compute_loss(Y, Y_hat):
    m = Y.shape[0]
    return -(1/m) * np.sum(Y * np.log(Y_hat) + (1 - Y) * np.log(1 - Y_hat))

# Hàm Batch Gradient Descent
def batch_gradient_descent(X, Y, learning_rate=0.01, iterations=1000):
    m, n = X.shape
    # Khởi tạo tham số (weights và bias)
    weights = np.zeros(n)
    bias = 0
    losses = []

    for i in range(iterations):
        # Tính toán dự đoán Y_hat
        Z = np.dot(X, weights) + bias
        Y_hat = sigmoid(Z)

        # Tính gradient của hàm mất mát
        dw = (1/m) * np.dot(X.T, (Y_hat - Y))
        db = (1/m) * np.sum(Y_hat - Y)

        # Cập nhật trọng số
        weights -= learning_rate * dw
        bias -= learning_rate * db

        # Tính và lưu lại hàm mất mát
        loss = compute_loss(Y, Y_hat)
        losses.append(loss)

        # In ra mỗi 100 iterations
        if i % 100 == 0:
            print(f'Iteration {i}, Loss: {loss}')

    return weights, bias, losses

# Hàm dự đoán
def predict(X, weights, bias):
    Z = np.dot(X, weights) + bias
    Y_hat = sigmoid(Z)
    return np.where(Y_hat >= 0.5, 1, 0)

# Đọc dữ liệu
df = pd.read_csv('cpi.csv')

# Giả sử dữ liệu có cột: 'time', 'close', 'Volume'
df = df.sort_values('t')

# Tạo biến Y: 1 nếu giá tăng, 0 nếu giá giảm
df['Y'] = (df['cpi'].diff() > 0).astype(int)

# Tạo các đặc trưng (ở đây chỉ sử dụng cpi hôm trước)
df['cpi_lag1'] = df['cpi'].shift(1)

df.dropna(inplace=True)

# Chọn X (các đặc trưng) và Y (nhãn mục tiêu)
X = df[['cpi_lag1']].values
Y = df['Y'].values

# Chuẩn hóa dữ liệu
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Chia dữ liệu thành tập huấn luyện và kiểm tra
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

# Huấn luyện mô hình bằng Batch Gradient Descent
weights, bias, losses = batch_gradient_descent(X_train, Y_train, learning_rate=0.01, iterations=1000)

# Dự đoán trên tập kiểm tra
Y_pred = predict(X_test, weights, bias)

# Đánh giá mô hình
accuracy = np.mean(Y_pred == Y_test)
print(f'Accuracy: {accuracy:.2f}')
# Báo cáo chi tiết kết quả dự đoán
print(classification_report(Y_test, Y_pred))

Iteration 0, Loss: 0.6931471805599453
Iteration 100, Loss: 0.6581083158597071
Iteration 200, Loss: 0.6369392865692096
Iteration 300, Loss: 0.6235701815708732
Iteration 400, Loss: 0.6147476565223007
Iteration 500, Loss: 0.6087058740849657
Iteration 600, Loss: 0.6044419526871659
Iteration 700, Loss: 0.6013582400752571
Iteration 800, Loss: 0.5990829158545075
Iteration 900, Loss: 0.597375972085473
Accuracy: 0.71
              precision    recall  f1-score   support

           0       0.78      0.56      0.65        32
           1       0.67      0.85      0.75        33

    accuracy                           0.71        65
   macro avg       0.72      0.71      0.70        65
weighted avg       0.72      0.71      0.70        65



**CHỈ SỐ KINH TẾ VĨ MÔ: CPI**

In [19]:

# Đọc dữ liệu
df = pd.read_csv('cpi.csv')
#Kiểm tra tiêu đề các cột
df.head()
# Giả sử dữ liệu có cột: 't', 'cpi'
df = df.sort_values('t')

# Tạo biến Y: 1 nếu giá tăng, 0 nếu giá giảm
df['Y'] = (df['cpi'].diff() > 0).astype(int)

In [4]:
# Hiển thị dữ liệu với cột Y
print(df[['t', 'cpi', 'Y']])

           t     cpi  Y
0     1995M1  103.80  0
9    1995M10  100.10  0
10   1995M11  100.10  0
11   1995M12  100.30  1
1     1995M2  103.40  1
..       ...     ... ..
316   2021M5  100.16  1
317   2021M6  100.19  1
318   2021M7  100.62  1
319   2021M8  100.25  0
320   2021M9   99.38  0

[324 rows x 3 columns]


In [6]:
df.head()

Unnamed: 0,t,cpi,Y
0,1995M1,103.8,0
9,1995M10,100.1,0
10,1995M11,100.1,0
11,1995M12,100.3,1
1,1995M2,103.4,1


In [20]:
# Import các thư viện cần thiết
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Bước 1: Đọc dữ liệu, tạo biến mục tiêu Y
data = pd.read_csv('cpi.csv')
#Kiểm tra tiêu đề các cột
df.head()
#Dữ liệu có cột: 't', 'cpi'
df = df.sort_values('t')

# Tạo biến Y: 1 nếu giá tăng, 0 nếu giá giảm
df['Y'] = (df['cpi'].diff() > 0).astype(int)

# Bước 2: Tạo các đặc trưng (ở đây chỉ sử dụng cpi hôm trước)
df['cpi_lag1'] = df['cpi'].shift(1)

df.dropna(inplace=True)

# Chọn X (các đặc trưng) và Y (nhãn mục tiêu)
X = df[['cpi_lag1']].values
Y = df['Y'].values
# Bước 3: Chia dữ liệu thành tập huấn luyện và kiểm tra
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

# Bước 4: Chuẩn hóa dữ liệu
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Bước 5: Huấn luyện mô hình Logistic Regression
model = LogisticRegression()
model.fit(X_train_scaled, y_train)

# Bước 6: Dự đoán trên tập kiểm tra
y_pred = model.predict(X_test_scaled)

# Bước 7: Đánh giá mô hình
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
class_report = classification_report(y_test, y_pred)

print(f"Độ chính xác của mô hình: {accuracy * 100:.2f}%")
print("Confusion Matrix:")
print(conf_matrix)
print("Classification Report:")
print(class_report)


Độ chính xác của mô hình: 67.69%
Confusion Matrix:
[[18 14]
 [ 7 26]]
Classification Report:
              precision    recall  f1-score   support

           0       0.72      0.56      0.63        32
           1       0.65      0.79      0.71        33

    accuracy                           0.68        65
   macro avg       0.69      0.68      0.67        65
weighted avg       0.68      0.68      0.67        65

