In [None]:
pip install pandas sklearn

In [34]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report
from sklearn.svm import SVC
import pandas as pd

In [73]:
data = pd.read_csv("data/csv/reports-data.csv")

In [74]:
data.head()

Unnamed: 0,releaseDate,stockDirection,ipcaData4Weeks,ipcaData1Week,ipcaDataToday,pibData4Weeks,pibData1Week,pibDataToday,cambioData4Weeks,cambioData1Week,...,investimentoDataToday,dividaLiquidaData4Weeks,dividaLiquidaData1Week,dividaLiquidaDataToday,resPrimarioData4Weeks,resPrimarioData1Week,resPrimarioDataToday,resNominalData4Weeks,resNominalData1Week,resNominalDataToday
0,2024-03-11,-1,3.82,3.76,3.77,1.6,1.77,1.78,4.92,4.93,...,67.0,63.6,63.74,63.64,-0.8,-0.78,-0.79,-6.8,-6.9,-6.9
1,2024-03-18,1,3.82,3.77,3.79,1.68,1.78,1.8,4.93,4.93,...,65.5,63.6,63.64,63.9,-0.8,-0.79,-0.75,-6.8,-6.9,-6.8
2,2024-04-01,-1,3.76,3.75,3.75,1.77,1.85,1.89,4.93,4.95,...,65.0,63.74,63.94,63.85,-0.78,-0.75,-0.7,-6.9,-6.8,-6.9
3,2024-04-08,1,3.77,3.75,3.76,1.78,1.89,1.9,4.93,4.95,...,65.0,63.64,63.85,63.85,-0.79,-0.7,-0.7,-6.9,-6.9,-6.9
4,2024-04-15,1,3.79,3.76,3.71,1.8,1.9,1.95,4.95,4.95,...,67.0,63.9,63.85,63.77,-0.75,-0.7,-0.7,-6.8,-6.9,-6.8


In [75]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9 entries, 0 to 8
Data columns (total 38 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   releaseDate              9 non-null      object 
 1   stockDirection           9 non-null      int64  
 2   ipcaData4Weeks           9 non-null      float64
 3   ipcaData1Week            9 non-null      float64
 4   ipcaDataToday            9 non-null      float64
 5   pibData4Weeks            9 non-null      float64
 6   pibData1Week             9 non-null      float64
 7   pibDataToday             9 non-null      float64
 8   cambioData4Weeks         9 non-null      float64
 9   cambioData1Week          9 non-null      float64
 10  cambioDataToday          9 non-null      float64
 11  selicData4Weeks          9 non-null      float64
 12  selicData1Week           9 non-null      float64
 13  selicDataToday           9 non-null      float64
 14  igpData4Weeks            9 non

In [76]:
data = data.drop("releaseDate", axis=1)
features = data.drop("stockDirection", axis=1)
target = data["stockDirection"]

X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.5)

In [77]:
y_train.head()

0   -1
4    1
2   -1
1    1
Name: stockDirection, dtype: int64

In [78]:
X_train.head()

Unnamed: 0,ipcaData4Weeks,ipcaData1Week,ipcaDataToday,pibData4Weeks,pibData1Week,pibDataToday,cambioData4Weeks,cambioData1Week,cambioDataToday,selicData4Weeks,...,investimentoDataToday,dividaLiquidaData4Weeks,dividaLiquidaData1Week,dividaLiquidaDataToday,resPrimarioData4Weeks,resPrimarioData1Week,resPrimarioDataToday,resNominalData4Weeks,resNominalData1Week,resNominalDataToday
0,3.82,3.76,3.77,1.6,1.77,1.78,4.92,4.93,4.93,9.0,...,67.0,63.6,63.74,63.64,-0.8,-0.78,-0.79,-6.8,-6.9,-6.9
4,3.79,3.76,3.71,1.8,1.9,1.95,4.95,4.95,4.97,9.0,...,67.0,63.9,63.85,63.77,-0.75,-0.7,-0.7,-6.8,-6.9,-6.8
2,3.76,3.75,3.75,1.77,1.85,1.89,4.93,4.95,4.95,9.0,...,65.0,63.74,63.94,63.85,-0.78,-0.75,-0.7,-6.9,-6.8,-6.9
1,3.82,3.77,3.79,1.68,1.78,1.8,4.93,4.93,4.95,9.0,...,65.5,63.6,63.64,63.9,-0.8,-0.79,-0.75,-6.8,-6.9,-6.8


In [79]:

def create_models():
    models = [
        ('RandomForest', RandomForestClassifier(n_estimators=100, max_depth=5)),
        ('SVC', SVC(kernel='linear')),
        ('KNN', KNeighborsClassifier(n_neighbors=4)),
        ('LogReg', LogisticRegression())
    ]
    return models

In [80]:
def train_and_evaluate(models, X_train, X_test, y_train, y_test):
    
    results = []
    
    for name, model in models:
        # training
        print(f"Training the model {name} with data from training set...")
        model.fit(X_train, y_train)

        # predictions with data from test set
        y_pred = model.predict(X_test)

        # calculation of metrics
        report = classification_report(y_test, y_pred)
        print("Classification Report\n", report)

        results.append([model, {'model': name, 'predictions': y_pred, 'report': report,}])           

    return results

In [81]:
models = create_models()
models

[('RandomForest', RandomForestClassifier(max_depth=5)),
 ('SVC', SVC(kernel='linear')),
 ('KNN', KNeighborsClassifier(n_neighbors=4)),
 ('LogReg', LogisticRegression())]

In [83]:
results = train_and_evaluate(models, X_train, X_test, y_train, y_test)
results

Training the model RandomForest with data from training set...
Classification Report
               precision    recall  f1-score   support

          -1       0.00      0.00      0.00         1
           1       0.75      0.75      0.75         4

    accuracy                           0.60         5
   macro avg       0.38      0.38      0.38         5
weighted avg       0.60      0.60      0.60         5

Training the model SVC with data from training set...
Classification Report
               precision    recall  f1-score   support

          -1       0.00      0.00      0.00         1
           1       0.75      0.75      0.75         4

    accuracy                           0.60         5
   macro avg       0.38      0.38      0.38         5
weighted avg       0.60      0.60      0.60         5

Training the model KNN with data from training set...
Classification Report
               precision    recall  f1-score   support

          -1       0.20      1.00      0.33        

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[[RandomForestClassifier(max_depth=5),
  {'model': 'RandomForest',
   'predictions': array([-1,  1,  1,  1,  1], dtype=int64),
   'report': '              precision    recall  f1-score   support\n\n          -1       0.00      0.00      0.00         1\n           1       0.75      0.75      0.75         4\n\n    accuracy                           0.60         5\n   macro avg       0.38      0.38      0.38         5\nweighted avg       0.60      0.60      0.60         5\n'}],
 [SVC(kernel='linear'),
  {'model': 'SVC',
   'predictions': array([ 1,  1,  1,  1, -1], dtype=int64),
   'report': '              precision    recall  f1-score   support\n\n          -1       0.00      0.00      0.00         1\n           1       0.75      0.75      0.75         4\n\n    accuracy                           0.60         5\n   macro avg       0.38      0.38      0.38         5\nweighted avg       0.60      0.60      0.60         5\n'}],
 [KNeighborsClassifier(n_neighbors=4),
  {'model': 'KNN',
   'pr