In [19]:
# Importación de librerías
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.neural_network import MLPClassifier  # ANN
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC  # SVM
from sklearn.metrics import accuracy_score
from datetime import datetime as dt
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split

In [20]:
# Cargar los datos
dataset = pd.read_csv('../data/processed/features_for_model.csv')
dataset.head()

Unnamed: 0.1,Unnamed: 0,Open,High,Low,Close,Volume,Dividends,Stock Splits,Capital Gains,Daily Change,Price Range,Return
0,0,137.610001,137.789993,136.440002,137.059998,7037300,0.0,0.0,0.0,1.349991,-0.550003,0.0
1,1,137.029999,137.589996,136.190002,137.429993,6448600,0.0,0.0,0.0,1.399994,0.399994,0.0027
2,2,137.800003,138.220001,137.639999,137.979996,8776000,0.0,0.0,0.0,0.580002,0.179993,0.004002
3,3,138.389999,138.940002,137.869995,138.559998,5220500,0.0,0.0,0.0,1.070007,0.169998,0.004204
4,4,138.029999,138.419998,137.970001,138.210007,10106700,0.0,0.0,0.0,0.449997,0.180008,-0.002526


In [43]:
import pandas as pd

# Convertir la columna de fecha a datetime (si fuera necesario en análisis posterior)
dataset['Volume'] = pd.to_datetime(dataset['Volume'], errors='coerce')

# Crear la columna objetivo: 1 si el precio de cierre es mayor que el de apertura, 0 en caso contrario
dataset['Target'] = (dataset['Close'] > dataset['Open'])

# Seleccionar características y objetivo
features = ['Open', 'High', 'Low', 'Close']
X = dataset[features]
y = dataset['Target']             # Etiquetas

In [45]:
# Dividir los datos en conjuntos de entrenamiento y prueba (80% - 20%)
# Dividir el conjunto de datos en entrenamiento y prueba
from sklearn.metrics import classification_report, confusion_matrix
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


#X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

### 1. Arboles de decisión

In [46]:
# Verificar el tipo de datos de las columnas seleccionadas
print(dataset.dtypes)

Unnamed: 0                int64
Open                    float64
High                    float64
Low                     float64
Close                   float64
Volume           datetime64[ns]
Dividends               float64
Stock Splits            float64
Capital Gains           float64
Daily Change            float64
Price Range             float64
Return                  float64
Target                     bool
dtype: object


In [48]:
# Convertir columnas específicas a tipo numérico
columnas_a_convertir = ['Open', 'High', 'Low', 'Close', 'Volume']
dataset[columnas_a_convertir] =dataset[columnas_a_convertir].apply(pd.to_numeric, errors='coerce')

# Verificar si la conversión fue exitosa
print(dataset.dtypes)

Unnamed: 0         int64
Open             float64
High             float64
Low              float64
Close            float64
Volume             int64
Dividends        float64
Stock Splits     float64
Capital Gains    float64
Daily Change     float64
Price Range      float64
Return           float64
Target              bool
dtype: object


In [50]:
dataset['Volume'] = dataset['Volume'].astype('int64')  # Convertir a entero

In [51]:
# Asegurarse de que solo se seleccionan columnas numéricas
features = ['Open', 'High', 'Low', 'Volume']  # Excluye cualquier columna no numérica
X = dataset[features]
y = dataset['Target']

In [52]:
# Dividir los datos en conjuntos de entrenamiento y prueba
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Entrenar el modelo de árbol de decisión
decision_tree = DecisionTreeClassifier(random_state=42)
decision_tree.fit(X_train, y_train)

# Realizar predicciones y evaluar
y_pred = decision_tree.predict(X_test)
report = classification_report(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)

report, conf_matrix, accuracy

('              precision    recall  f1-score   support\n\n       False       0.71      0.64      0.67       129\n        True       0.65      0.72      0.69       123\n\n    accuracy                           0.68       252\n   macro avg       0.68      0.68      0.68       252\nweighted avg       0.68      0.68      0.68       252\n',
 array([[82, 47],
        [34, 89]]),
 0.6785714285714286)

### 2. Random Forest

In [54]:
# Dividir los datos en conjuntos de entrenamiento y prueba
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Inicializar y entrenar el modelo de Random Forest
random_forest = RandomForestClassifier(n_estimators=100, random_state=42)
random_forest.fit(X_train, y_train)

# Realizar predicciones
y_pred = random_forest.predict(X_test)

# Evaluar el modelo
report = classification_report(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)

# Mostrar los resultados
print("Random Forest - Resultados")
print("Exactitud:", accuracy)
print("Matriz de confusión:\n", conf_matrix)
print("Reporte de clasificación:\n", report)

Random Forest - Resultados
Exactitud: 0.6825396825396826
Matriz de confusión:
 [[89 40]
 [40 83]]
Reporte de clasificación:
               precision    recall  f1-score   support

       False       0.69      0.69      0.69       129
        True       0.67      0.67      0.67       123

    accuracy                           0.68       252
   macro avg       0.68      0.68      0.68       252
weighted avg       0.68      0.68      0.68       252



### 3. Naives Bayes

In [55]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.preprocessing import StandardScaler

# Normalizar las características para Naive Bayes
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Crear el modelo de Naive Bayes
modelo6 = GaussianNB()

# Entrenar el modelo
modelo6.fit(X_train, y_train)

# Predecir los resultados en el conjunto de prueba
y_pred = modelo6.predict(X_test)

# Evaluar el modelo
print("Matriz de Confusión:")
print(confusion_matrix(y_test, y_pred))

print("\nReporte de Clasificación:")
print(classification_report(y_test, y_pred))

print("\nPrecisión del Modelo:")
print(accuracy_score(y_test, y_pred))

Matriz de Confusión:
[[45 84]
 [36 87]]

Reporte de Clasificación:
              precision    recall  f1-score   support

       False       0.56      0.35      0.43       129
        True       0.51      0.71      0.59       123

    accuracy                           0.52       252
   macro avg       0.53      0.53      0.51       252
weighted avg       0.53      0.52      0.51       252


Precisión del Modelo:
0.5238095238095238


### 4. Suport Vector Machine (SVM)

In [56]:
# Normalizar las características para mejorar el rendimiento de SVM
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Crear el modelo SVM con un kernel lineal
model = SVC(kernel='linear', C=1.0, random_state=42)

# Entrenar el modelo
model.fit(X_train, y_train)

# Predecir los resultados en el conjunto de prueba
y_pred = model.predict(X_test)

# Evaluar el modelo
print("Matriz de Confusión:")
print(confusion_matrix(y_test, y_pred))

print("\nReporte de Clasificación:")
print(classification_report(y_test, y_pred))

print("\nPrecisión del Modelo:")
print(accuracy_score(y_test, y_pred))

Matriz de Confusión:
[[ 37  92]
 [  1 122]]

Reporte de Clasificación:
              precision    recall  f1-score   support

       False       0.97      0.29      0.44       129
        True       0.57      0.99      0.72       123

    accuracy                           0.63       252
   macro avg       0.77      0.64      0.58       252
weighted avg       0.78      0.63      0.58       252


Precisión del Modelo:
0.6309523809523809


In [41]:
pip install tensorflow

Collecting tensorflow
  Downloading tensorflow-2.18.0-cp311-cp311-win_amd64.whl.metadata (3.3 kB)
Collecting tensorflow-intel==2.18.0 (from tensorflow)
  Downloading tensorflow_intel-2.18.0-cp311-cp311-win_amd64.whl.metadata (4.9 kB)
Collecting absl-py>=1.0.0 (from tensorflow-intel==2.18.0->tensorflow)
  Downloading absl_py-2.1.0-py3-none-any.whl.metadata (2.3 kB)
Collecting astunparse>=1.6.0 (from tensorflow-intel==2.18.0->tensorflow)
  Downloading astunparse-1.6.3-py2.py3-none-any.whl.metadata (4.4 kB)
Collecting flatbuffers>=24.3.25 (from tensorflow-intel==2.18.0->tensorflow)
  Downloading flatbuffers-24.3.25-py2.py3-none-any.whl.metadata (850 bytes)
Collecting gast!=0.5.0,!=0.5.1,!=0.5.2,>=0.2.1 (from tensorflow-intel==2.18.0->tensorflow)
  Downloading gast-0.6.0-py3-none-any.whl.metadata (1.3 kB)
Collecting google-pasta>=0.1.1 (from tensorflow-intel==2.18.0->tensorflow)
  Downloading google_pasta-0.2.0-py3-none-any.whl.metadata (814 bytes)
Collecting libclang>=13.0.0 (from tensorf

  You can safely remove it manually.
  You can safely remove it manually.


### 5. Redes Neuronales

In [57]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

# Normalizar las características (esto es importante para redes neuronales)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Construcción de un modelo simple de red neuronal
modelo_nn = Sequential([
    Dense(64, activation='relu', input_shape=(X_train_scaled.shape[1],)),
    Dense(32, activation='relu'),
    Dense(1, activation='sigmoid')  # Cambia según el tipo de problema
])

# Compilar el modelo
modelo_nn.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Entrenar el modelo
modelo_nn.fit(X_train_scaled, y_train, epochs=10, batch_size=32, validation_data=(X_test_scaled, y_test))

# Evaluar el modelo
y_pred_nn = (modelo_nn.predict(X_test_scaled) > 0.5).astype("int32")
print(classification_report(y_test, y_pred_nn))
print(confusion_matrix(y_test, y_pred_nn))

Epoch 1/10


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 10ms/step - accuracy: 0.4594 - loss: 0.6978 - val_accuracy: 0.4960 - val_loss: 0.6937
Epoch 2/10
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5080 - loss: 0.6925 - val_accuracy: 0.5278 - val_loss: 0.6931
Epoch 3/10
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.5693 - loss: 0.6874 - val_accuracy: 0.5079 - val_loss: 0.6932
Epoch 4/10
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.5473 - loss: 0.6887 - val_accuracy: 0.5159 - val_loss: 0.6914
Epoch 5/10
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5323 - loss: 0.6905 - val_accuracy: 0.5238 - val_loss: 0.6908
Epoch 6/10
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5329 - loss: 0.6896 - val_accuracy: 0.5238 - val_loss: 0.6914
Epoch 7/10
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━