Experimento sobre finanzas

In [14]:
import yfinance as yf
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn import tree
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error
from datetime import datetime
from sklearn.model_selection import cross_val_score, KFold

In [15]:
# Ticker handle for the Yahoo Finance symbol 'BTC-USD' (Bitcoin quoted in US dollars).
btc = yf.Ticker('BTC-USD')
 
# Download the price history for that ticker; period='max' requests the
# longest range the data source makes available.
data = btc.history(period='max')
 
# Display (rows, columns) of the downloaded frame as a quick sanity check.
data.shape

(2957, 7)

In [16]:
# Keep only the price and volume columns that later cells work with, then
# preview the most recent rows to confirm the download looks right.
data1 = data.loc[:, ['Open', 'High', 'Low', 'Close', 'Volume']]
data1.tail(n=5)

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2022-10-17 00:00:00+00:00,19268.5625,19635.802734,19173.333984,19550.757812,27472552998
2022-10-18 00:00:00+00:00,19550.466797,19666.994141,19144.769531,19334.416016,30580012344
2022-10-19 00:00:00+00:00,19335.027344,19348.416016,19127.6875,19139.535156,22425387184
2022-10-20 00:00:00+00:00,19138.085938,19315.199219,18971.458984,19053.740234,24493974420
2022-10-21 00:00:00+00:00,19056.195312,19120.3125,18956.990234,18958.34375,26685202432


In [17]:
# Turn the date index into an ordinary column called 'Day'; the frame gets a
# fresh positional index in its place.
data2 = (
    data1
    .rename_axis('Day')
    .reset_index()
)

In [18]:
# Derive calendar features from 'Day': year, month, day of month and weekday.
# dayofweek uses pandas' numbering (Monday=0 ... Sunday=6).
data2['year'] = data2['Day'].dt.year
data2['month'] = data2['Day'].dt.month
data2['day'] = data2['Day'].dt.day
data2['dayofweek'] = data2['Day'].dt.dayofweek

In [19]:
# Preview the first rows to confirm the calendar columns were added.
data2.head(n=5)

Unnamed: 0,Day,Open,High,Low,Close,Volume,year,month,day,dayofweek
0,2014-09-17 00:00:00+00:00,465.864014,468.174011,452.421997,457.334015,21056800,2014,9,17,2
1,2014-09-18 00:00:00+00:00,456.859985,456.859985,413.104004,424.440002,34483200,2014,9,18,3
2,2014-09-19 00:00:00+00:00,424.102997,427.834991,384.532013,394.79599,37919700,2014,9,19,4
3,2014-09-20 00:00:00+00:00,394.673004,423.29599,389.882996,408.903992,36863600,2014,9,20,5
4,2014-09-21 00:00:00+00:00,408.084991,412.425995,393.181,398.821014,26580100,2014,9,21,6


In [20]:
# Drop 'Day': it is a datetime column that the regression cannot consume
# directly, and its information is already carried by year/month/day/dayofweek.
data3 = data2.drop(columns=['Day'])


In [21]:
# Feature matrix: the opening price plus the calendar fields.
X = data3.loc[:, ['Open', 'year', 'month', 'day', 'dayofweek']]

# Target: the closing price of the same row.
y = data3.loc[:, 'Close']

In [22]:
# Hold out 30% of the rows for evaluation; the fixed seed makes the split repeatable.
# NOTE(review): train_test_split shuffles rows by default, so test days are
# interleaved with training days rather than being strictly later in time.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [27]:
print('Entrenamiento modelo 0/3 (Creando modelo)')
# Fit an ordinary least-squares linear regression on the training split.
LReg = LinearRegression()

LReg.fit(X_train, y_train)
# LinearRegression.score() reports the R^2 coefficient on the given split.
print('Linear regression, puntuacion del train:',LReg.score(X_train,y_train))
print('Linear regression, puntuacion del test:',LReg.score(X_test,y_test))
y_pred = LReg.predict(X_test)
# Root-mean-squared error on the test split, in the same units as 'Close'.
# Computed as sqrt(MSE) instead of passing squared=False: that keyword was
# deprecated in scikit-learn 1.4 and removed in 1.6, where it raises TypeError.
print(np.sqrt(mean_squared_error(y_test, y_pred)))

Entrenamiento modelo 0/3 (Creando modelo)
Linear regression, puntuacion del train: 0.9976821364742007
Linear regression, puntuacion del test: 0.997412595276937
872.5232136071529


Vamos a hacer cross-validation

In [24]:
# 6-fold cross-validation over the full dataset, with shuffled folds and a
# fixed seed so the fold assignment is repeatable.
kf = KFold(n_splits=6, shuffle=True, random_state=42)
reg = LinearRegression()
# With no explicit scoring argument the estimator's own score() is used,
# which for LinearRegression is R^2; one value is returned per fold.
cv_scores = cross_val_score(estimator=reg, X=X, y=y, cv=kf)

print(cv_scores)

[0.99758825 0.99744044 0.99664733 0.99795092 0.99753509 0.99816695]


In [25]:
# Show the latest rows; the last 'Open' value is a handy input for the prediction cell.
data3.tail(n=5)

Unnamed: 0,Open,High,Low,Close,Volume,year,month,day,dayofweek
2952,19268.5625,19635.802734,19173.333984,19550.757812,27472552998,2022,10,17,0
2953,19550.466797,19666.994141,19144.769531,19334.416016,30580012344,2022,10,18,1
2954,19335.027344,19348.416016,19127.6875,19139.535156,22425387184,2022,10,19,2
2955,19138.085938,19315.199219,18971.458984,19053.740234,24493974420,2022,10,20,3
2956,19056.195312,19120.3125,18956.990234,18958.34375,26685202432,2022,10,21,4


Trabajamos en la predicción de mañana

In [26]:
# Predict a closing price from a manually typed opening price combined with
# today's calendar fields.
# The value is stored in `open_price` rather than `open` so the builtin open()
# is not shadowed for the rest of the kernel session, and it is converted to
# float so the model receives a numeric feature (a non-numeric entry fails
# here with ValueError instead of deeper inside the model).
open_price = float(input())
today = datetime.today()
year = today.year
month = today.month
day = today.day
dayofweek = today.weekday()  # Monday=0, matching the 'dayofweek' training column
# Single-row frame whose columns match, in name and order, the features in X.
prueba = {'Open':[open_price],
            'year':[year],
            'month':[month],
            'day':[day],
            'dayofweek': [dayofweek],
}
prueba_final = pd.DataFrame(prueba)
print(prueba_final)
# predict() returns an array with one element per input row (one here).
cierre = LReg.predict(prueba_final)
print('Precio de Cierre: $',cierre)

           Open  year  month  day  dayofweek
0  19056.195312  2022     10   21          4
Precio de Cierre: $ [19089.16902742]
