#### PREDICCION GENERAL
Se aplica el modelo predictivo a todos los activos 

In [1]:
!pip install pandas_ta



In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
import pandas_ta
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error, mean_absolute_percentage_error
import statsmodels.api as sm
import statsmodels.formula.api as smf
from statsmodels.tsa.stattools import adfuller
import os

rmse = lambda act, pred: np.sqrt(mean_squared_error(act, pred))

  import pandas.util.testing as tm


In [3]:
#if you are using colab uncomment the following line
#from google.colab import drive
#drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [4]:

#directorio donde estan guardados los datos
direccion = os.path.join(os.getcwd(), "drive", "MyDrive", "Proyecto Integrador")
os.chdir(os.path.join(direccion, "raw"))

In [5]:
#Se  cargan los datos
df = pd.read_csv(r"BSE_30.csv",sep=',')
#Se transforma la columna "Date" a formato Datetime
df["Date"] = pd.to_datetime(df["Date"])
# Los datos se pasan de formato long a wide
df_wide = df.pivot(index = "Date", columns="Symbol").copy()["Adj Close"]
df_wide.columns.name = None
#Se reemplazan los valores nulos
df_wide = df_wide.fillna(method="backfill").copy()

## HASTA AQUI ES LA LIMPIEZA________________________________________________________________

df_wide.to_csv(os.path.join(direccion,"trusted", "BSE_30_preprocessing.csv"))
tdir= os.path.join(direccion,"trusted", "BSE_30_preprocessing.csv")




In [6]:
#INICIA LA PREDICCION  ____________________________________________________________________________________________
# Se separa el dataset en test y train con porcentajes de 20% y 80% respectivamente
#como son series de tiempo se toman los primeros registros como train y los ultimos como test
df_close  = pd.read_csv(tdir,sep=',', index_col="Date"   )
t = len(df_close)
pct20 = int(np.round(t*0.2))
train = df_close[:-pct20]
test = df_close[-pct20:]

In [7]:
#Se ajusta un modelo de regresion lineal simple para cada activo y se predicen los ultimos 480 dias de adj_close price
resultados ={}
for activo in train.columns:
    #Train
    activo_train = train[[activo]].copy().rename(columns={activo:"adj_close"})
    activo_train["ema15"] = activo_train.ta.ema(close='adj_close', length=15)
    activo_train = activo_train.dropna()
    #Test
    activo_test = test[[activo]].copy().rename(columns={activo:"adj_close"})
    activo_test["ema15"] = activo_test.ta.ema(close='adj_close', length=15)
    activo_test = activo_test.dropna()

    #Se ajusta el modelo de regresion lineal y se realiza la prediccion
    mod = smf.ols(formula ="adj_close~ ema15", data= activo_train)
    res = mod.fit()

    y_test = activo_test["adj_close"]
    y_pred = res.predict(activo_test["ema15"])
    #Resultados
    MAE = mean_absolute_error(y_test, y_pred).round(2)
    MSE = mean_squared_error(y_test, y_pred).round(2)
    R2 = r2_score(y_test, y_pred).round(2)
    R2_adj = (1 - (1-R2)*(len(y_test)-1)/(len(y_test)- (len(res.params)-1) -1 )).round(3) 
    MAPE = mean_absolute_percentage_error(y_test, y_pred)
    RMSE = rmse(y_test,y_pred)

    resultados[activo] = [y_pred, MAE,MSE,R2,R2_adj,MAPE,RMSE]

    

In [8]:
# Se exporta el adj_close price predecido de cada activo
df_predicted = pd.DataFrame()
for key in resultados.keys():
    df_predicted[key] = resultados[key][0].values

df_predicted.index = resultados["ASIANPAINT"][0].index
#df_predicted.to_csv("BSE30_prediction.csv")
df_predicted.to_csv(os.path.join(direccion,"refined", "BSE30_prediction.csv"))


In [9]:
#metricas del portafolio construido
top5 = ["MARUTI", "ADANIPORTS", "RELIANCE", "TATASTEEL", "YESBANK"]
metricas = ["MAE","MSE","R2","R2_adj","MAPE","RMSE"]

print("MARUTI")
print(resultados['MARUTI'][1:])

print("ADANIPORTS")
print(resultados["ADANIPORTS"][1:])

print("RELIANCE")
print(resultados["RELIANCE"][1:])

print("TATASTEEL")
print(resultados["TATASTEEL"][1:])

print("YESBANK")
print(resultados["YESBANK"][1:])


MARUTI
[128.5, 27853.9, 0.99, 0.99, 0.01946623831060271, 166.89486712216797]
ADANIPORTS
[8.11, 102.13, 0.98, 0.98, 0.024789504393005694, 10.10603246695263]
RELIANCE
[14.36, 385.96, 0.99, 0.99, 0.02019890136056184, 19.645916251555256]
TATASTEEL
[14.19, 300.93, 0.98, 0.98, 0.028571108022257433, 17.34731515158897]
YESBANK
[7.74, 106.84, 0.95, 0.95, 0.02720670081182756, 10.336123488562212]
