In [None]:
import os
import secrets_acces as sa

In [None]:
import boto3

class AWSHelper:
    """Small wrapper around a boto3 S3 client for downloading and uploading files.

    ``init_s3_session`` must be called before the transfer methods; until then
    they raise ``ConnectionError``.
    """

    def __init__(self):
        self.s3_connected = False
        # Populated by init_s3_session; None until a client has been created.
        self.s3_connection = None

    def init_s3_session(self, aws_access_key_id, aws_secret_access_key):
        """Create the S3 client with the given credentials.

        Args:
            aws_access_key_id: access key id forwarded to ``boto3.client``.
            aws_secret_access_key: secret key forwarded to ``boto3.client``.

        Returns:
            None. A confirmation message is printed.
        """
        self.s3_connection = boto3.client(
            's3',
            aws_access_key_id=aws_access_key_id,
            aws_secret_access_key=aws_secret_access_key,
        )
        # Flag the helper as connected only after the client exists, so a failing
        # boto3.client() call cannot leave s3_connected set to True.
        self.s3_connected = True
        print("Conexión Exitosa!")

    def call_s3_data(self, bucket_name, file_name, download_name):
        """Download object ``file_name`` from ``bucket_name`` to local path ``download_name``.

        Raises:
            ConnectionError: if ``init_s3_session`` has not been called.
        """
        if not self.s3_connected:
            raise ConnectionError("Open S3 Connection before trying to get data!")
        # The connection is a *client*; `.Object(...)` only exists on the S3
        # resource API, so the client-level transfer call is used instead.
        self.s3_connection.download_file(
            Bucket=bucket_name, Key=file_name, Filename=download_name
        )

    def load_s3_data(self, bucket, file_name):
        """Upload local file ``file_name`` to ``bucket`` using ``file_name`` as the object key.

        Raises:
            ConnectionError: if ``init_s3_session`` has not been called.
        """
        if not self.s3_connected:
            raise ConnectionError("Open S3 Connection before trying to push data!")
        self.s3_connection.upload_file(
            Filename=file_name, Bucket=bucket, Key=file_name
        )

In [28]:
import requests
import urllib3
import pandas as pd
import re
from urllib3.exceptions import InsecureRequestWarning
urllib3.disable_warnings(InsecureRequestWarning)

class ConsultasApi:
    """Client for the fiduciaria.grupobancolombia.com investment-fund REST endpoints.

    Constructing an instance downloads the fund list into ``datafondos``.
    ``build`` then queries every fund by ``nit`` and fills:

    * ``dataprocessing``   - raw per-fund extraction (all columns, uncleaned);
    * ``datafondos``       - fund list enriched with the descriptive columns;
    * ``datarentabilidad`` - numeric returns with the text formatting removed.
    """

    URLFONDOS="https://fiduciaria.grupobancolombia.com/consultarFondosInversion/rest/servicio/consultarListaFondos"
    URLRENTABILIDADES = "https://fiduciaria.grupobancolombia.com/consultarFondosInversion/rest/servicio/buscarInformacionFondo"
    # Columns converted from formatted text to float by limpieza_data.
    VARIABLES = ['rentabilidad.anios.anioCorrido', 'rentabilidad.anios.ultimoAnio','rentabilidad.anios.ultimos2Anios','rentabilidad.anios.ultimos3Anios','rentabilidad.dias.mensual', 'rentabilidad.dias.semanal','rentabilidad.dias.semestral','valorDeUnidad', 'valorEnPesos']
    payload={}
    headers={}
    # Seconds to wait for the server; without a timeout requests.get can block indefinitely.
    TIMEOUT = 30
    # NOTE(review): TLS certificate verification is disabled, as in the original
    # request call. This exposes the requests to tampering; set to True (or a CA
    # bundle path) once the endpoint's certificate chain validates.
    VERIFY_SSL = False

    def __init__(self):
        self.datafondos = self.data_extract(self.URLFONDOS)
        self.datarentabilidad = None
        # Raw extraction produced by build(); other cells of this notebook read
        # `dataprocessing` with the descriptive columns still present.
        self.dataprocessing = None

    def _extraer_rentabilidades(self):
        """Query the per-fund endpoint for every ``nit`` in ``datafondos``.

        Returns:
            One DataFrame with the rows of all funds stacked (each response keeps
            its own index), or an empty DataFrame when there are no funds.
        """
        # Collect first and concatenate once: DataFrame.append was removed in
        # pandas 2.0 and growing a frame inside the loop is quadratic.
        respuestas = [
            self.data_extract(url=self.URLRENTABILIDADES + "/" + str(nit))
            for nit in self.datafondos["nit"]
        ]
        if not respuestas:
            return pd.DataFrame()
        return pd.concat(respuestas, sort=False)

    def data_extract(self,url):
        """GET ``url`` and return its JSON payload as a flat, de-duplicated DataFrame.

        The ``nit`` column is cast to ``int64`` so it can be used as a merge key.

        Raises:
            requests.HTTPError: when the server answers with an error status.
            KeyError: when the payload has no ``nit`` field.
        """
        respuesta = requests.get(
            url=url,
            headers=self.headers,
            data=self.payload,
            verify=self.VERIFY_SSL,
            timeout=self.TIMEOUT,
        )
        # Fail here with the HTTP status instead of later while parsing an error page.
        respuesta.raise_for_status()
        data_fondos = pd.json_normalize(respuesta.json()).drop_duplicates()
        data_fondos['nit'] = data_fondos['nit'].astype('int64')
        return data_fondos

    def limpieza_data(self,dataframe):
        """Return a copy of ``dataframe`` with every column in ``VARIABLES`` as float.

        The characters ``,`` ``.`` ``$`` ``%`` are removed and the remaining
        digits are divided by 100, i.e. the text is assumed to carry exactly two
        decimal digits (NOTE(review): confirm against the API format). Missing
        values stay NaN. The input frame is not modified.
        """
        limpio = dataframe.copy()
        for columna in self.VARIABLES:
            solo_digitos = limpio[columna].str.replace(r'[,.$%]', '', regex=True)
            limpio[columna] = solo_digitos.astype("float64") / 100
        return limpio

    def preproccesing(self,df,vars_):
        """Left-join the descriptive columns ``vars_`` of ``df`` onto ``datafondos`` by ``nit``.

        Args:
            df: per-fund extraction containing ``nit`` and every column in ``vars_``.
            vars_: descriptive column names to bring over; not modified.

        Returns:
            A new DataFrame. When both sides carry ``nombre`` the one from
            ``datafondos`` is kept.
        """
        columnas = list(vars_)
        if 'nit' not in columnas:
            columnas.append('nit')
        # Non-inplace drop_duplicates: df[columnas] is a slice, and mutating it
        # in place triggers SettingWithCopyWarning.
        detalles = df[columnas].drop_duplicates()

        # After a previous build() datafondos already holds these columns; merging
        # again would yield *_x / *_y duplicates, so the stale copies are dropped.
        repetidas = [
            col for col in detalles.columns
            if col in self.datafondos.columns and col not in ('nit', 'nombre')
        ]
        base = self.datafondos.drop(columns=repetidas)

        fondos = pd.merge(base, detalles, on="nit", how='left')
        if 'nombre_y' in fondos.columns:
            fondos = fondos.drop(columns='nombre_y').rename(columns={'nombre_x': 'nombre'})
        return fondos

    def build(self):
        """Run the whole pipeline and return the cleaned returns frame.

        Side effects: sets ``dataprocessing`` (raw extraction), replaces
        ``datafondos`` with the enriched fund list and sets ``datarentabilidad``.
        """
        columnas_detalle = ['calificacion','plazo','sociedadAdministradora','nombre']

        datarent = self._extraer_rentabilidades()
        self.dataprocessing = datarent
        self.datafondos = self.preproccesing(datarent, vars_=columnas_detalle)
        # drop() returns a new frame, so the raw extraction kept above is untouched.
        self.datarentabilidad = self.limpieza_data(datarent.drop(columns=columnas_detalle))

        return self.datarentabilidad



In [29]:
# Instantiate the client (its __init__ fetches the fund list from URLFONDOS, so this
# needs network access) and run the full pipeline; build() returns the cleaned
# returns frame, which is what this cell displays.
prueba = ConsultasApi()
prueba.build()

Unnamed: 0,nit,valorDeUnidad,valorEnPesos,fechaCierre,rentabilidad.dias.semanal,rentabilidad.dias.mensual,rentabilidad.dias.semestral,rentabilidad.anios.anioCorrido,rentabilidad.anios.ultimoAnio,rentabilidad.anios.ultimos2Anios,rentabilidad.anios.ultimos3Anios
0,800227622,23594.73,972223200000.0,20210131,5.95,8.79,6.69,9.02,7.55,6.77,6.19
0,900000528,57090.34,59422690000.0,20210131,-91.35,-53.8,42.01,-54.93,-11.79,-1.8,-3.78
0,900000531,31302.72,1635152000000.0,20210131,4.45,9.49,10.08,9.71,10.33,9.36,8.16
0,800244627,35335.76,1818079000000.0,20210131,2.97,5.34,4.23,5.47,6.0,5.5,5.16
0,900237003,16393.41,1103207000000.0,20210131,-0.01,1.76,2.09,1.79,3.42,3.58,3.54
0,800180687,30863.18,15337940000000.0,20210131,0.26,1.39,1.74,1.42,3.09,3.39,3.42
0,800241308,15124.6,186935600000.0,20210131,-59.61,-7.49,16.84,-7.73,3.63,6.27,3.77
0,9004381462,15047.55,3023414000000.0,20210131,-0.01,1.95,1.84,1.92,3.38,3.62,3.58
0,9004724683,12351.47,349023700000.0,20210131,-90.84,-54.07,45.26,-52.95,-12.41,-2.67,-4.44
0,9006349247,24973.4,304532400000.0,20210131,-47.31,50.33,6.29,48.29,14.8,17.07,11.68


__main__.ConsultasApi

In [None]:
# Preview of the frame read from `prueba.dataprocessing`.
# NOTE(review): the cells below index it with 'plazo', 'nit', 'fechaCierre', 'calificacion', ... so it is
# presumably the raw per-fund extraction before cleaning; confirm that ConsultasApi exposes this attribute.
prueba.dataprocessing.head()

In [None]:
# Replace the mis-encoded "días" in one 'plazo' label. The [4] is a hard-coded position in unique().
prueba.dataprocessing['plazo'].unique()[4].replace(u'dÃ\xadas',u'días')

In [None]:
# Mis-encoded fragments; presumably "año" and "días" (TODO confirm against the data).
['aÂ¦o','dÃ\xadas']

In [None]:
# Number of non-null fechaCierre values per (nit, plazo) pair, returned as a flat frame.
prueba.dataprocessing.groupby(["nit","plazo"]).count()["fechaCierre"].to_frame().reset_index()


In [None]:
# nit values that have at least one row where 'plazo' is missing.
prueba.dataprocessing[prueba.dataprocessing["plazo"].isnull()]["nit"].unique()

In [None]:
prueba.datafondos.info()

In [None]:
# NOTE(review): df_preproccesing is only assigned in the next cell, so running the notebook
# top to bottom on a fresh kernel raises NameError here.
df_preproccesing

In [None]:
# Manual version of the steps in ConsultasApi.preproccesing: de-duplicated descriptive columns,
# left-joined onto the fund list by 'nit', keeping the fund list's own 'nombre'.
df_preproccesing = prueba.dataprocessing[['calificacion','plazo','sociedadAdministradora','nombre','nit']].drop_duplicates()
#prueba.datafondos['nit'] = prueba.datafondos['nit'].astype('int64')
fondos = pd.merge(prueba.datafondos,df_preproccesing,on = "nit", how = 'left')
fondos.drop('nombre_y',axis =1,inplace = True)
fondos.rename(columns = {'nombre_x':'nombre'},inplace=True)

In [None]:
# 'nit' of the merged frame at index label 0.
a = fondos['nit'][0]

In [None]:
class DataClean:
    """Placeholder for data-cleaning routines; nothing is implemented yet."""

    def __init__(self):
        pass

    def limpieza_data(self,df):
        """Stub: accepts a frame and currently returns None."""
        pass

    # The unfinished `def` that followed was a syntax error preventing the cell
    # from running; add further methods here once they have a name and a body.


In [None]:
# Create the S3 helper and open the session with the credentials exposed by the
# secrets_acces module (imported as `sa`).
aws_llamados = AWSHelper()
aws_llamados.init_s3_session(aws_access_key_id=sa.access_key_id,aws_secret_access_key=sa.secret_access_key)

In [None]:
# Download step, currently disabled: the read_csv below therefore needs datarentabilidad.csv to exist locally already.
# aws_llamados.call_s3_data( bucket_name="fondosinversion",file_name="datarentabilidad.csv",download_name="datarentabilidad.csv")

In [None]:
datarentabilidad = pd.read_csv("datarentabilidad.csv")

In [None]:
datarentabilidad.head()

In [None]:
datarentabilidad["fechaCierre"].unique()

In [None]:
datarentabilidad.shape

In [None]:
# Same text-to-number conversion as ConsultasApi.limpieza_data, applied to a single column.
# NOTE(review): this rebinds `prueba`, which earlier cells use for the ConsultasApi instance;
# re-running those cells after this one would break.
# NOTE(review): re.sub requires strings, so this assumes the CSV column still holds formatted text; confirm.
prueba = datarentabilidad['rentabilidad.anios.ultimoAnio'].apply(lambda x: re.sub(r'\,|\.|\$|\%','',x)).astype("float64")/100

In [None]:
prueba