In [1]:
import streamlit as st
import pandas as pd

import shutil
import os

In [2]:
url = 'https://raw.githubusercontent.com/Alejandro-q99/Financial-APP/main/app/Data/consolidated.csv'
df = pd.read_csv(url)
df

Unnamed: 0,diferencia,dq,indice,md,paridad,pq,precio,qp,ticker,tir,ttir,uptir,volumen,vt
0,↑0.91%,43,Badlar,0.19,98.1%,31.8,116.25,27%,BDC24,214.6%,99.0%,14.8%,3.1,118.5
1,↓-1.52%,63,Badlar,0.24,104.2%,33.3,117.00,28%,TB24,172.9%,99.0%,11.6%,0.0,112.3
2,↑1.27%,92,Badlar,0.24,90.9%,34.3,92.55,37%,PBY24,303.6%,99.0%,34.8%,6.4,101.9
3,↓-0.10%,57,Badlar,0.38,84.4%,32.8,96.40,34%,PBA25,295.6%,99.0%,66.8%,22.5,114.3
4,↓-0.37%,153,Fijo,1.00,21.1%,7.8,21.42,36%,TO26,144.0%,99.0%,45.1%,4.2,101.5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
84,↑0.89%,7,CER,0.00,99.9%,-,159.11,-,X23N3,3856.0%,-,-,4304.3,159.2
85,↑2.29%,63,CER,0.17,101.3%,-,133.90,-,X18E4,-7.6%,-,-,719.6,132.2
86,↑1.16%,104,CER,0.40,112.4%,-,43500.00,-,TDF24,-33.1%,-,-,5.4,38707.5
87,↑2.02%,166,CER,0.61,116.9%,-,45400.00,-,TDA24,-27.5%,-,-,2.6,38850.0


In [3]:
df["indice"].unique()

array(['Badlar', 'Fijo', 'CER', 'USS'], dtype=object)

In [4]:
def normalize_percentage(value):
    """
    Convierte un valor de porcentaje en formato string a un decimal.
    Ejemplo: '99.0%' se convierte en 0.99.
    Maneja los valores no numéricos o faltantes adecuadamente.
    """
    try:
        if isinstance(value, str):
            return float(value.strip('%')) / 100
    except ValueError:
        # Retorna None o un valor predeterminado en caso de valores no convertibles
        return None
    return value

In [5]:
def remove_arrows(value):
    """
    Elimina los símbolos de flecha (↑ y ↓) de un string.
    """
    if isinstance(value, str):
        return value.replace('↑', '').replace('↓', '')
    return value

In [6]:
def clean_and_convert_data(df):
    """
    Limpia y convierte los datos del DataFrame.
    """
    cleaned_df = df.copy()

    # Columnas que necesitan normalización de porcentajes
    percentage_columns = ['diferencia', 'paridad', 'qp', 'ttir', 'uptir']

    # Limpiar y convertir porcentajes
    for col in percentage_columns:
        cleaned_df[col] = cleaned_df[col].apply(remove_arrows).apply(normalize_percentage)

    # Convertir volumen y vt a numérico
    cleaned_df['volumen'] = pd.to_numeric(cleaned_df['volumen'], errors='coerce')
    cleaned_df['vt'] = pd.to_numeric(cleaned_df['vt'], errors='coerce')

    return cleaned_df

In [7]:
def percentage_nan(df):
    """
    Calcula el porcentaje de valores NaN en cada columna de un DataFrame.

    :param df: DataFrame de pandas.
    :return: Un DataFrame con el porcentaje de NaN en cada columna.
    """
    # Calcula el total de NaN en cada columna
    total_nan = df.isna().sum()

    # Calcula el porcentaje
    percentage = (total_nan / len(df)) * 100

    return percentage


In [8]:
cleaned_data = clean_and_convert_data(df)
cleaned_data.head()


Unnamed: 0,diferencia,dq,indice,md,paridad,pq,precio,qp,ticker,tir,ttir,uptir,volumen,vt
0,0.0091,43,Badlar,0.19,0.981,31.8,116.25,0.27,BDC24,214.6%,0.99,0.148,3.1,118.5
1,-0.0152,63,Badlar,0.24,1.042,33.3,117.0,0.28,TB24,172.9%,0.99,0.116,0.0,112.3
2,0.0127,92,Badlar,0.24,0.909,34.3,92.55,0.37,PBY24,303.6%,0.99,0.348,6.4,101.9
3,-0.001,57,Badlar,0.38,0.844,32.8,96.4,0.34,PBA25,295.6%,0.99,0.668,22.5,114.3
4,-0.0037,153,Fijo,1.0,0.211,7.8,21.42,0.36,TO26,144.0%,0.99,0.451,4.2,101.5


¿Como imputar los valores  quedarían en NaN?


In [13]:
cleaned_data.to_csv("../Assets/clean.csv")

In [9]:
df_2 = cleaned_data.drop("diferencia", axis=1)
df_2

Unnamed: 0,dq,indice,md,paridad,pq,precio,qp,ticker,tir,ttir,uptir,volumen,vt
0,43,Badlar,0.19,0.981,31.8,116.25,0.27,BDC24,214.6%,0.99,0.148,3.1,118.5
1,63,Badlar,0.24,1.042,33.3,117.00,0.28,TB24,172.9%,0.99,0.116,0.0,112.3
2,92,Badlar,0.24,0.909,34.3,92.55,0.37,PBY24,303.6%,0.99,0.348,6.4,101.9
3,57,Badlar,0.38,0.844,32.8,96.40,0.34,PBA25,295.6%,0.99,0.668,22.5,114.3
4,153,Fijo,1.00,0.211,7.8,21.42,0.36,TO26,144.0%,0.99,0.451,4.2,101.5
...,...,...,...,...,...,...,...,...,...,...,...,...,...
84,7,CER,0.00,0.999,-,159.11,,X23N3,3856.0%,,,4304.3,159.2
85,63,CER,0.17,1.013,-,133.90,,X18E4,-7.6%,,,719.6,132.2
86,104,CER,0.40,1.124,-,43500.00,,TDF24,-33.1%,,,5.4,38707.5
87,166,CER,0.61,1.169,-,45400.00,,TDA24,-27.5%,,,2.6,38850.0


In [10]:
percentage_nan(df_2)

dq         0.000000
indice     0.000000
md         0.000000
paridad    0.000000
pq         0.000000
precio     0.000000
qp         5.617978
ticker     0.000000
tir        0.000000
ttir       5.617978
uptir      5.617978
volumen    0.000000
vt         0.000000
dtype: float64

Creo que una vez cruado así los datos ya podrímamos usarlo.