In [5]:
import pandas as pd
import plotly.express as px
import matplotlib.pyplot as plt
import seaborn as sns
import sys
import numpy as np


sys.path.append("../../src/")

from utils.calls import call_industry,call_company

In [6]:
df = pd.read_csv('../../data/processed/balances2022.csv',
                 dtype={'ruc': str, 'codigo': str, 'valor': float})
ciiu = pd.read_csv("../../data/processed/balaces2022_ids.csv",
                   dtype={"ruc":str})[["ruc","ciiu"]]


In [7]:
ind_df = call_industry(mother_df = df, ciiu_df = ciiu, ruc = "1790016919001")
display(ind_df)

Unnamed: 0_level_0,Unnamed: 1_level_0,valor
ruc,codigo,Unnamed: 2_level_1
190151867001,1,3321157.0
190151867001,101,3150890.0
190151867001,10101,663279.0
190151867001,1010101,663279.0
190151867001,1010102,1652505.0
...,...,...
2490397143001,80008,
2490397143001,80009,0.0
2490397143001,801,0.0
2490397143001,80101,0.0


In [8]:
def filter_out(ind_df: pd.DataFrame):
    ind_df = (
    ind_df.pivot_table(index = "ruc",columns = "codigo",values = "valor")
    .pipe(lambda df_: df_[df_["401"] > 0])
    .pipe(lambda df_: df_[~(df_["401"]< df_["401"].quantile(0.05))])
    )
    #series with the mean of the companies below the percentile
    aggr_comp = ind_df.loc[(ind_df["401"]< ind_df["401"].quantile(0.05))].mean()
    #assign the averaged observations 
    ind_df.loc["other",:] = aggr_comp
    ind_df = (
        ind_df
        .melt(var_name = "codigo",value_vars = ind_df.columns,value_name = "valor",ignore_index=False)
        .reset_index()
        .set_index(["ruc","codigo"])
        .sort_index()
        )
    
    return ind_df
ind_df = filter_out(ind_df)
ind_df

Unnamed: 0_level_0,Unnamed: 1_level_0,valor
ruc,codigo,Unnamed: 2_level_1
1091704184001,1,5.549774e+06
1091704184001,101,3.565458e+06
1091704184001,10101,7.468637e+05
1091704184001,1010101,7.514190e+03
1091704184001,1010102,0.000000e+00
...,...,...
other,80008,0.000000e+00
other,80009,0.000000e+00
other,801,1.516789e+01
other,80101,8.794211e+00


In [9]:
(
pd.read_csv("../../data/processed/balances2022_meta.csv")
.pipe(lambda df_: df_.assign(codigo = df_["codigo"].astype(str)))
.query("cuenta.str.contains('GANANCIA')")
 )


Unnamed: 0,codigo,cuenta
365,30601,GANANCIAS ACUMULADAS
373,30701,GANANCIA NETA DEL PERIODO
391,40108,GANANCIA POR MEDICION A VALOR RAZONABLE DE AC...
416,4011003,GANANCIA EN INVERSIONES EN ASOCIADAS / SUBSIDI...
418,4011005,GANANCIA EN VENTA DE TITULOS VALORES
425,402,GANANCIA BRUTA
427,40301,"GANANCIA EN VENTA DE PROPIEDAD, PLANTA Y EQUIPO"
428,40302,GANANCIA EN VENTA DE ACTIVOS BIOLÓGICOS
593,600,GANANCIA (PÉRDIDA) ANTES DE 15% A TRABAJADORES...
595,602,GANANCIA (PÉRDIDA) ANTES DE IMPUESTOS


In [29]:
def metrics_calc(ind_df: pd.DataFrame, ruc: str, kind:str = "industry"):
    metrics = (pd.DataFrame(columns = ind_df.reset_index().columns)
               .set_index(["ruc","codigo"]))

    # Check if the "kind" argument is valid
    if kind not in ("industry", "individual"):
        raise ValueError("Invalid value for 'kind' argument. Use 'industry' or 'individual'.")

    rucs = set(ind_df.index.get_level_values(0))
    print(len(rucs))
    for r_tuple in rucs:
        #curent ratio
        current = (ind_df.loc[(r_tuple,"101"),:]/ind_df.loc[(r_tuple,"201"),:])
        metrics.loc[(r_tuple,"current ratio"),:] = current
        #quick ratio
        quick = (ind_df.loc[(r_tuple,"101"),:]-ind_df.loc[(r_tuple,"10103"),:])/ind_df.loc[(r_tuple,"201"),:]
        metrics.loc[(r_tuple,"quick ratio"),:] = quick
        #D/E ratio
        dve = ind_df.loc[(r_tuple,"2")]/ind_df.loc[(r_tuple,"3")]
        metrics.loc[(r_tuple,"debt vs equity"),:] = dve
        #ROA
        roa = ind_df.loc[(r_tuple,"600"),:]/ind_df.loc[(r_tuple,"1"),:]
        metrics.loc[(r_tuple,"return on assets"),:] = roa
        #ROE
        roe = ind_df.loc[(r_tuple,"600"),:]/ind_df.loc[(r_tuple,"3"),:]
        metrics.loc[(r_tuple,"return on equity"),:] = roe


    #if a complete industry comparison is needed then:
    if kind == "industry":
        #extract info from desired company
        comp_metrics = metrics.xs(str(ruc),level = 0,drop_level = False)
        #metrics from industry sans company
        ind_metrics = (
            metrics
            .loc[metrics.index.get_level_values(0) != str(ruc)]
            .pipe(lambda df_: pd.concat({"industry":df_.groupby(level = "codigo",sort = False).median()},names = ["ruc"]))
        )
        metrics = pd.concat([comp_metrics,ind_metrics])
    return metrics  
metrics = metrics_calc(ind_df = ind_df,ruc = "1792153050001",kind = "industry")
display(metrics)

363


Unnamed: 0_level_0,Unnamed: 1_level_0,valor
ruc,codigo,Unnamed: 2_level_1
1792153050001,current ratio,1.761025
1792153050001,quick ratio,1.233304
1792153050001,debt vs equity,0.853438
1792153050001,return on assets,0.017217
1792153050001,return on equity,0.031911
industry,current ratio,1.410301
industry,quick ratio,0.903284
industry,debt vs equity,1.491393
industry,return on assets,0.013975
industry,return on equity,0.119093


In [35]:
def plot_comparison(metrics: pd.DataFrame):
    fig = px.line_polar(metrics.reset_index(), r= "valor", theta = "codigo", color = "ruc",line_close = True,range_r = (-0.5,metrics["valor"].max()))
    fig.update_traces(fill = "toself")
    fig.show()
plot_comparison(metrics)

**Comparative graph for a single company**

In [18]:
comp = pd.concat([call_company(df,"1790016919001"),call_company(df,1792153050001)]).set_index(["ruc","codigo"])

In [33]:
ind_metrics = metrics_calc(comp,"1790016919001","individual")

2


In [34]:
plot_comparison(ind_metrics)

NameError: name 'plot_comparison' is not defined