# Settings

In [1]:
IN_FILENAME = "../data/processed/formatted.xlsx"
OUT_FILENAME = "../data/processed/calculated.xlsx"

SEPARATOR = "\n"

### adjustments

In [2]:
import re

fmt = re.search("[.][a-z]+$", OUT_FILENAME)
if fmt == None: raise Exception("have to choose a format (xlsx | csv)")
fmt = fmt.group()[1:]

# Prevs

### Imports

In [3]:
import pandas as pd
import numpy as np
from collections import namedtuple
import json

### constants imports

In [4]:
with open("../constans/columns.json", "r") as file: columns, years = json.load(file).values()

columns = namedtuple('Columns', ' '.join(columns.keys()))(**columns)
years = namedtuple('Years', re.sub(r'[\[\],\']','', str([*map(lambda y: f'y{y}', years)])))(*years)

### tools

In [5]:
YYYY = lambda txt, year : txt.replace('[YYYY]', str(year))

## Create namedtuple

In [6]:
Column = namedtuple("Column", "name formula")

# Formulas

### Formulas applier

In [7]:
def calc_columns(df, new_columns):
    for col in new_columns:
        df[col.name] = df.apply(lambda elm: col.formula(elm), axis=1)

### Formulas implementations (if needed)

In [8]:
"""Is Familiar"""

with open("../constans/isfamiliar_data.json", "r") as file: 
      legal_forms, independence_indicators, accionista_tipo = json.load(file).values()

accionista__percent_directo = []

def isFamiliar(elm):
    
	# initial filters
    # 1. consolidation codes !!!!!
    # 2. Ultimate owner is Spain !!!!
    # 3. foreign parent company is the shareholder 
	# 	*(minimun percentage of the chain control from te focal company to its parent company greather than 50%)
	# 4. operating income > 10000 | > 0


	if elm[YYYY(columns.ebit_YYYY, years[-1])] < 10000: return 'unknown'
# ---

	if legal_forms[elm[columns.forma_juridica_detallada]] == "non_familiar" : return 'no'


	tmp = str(elm[columns.accionista__percent_directo]).split(SEPARATOR)
	index = np.argmax(tmp)
	percent = tmp[index]
	
	try:
		percent = float(percent.replace(',', '.'))
	except:
		if percent == '>50,00': percent = 51.00
		elif percent == '>75,00': percent = 76.00
		else:
			accionista__percent_directo.append(percent)
			return 'unknown'
	
	tipo = str(elm[columns.accionista__tipo]).split(SEPARATOR)[index] 
	#print(elm.name, elm.Nombre, tipo, percent, index, elm[columns.bvd])
	match elm[columns.bvd] :
		case 'C' | 'D': 
			if percent >= 50.01: 
				match accionista_tipo[tipo]:
					case 'familiar': return 'yes'
					case 'non_familiar': return 'no'
		case 'B+' | 'B-': 
			if percent >= 25.01:
				match accionista_tipo[tipo]:
					case 'familiar': return 'yes'
					case 'non_familiar': return 'no'
		case 'A': 
			if percent >= 25.01:
				match accionista_tipo[tipo]:
					case 'familiar': return 'yes'
					case 'non_familiar': return 'no'
		case _: return 'unknown'

	return 'unknown'


In [9]:
def z_zones(z):
    if z > 2.6: return "green"
    elif z >= 1.1: return "grey"
    else: return "red"
    

def family_cats(fam):
    match fam:
        case "non_familiar": return 1
        case "familiar": return 2
        case "unknown": return 0

In [10]:
not_nan_filters = [
    *[YYYY(columns.activo_corriente_YYYY, year) for year in years],
    *[YYYY(columns.activo_no_corriente_YYYY, year) for year in years],
    *[YYYY(columns.pasivo_corriente_YYYY, year) for year in years],
    *[YYYY(columns.pasivo_no_corriente_YYYY, year) for year in years],
    
	*[YYYY(columns.importe_neto_de_la_cifra_de_negocios_YYYY, year) for year in years]
    ]

nan_to_zero = [
    *[YYYY(columns.ganancias_acumuladas_YYYY, year) for year in years],
    
	*[YYYY(columns.resultado_ejercicio_YYYY, year) for year in years],
    *[YYYY(columns.dividendos_YYYY, year) for year in years],
    *[YYYY(columns.ebit_YYYY, year) for year in years]
]

not_zero = [
     *[YYYY(columns.total_activo_YYYY, year) for year in years]
]

### Formulas array

In [11]:
new_columns = [
	# z components prevs
    *[Column(f"Ganancias retenidas {year}", lambda df, year=year: 
             df[YYYY(columns.ganancias_acumuladas_YYYY, year)] + df[YYYY(columns.resultado_ejercicio_YYYY, year)] + df[YYYY(columns.dividendos_YYYY, year)]) for year in years],
    *[Column(f"Pasivo total {year}", lambda df, year=year: df[YYYY(columns.pasivo_corriente_YYYY, year)] + df[YYYY(columns.pasivo_no_corriente_YYYY, year)]) for year in years],
    
	# indexes components
    #*[Column(f"TA_{year}", lambda df, year=year: df[YYYY(columns.activo_corriente_YYYY, year)]+df[YYYY(columns.activo_no_corriente_YYYY, year)]) for year in years],
    *[Column(f"TD_{year}", lambda df, year=year: df[YYYY(columns.pasivo_corriente_YYYY, year)]+df[YYYY(columns.pasivo_no_corriente_YYYY, year)]) for year in years],
    *[Column(f"WC_{year}", lambda df, year=year: df[YYYY(columns.activo_corriente_YYYY, year)]-df[YYYY(columns.pasivo_corriente_YYYY, year)]) for year in years],
    
	# z components Xn
	*[Column(f"Z X1 {year}", lambda df, year=year: df[YYYY(columns.WC_YYYY , year)]/df[YYYY(columns.total_activo_YYYY, year)]) for year in years],
    *[Column(f"Z X2 {year}", lambda df, year=year: df[YYYY(columns.ganancias_retenidas_YYYY, year)]/df[YYYY(columns.total_activo_YYYY, year)]) for year in years],
    *[Column(f"Z X3 {year}", lambda df, year=year: df[YYYY(columns.ebit_YYYY, year)]/df[YYYY(columns.total_activo_YYYY, year)]) for year in years],
    *[Column(f"Z X4 {year}", lambda df, year=year: df[YYYY(columns.patrimonio_neto_YYYY, year)]/df[YYYY(columns.total_pasivo_YYYY, year)]) for year in years],
    *[Column(f"Z X5 {year}", lambda df, year=year: df[YYYY(columns.importe_neto_de_la_cifra_de_negocios_YYYY, year)]/df[YYYY(columns.total_activo_YYYY, year)]) for year in years],
      
	# z altman
	*[Column(f"Z {year}", lambda df, year=year: 
          .717*df[YYYY(columns.zX1_YYYY, year)] + .847*df[YYYY(columns.zX2_YYYY, year)] + 3.107*df[YYYY(columns.zX3_YYYY, year)] + .42*df[YYYY(columns.zX4_YYYY, year)] + .998*df[YYYY(columns.zX5_YYYY, year)]
          ) for year in years],
          
	# z altman zones
    *[Column(f"Z Green Zone {year}", lambda df, year=year: 1 if z_zones(df[YYYY(columns.z_YYYY, year)]) == "green" else 0) for year in years],
    *[Column(f"Z Grey Zone {year}", lambda df, year=year: 1 if z_zones(df[YYYY(columns.z_YYYY, year)]) == "grey" else 0) for year in years],
    *[Column(f"Z Red Zone {year}", lambda df, year=year: 1 if z_zones(df[YYYY(columns.z_YYYY, year)]) == "red" else 0) for year in years],
    
	# indexes
    *[Column(f"TD/TA_{year}", lambda df, year=year: df[YYYY(columns.TD_YYYY, year)]/df[YYYY(columns.total_activo_YYYY, year)]) for year in years],
    *[Column(f"WC/TA_{year}", lambda df, year=year: df[YYYY(columns.WC_YYYY, year)]/df[YYYY(columns.total_activo_YYYY, year)]) for year in years],
    *[Column(f"EBITDA/TA_{year}", lambda df, year=year: df[YYYY(columns.ebitda_YYYY, year)]/df[YYYY(columns.total_activo_YYYY, year)]) for year in years],
    *[Column(f"ln(TA)_{year}", lambda df, year=year: np.log(df[YYYY(columns.total_activo_YYYY, year)])) for year in years],
     Column("isFamiliar", lambda df: isFamiliar(df))
]

# Main

### Import dataframe

In [12]:
df = pd.read_excel(IN_FILENAME)

In [13]:
df[YYYY(columns.total_activo_YYYY, 2023)]

0       1.915934e+08
1       3.674004e+08
2       3.421290e+08
3       3.920595e+08
4       2.984650e+08
            ...     
1780    1.402443e+06
1781    9.660041e+04
1782    4.542240e+03
1783    2.419699e+06
1784    2.252492e+05
Name: Total Activo\nEUR\n2023, Length: 1785, dtype: float64

In [14]:
print(nan_to_zero)

['Ganancias acumuladas\nEUR\n2021', 'Ganancias acumuladas\nEUR\n2022', 'Ganancias acumuladas\nEUR\n2023', 'Resultado del Ejercicio\nEUR\n2021', 'Resultado del Ejercicio\nEUR\n2022', 'Resultado del Ejercicio\nEUR\n2023', 'Dividendos (-)\nEUR\n2021', 'Dividendos (-)\nEUR\n2022', 'Dividendos (-)\nEUR\n2023', 'EBIT\nEUR\n2021', 'EBIT\nEUR\n2022', 'EBIT\nEUR\n2023']


In [15]:
# aqui esta el problema

for f in nan_to_zero: df[f] = df[f].replace('n.d.', 0.0)
df.shape

  for f in nan_to_zero: df[f] = df[f].replace('n.d.', 0.0)


(1785, 2350)

In [16]:
for f in not_nan_filters: df = df[df[f] != 'n.d.']
df.shape

(1363, 2350)

In [17]:
for f in not_nan_filters: df[f] = df[f].astype(float)
for f in not_zero: df[f] = df[f].astype(float)
for f in nan_to_zero: df[f] = df[f].astype(float)
df.shape

(1363, 2350)

In [18]:
df[YYYY(columns.total_activo_YYYY, 2023)]

0       1.915934e+08
1       3.674004e+08
2       3.421290e+08
3       3.920595e+08
4       2.984650e+08
            ...     
1756    7.126363e+04
1761    2.978639e+04
1762    2.234136e+05
1769    2.991848e+04
1770    1.248302e+06
Name: Total Activo\nEUR\n2023, Length: 1363, dtype: float64

In [19]:
for f in not_zero: df = df[df[f] != 0.0]
df.shape

(1363, 2350)

In [20]:
for elm in [*[df[YYYY(columns.total_activo_YYYY, year)] for year in years]]:
    if (elm == 0).any():
        print("Oh no")
        break

### Calc new columns

In [21]:
calc_columns(df, new_columns)
np.unique(accionista__percent_directo)

array(['-', 'MO', 'NG'], dtype='<U2')

In [22]:
df.shape

(1363, 2402)

### Save dataframe

In [23]:
match fmt:
    case "xlsx": df.to_excel(OUT_FILENAME, index=False)
    case "csv": df.to_csv(OUT_FILENAME, index=False)
    case _: raise Exception("Stop touching things!!")

In [24]:
from playsound import playsound

playsound('../ALARM.mp3')