# Settings

In [20]:
# Workbook read with pd.read_excel in the "Import dataframe" step.
IN_FILENAME = "../data/processed/formatted.xlsx"
# Destination file; its extension (xlsx | csv) selects the writer used when saving.
OUT_FILENAME = "../data/processed/calculated.xlsx"

# Delimiter used to split the shareholder percentage and type cells into entries.
SEPARATOR = "\n"

### adjustments

In [21]:
import re

# Formats the save step knows how to write.
_SUPPORTED_FORMATS = ("xlsx", "csv")

# Take the output format from OUT_FILENAME's extension and reject anything the
# save step cannot write now, rather than after every column has been computed.
_extension = re.search(r"[.][a-z]+$", OUT_FILENAME)
if _extension is None or _extension.group()[1:] not in _SUPPORTED_FORMATS:
    raise Exception("have to choose a format (xlsx | csv)")
fmt = _extension.group()[1:]

# Preliminaries

### Imports

In [22]:
import pandas as pd
import numpy as np
from collections import namedtuple
import json

### constants imports

In [23]:
# Load the column-name templates and the list of years.
# The JSON object must hold exactly two entries, in this order: the column
# mapping and the years. The encoding is stated explicitly so the result does
# not depend on the platform default.
with open("../constans/columns.json", "r", encoding="utf-8") as file:
    columns, years = json.load(file).values()

# Expose both as namedtuples: columns.<key> returns the stored column name or
# template, and years.y<year> (or years[i]) returns a year.
columns = namedtuple('Columns', list(columns))(**columns)
years = namedtuple('Years', [f'y{year}' for year in years])(*years)

### tools

In [24]:
def YYYY(txt, year):
    """Return `txt` with every '[YYYY]' placeholder replaced by `year`."""
    return txt.replace('[YYYY]', str(year))

## Create namedtuple

In [25]:
# A derived column: the name it is stored under and the callable that computes it.
Column = namedtuple("Column", ["name", "formula"])

# Formulas

### Formulas applier

In [26]:
def calc_columns(df, new_columns):
    """Add one column to `df` per entry of `new_columns`, modifying `df` in place.

    Each entry provides `.name` (the column to write) and `.formula` (a callable
    that receives one row and returns that row's value). Entries are processed
    in order, so a formula can read columns written by earlier entries.
    Returns None.
    """
    for spec in new_columns:
        df[spec.name] = df.apply(lambda row: spec.formula(row), axis=1)

### Formulas implementations (if needed)

In [27]:
"""Is Familiar"""

# Lookup tables for the classification. The JSON object must hold exactly three
# entries, in this order. The encoding is stated explicitly so the result does
# not depend on the platform default.
with open("../constans/isfamiliar_data.json", "r", encoding="utf-8") as file:
    legal_forms, independence_indicators, accionista_tipo = json.load(file).values()

# Shareholding entries that could not be read as a percentage; filled while
# classifying so they can be inspected afterwards.
accionista__percent_directo = []

def isFamiliar(elm):
    
	# initial filters
    # 1. consolidation codes !!!!!
    # 2. Ultimate owner is Spain !!!!
    # 3. foreign parent company is the shareholder 
	# 	*(minimun percentage of the chain control from te focal company to its parent company greather than 50%)
	# 4. operating income > 10000 | > 0


	if elm[YYYY(columns.ebit_YYYY, years[-1])] < 10000: return 'unknown'
# ---

	if legal_forms[elm[columns.forma_juridica_detallada]] == "non_familiar" : return 'no'


	tmp = str(elm[columns.accionista__percent_directo]).split(SEPARATOR)
	index = np.argmax(tmp)
	percent = tmp[index]
	
	try:
		percent = float(percent.replace(',', '.'))
	except:
		if percent == '>50,00': percent = 51.00
		elif percent == '>75,00': percent = 76.00
		else:
			accionista__percent_directo.append(percent)
			return 'unknown'
	
	tipo = str(elm[columns.accionista__tipo]).split(SEPARATOR)[index] 
	#print(elm.name, elm.Nombre, tipo, percent, index, elm[columns.bvd])
	match elm[columns.bvd] :
		case 'C' | 'D': 
			if percent >= 50.01: 
				match accionista_tipo[tipo]:
					case 'familiar': return 'yes'
					case 'non_familiar': return 'no'
		case 'B+' | 'B-': 
			if percent >= 25.01:
				match accionista_tipo[tipo]:
					case 'familiar': return 'yes'
					case 'non_familiar': return 'no'
		case 'A': 
			if percent >= 25.01:
				match accionista_tipo[tipo]:
					case 'familiar': return 'yes'
					case 'non_familiar': return 'no'
		case _: return 'unknown'

	return 'unknown'


In [28]:
# Balance-sheet columns (one per template and year) that must hold a value
# for a row to be kept.
not_nan_filters = [
    YYYY(template, year)
    for template in (
        columns.activo_corriente_YYYY,
        columns.activo_no_corriente_YYYY,
        columns.pasivo_corriente_YYYY,
        columns.pasivo_no_corriente_YYYY,
    )
    for year in years
]

### Formulas array

In [29]:
# Derived columns, evaluated in this order by calc_columns; each formula
# receives one row. The ratios further down read the TA/TD/WC columns produced
# by the first three groups.
#
# The ratios WC/TA, EBITDA/TA and ln(TA) treat TA as total assets and TD as
# total debt, so both are the SUM of their current and non-current parts.
# They were previously computed as quotients.
# NOTE(review): confirm against the methodology.
new_columns = [
    *[Column(f"TA_{year}",
             lambda row, year=year: row[YYYY(columns.activo_corriente_YYYY, year)]
                                    + row[YYYY(columns.activo_no_corriente_YYYY, year)])
      for year in years],
    *[Column(f"TD_{year}",
             lambda row, year=year: row[YYYY(columns.pasivo_corriente_YYYY, year)]
                                    + row[YYYY(columns.pasivo_no_corriente_YYYY, year)])
      for year in years],
    # Current assets minus current liabilities.
    *[Column(f"WC_{year}",
             lambda row, year=year: row[YYYY(columns.activo_corriente_YYYY, year)]
                                    - row[YYYY(columns.pasivo_corriente_YYYY, year)])
      for year in years],

    *[Column(f"TD/TA_{year}",
             lambda row, year=year: row[YYYY(columns.TD_YYYY, year)] / row[YYYY(columns.TA_YYYY, year)])
      for year in years],
    *[Column(f"WC/TA_{year}",
             lambda row, year=year: row[YYYY(columns.WC_YYYY, year)] / row[YYYY(columns.TA_YYYY, year)])
      for year in years],
    *[Column(f"EBITDA/TA_{year}",
             lambda row, year=year: row[YYYY(columns.ebitda_YYYY, year)] / row[YYYY(columns.TA_YYYY, year)])
      for year in years],
    *[Column(f"ln(TA)_{year}",
             lambda row, year=year: np.log(row[YYYY(columns.TA_YYYY, year)]))
      for year in years],
    Column("isFamiliar", lambda row: isFamiliar(row)),
]

# Main

### Import dataframe

In [30]:
# Read the input workbook named in the Settings cell.
df = pd.read_excel(IN_FILENAME)

In [31]:
# Keep only rows with a value in every required balance-sheet column
# ('n.d.' marks a missing figure). The explicit copy makes the result an
# independent frame, so later column assignments do not raise
# SettingWithCopyWarning.
has_all_values = df[not_nan_filters].ne('n.d.').all(axis=1)
df = df[has_all_values].copy()

In [32]:
# Convert the required balance-sheet columns to float in one call. astype
# returns a new frame, which avoids assigning column by column into a frame
# obtained by filtering.
df = df.astype({f: float for f in not_nan_filters})

### Calc new columns

In [33]:
# Start from an empty diagnostics list so re-running this cell does not keep
# accumulating entries from earlier runs.
accionista__percent_directo.clear()
calc_columns(df, new_columns)
# Distinct shareholding entries that could not be read as a percentage.
np.unique(accionista__percent_directo)

array(['-', 'MO', 'NG'], dtype='<U2')

### Save dataframe

In [34]:
# Write the result with the writer that matches the output extension.
if fmt == "xlsx":
    df.to_excel(OUT_FILENAME, index=False)
elif fmt == "csv":
    df.to_csv(OUT_FILENAME, index=False)
else:
    raise Exception("Stop touching things!!")