# Generalyzed ETL Script

El propósito de este notebook es proporcionar un código a partir del cual se haga toda la limpieza de datos correspondiente, pero para distintos formatos de bases de datos y años de contrato. 

Para un mayor entendimiento, se dejará a modo de ejemplo ilustrativo un script anterior en el que se explican con detalle todos los pasos y el porqué de dichos pasos.

In [1]:
import pandas as pd
import numpy as np
import math
import os
import warnings
import statsmodels.api as sm
from matplotlib.colors import ListedColormap
from termcolor import colored
print('Modulos importados')

Modulos importados


In [2]:
# Configuraciones
warnings.filterwarnings('ignore')

In [3]:
# Directorio de trabajo
print("Directorio de trabajo previo: " + str(os.getcwd()))
# Cambiemoslo
os.chdir('/home/usuario/Documentos/Github/Proyectos/MLB_HN/')

Directorio de trabajo previo: /home/usuario/Documentos/Github/Proyectos/MLB_HN/ETL_Scripts


In [4]:
# Veamos el directorio actual de trabajo
print(os.getcwd())
# El directorio anterior es el correcto, pero si no lo fuese, hacemos lo sigueinte:
path = '/home/usuario/Documentos/Github/Proyectos/MLB_HN'
print("Nuevo directorio de trabajo: " + str(os.chdir(path)))

/home/usuario/Documentos/Github/Proyectos/MLB_HN
Nuevo directorio de trabajo: None


Puesto que los datos geográficos de los equipos son los mismos para todos los tratamientos, se realizará primero esa parte

In [5]:
# Cantidad de equipos por estado
states = 'Data/Teams/team_states.csv'
df_states = pd.read_csv(states)
# Acrónimos de cada equipo
acronym = 'Data/Teams/team_acronym.csv'
df_acronym = pd.read_csv(acronym)
# Merge de amabas bases de datos
acronym_state = pd.merge(df_states,
                         df_acronym,
                         on = 'Estado')

Como se desea obtener todos los paneles con respecto a los años de contratación, hallemos periodo de contratación más grande que se encuentra en todas las bases de datos

In [6]:
# Ruta de lectura:
free_agents = 'Data/Cumulative/Free_Agents/free_agents_'
csv = '.csv'
period = 12
# DF original:
df_free_agents = [None]*period
# Copias:
df_free_agents_copy = [None]*period

# Limpieza de df:
for year in range(0,period):
    # Lectura
    df_free_agents[year] = pd.read_csv(free_agents + str(2011 + year) + csv)
    df_free_agents_copy[year] = df_free_agents[year].copy()
    
    # Drop columns:
    if any(name in df_free_agents_copy[year].columns for name in ['Rank','Pos','Year','Team From To']):
        df_free_agents_copy[year].drop('Rank', axis = 1, inplace = True)
        df_free_agents_copy[year].drop('Year', axis = 1, inplace = True)
        df_free_agents_copy[year].drop('Pos', axis = 1, inplace = True)
        df_free_agents_copy[year].drop('Team From To', axis = 1, inplace = True)
    
    # Base de datos de agentes libres:
    df_free_agents_copy[year].drop(df_free_agents_copy[year].columns[df_free_agents_copy[year].columns.str.contains('Unnamed', 
                                                                                                                    case = False)], 
                                   axis = 1,
                                   inplace = True)

Guardemos en una lista la cantida máxima de años de contrato de cada base de datos y obtengamos el mayor número que contiene

In [7]:
# Lista de máximos:
max_contract_yrs = [0]*period
# Máximo de cada df:
for year in range(0,period):
    max_contract_yrs[year] = df_free_agents_copy[year]['YRS'].max(skipna = True)
# Máximo de todas los df:
max_contrac_yrs_panel = max(max_contract_yrs)
print("Mayor cantidad de años para un contrato: " + str(max_contrac_yrs_panel))

Mayor cantidad de años para un contrato: 13


La estrategia que se llevará a cabo es realizar dos bucles iterados donde se itere sobre todos los tipos de datos y sobre cada cantidad de años de contrato.

In [19]:
# Tipos de bases de datos:
df_types = ['Cumulative', 'Per_10_Games', 'Per_Game', 'Yearly_Average']
#  Años de contrato
auxiliar_contract_yrs = [year for year in range(0,max_contrac_yrs_panel)]
contract_yrs = list(map(lambda x : x + 1, auxiliar_contract_yrs))
# Auxiliares de ruta
teams = 'ETL_Data/Agent/Teams/free_agents_team_'
csv = '.csv'
# Auxiliares de variables fijas
period = 12
hitting_names = ['Juegos_iniciados', 'Porcentaje_juegos_iniciados', 'Al-bate', 'Bateos',
                  'Dobles', 'Triples', 'Home-runs', 'Runs-batted-in', 'Bateos_promedio',
                  'Porcentaje_on-base', 'Porcentaje_slugging', 'TVS',
                  'Porcentaje_On-base-plus-slugging', 'WAR']	
pitching_names = ['Inning_pitched', 'Bateos', 'Carreras',
                  'Carreras_ganadas', 'Walks', 'Strike-outs', 'Wins', 'Losses',
                  'Saves', 'WHIP', 'ERA', 'WAR', 'TVS', 'Dominio', 'Control',
                  'Comando']
hitting_merge = ['Juegos_iniciados', 'Porcentaje_juegos_iniciados', 'Al-bate', 'Bateos',
                 'Dobles', 'Triples', 'Home-runs', 'Runs-batted-in', 'Bateos_promedio',
                 'Porcentaje_on-base', 'Porcentaje_slugging', 'TVS',
                 'Porcentaje_On-base-plus-slugging', 'WAR',
                 'Juegos_iniciados_2', 'Porcentaje_juegos_iniciados_2', 'Al-bate_2', 'Bateos_2',
                 'Dobles_2', 'Triples_2', 'Home-runs_2', 'Runs-batted-in_2', 'Bateos_promedio_2',
                 'Porcentaje_on-base_2', 'Porcentaje_slugging_2', 'TVS_2',
                 'Porcentaje_On-base-plus-slugging_2', 'WAR_2']	
pitching_merge = ['Inning_pitched', 'Bateos_en_contra', 'Carreras_en_contra',
                  'Carreras_ganadas', 'Walks', 'Strike-outs', 'Wins', 'Losses',
                  'Saves', 'WHIP', 'ERA', 'WAR', 'TVS', 'Dominio', 'Control',
                  'Comando',
                  'Inning_pitched_2', 'Bateos_2', 'Carreras_2',
                  'Carreras_ganadas_2', 'Walks_2', 'Strike-outs_2', 'Wins_2', 'Losses_2',
                  'Saves_2', 'WHIP_2', 'ERA_2', 'WAR_2', 'TVS_2', 'Dominio_2', 'Control_2',
                  'Comando_2']
hitter_regular_stats = ['At_bats_2_t_1', 'At_bats_t_1',
                        'Bateos_2_t_1', 'Bateos_t_1',
                        'Bateos_promedio_2_t_1', 'Bateos_promedio_t_1',
                        'Dobles_2_t_1', 'Dobles_t_1',
                        'Home_runs_2_t_1', 'Home_runs_t_1',
                        'Juegos_iniciados_2_t_1', 'Juegos_iniciados_t_1', 
                        'Porcentaje_On_base_plus_slugging_2_t_1', 'Porcentaje_On_base_plus_slugging_t_1',
                        'Porcentaje_on_base_2_t_1', 'Porcentaje_on_base_t_1',
                        'Porcentaje_slugging_2_t_1', 'Porcentaje_slugging_t_1',
                        'Runs_batted_in_2_t_1', 'Runs_batted_in_t_1',
                        'Triples_2_t_1', 'Triples_t_1',
                        'WAR_2_t_1', 'WAR_t_1']
hitter_regular_stats = sorted(hitter_regular_stats)
pitcher_regular_stats = ['Bateos_2_t_1', 'Bateos_t_1',
                        'Carreras_2_t_1', 'Carreras_t_1',
                        'Carreras_ganadas_2_t_1', 'Carreras_ganadas_t_1',
                        'Comando_2_t_1', 'Comando_t_1',
                        'Control_2_t_1', 'Control_t_1',
                        'Dominio_2_t_1', 'Dominio_t_1', 
                        'ERA_2_t_1', 'ERA_t_1',
                        'Inning_pitched_2_t_1', 'Inning_pitched_t_1',
                        'Losses_2_t_1', 'Losses_t_1',
                        'Saves_2_t_1', 'Saves_t_1',
                        'Strike_outs_2_t_1', 'Strike_outs_t_1',
                        'WAR_2_t_1', 'WAR_t_1',
                        'WHIP_2_t_1', 'WHIP_t_1',
                        'Walks_2_t_1', 'Walks_t_1',
                        'Wins_2_t_1', 'Wins_t_1']
pitcher_regular_stats = sorted(pitcher_regular_stats)
# Función auxiliar para índices 
# 
def get_col_indices(df, names):
    return df.columns.get_indexer(names)

In [18]:
for df_type in df_types:
    # -----------------------------------------------------------------------------------
    # ---------------------------------- Importación ------------------------------------
    # -----------------------------------------------------------------------------------
    # Rutas para las bases:
    free_agents = 'Data/' + df_types[0] + '/Free_Agents/free_agents_'
    hitting = 'Data/' + df_types[0] + '/Not_All_Variables/Statistics/Hitting/hitting_'
    pitching = 'Data/' + df_types[0] + '/Not_All_Variables/Statistics/Pitching/pitching_'
    salary = 'Data/' + df_types[0] + '/Not_All_Variables/Salary/salary_'
    # Originales:
    df_free_agents = [None]*period
    df_hitting = [None]*period
    df_pitching = [None]*period
    df_salary = [None]*period
    df_teams = [None]*period
    # Copias:
    df_free_agents_copy = [None]*period
    df_hitting_copy = [None]*period
    df_pitching_copy = [None]*period
    df_salary_copy = [None]*period
    df_teams_copy = [None]*period
    # Producto final:
    df_pitchers = [None]*period
    df_hitters = [None]*period
    df_pitchers_free_agents = [None]*period
    df_hitters_free_agents = [None]*period
    df_pitchers_no_free_agents = [None]*period
    df_hitters_no_free_agents = [None]*period
    df_panel_hitters = [None]*period
    df_panel_pitchers = [None]*period
    
    # Lectura de bases de datos:
    for year in range(0,period):    
        df_free_agents[year] = pd.read_csv(free_agents + str(2011 + year) + csv)
        df_hitting[year] = pd.read_csv(hitting + str(2011 + year) + csv)
        df_pitching[year] = pd.read_csv(pitching + str(2011 + year) + csv)
        df_salary[year] = pd.read_csv(salary + str(2011 + year) + csv)
        df_teams[year] = pd.read_csv(teams + str(2011 + year) + csv)

        df_free_agents_copy[year] = df_free_agents[year].copy()
        df_hitting_copy[year] = df_hitting[year].copy()
        df_pitching_copy[year] = df_pitching[year].copy()
        df_salary_copy[year] = df_salary[year].copy()
        df_teams_copy[year] = pd.read_csv(teams + str(2011 + year) + csv)
    
    # -----------------------------------------------------------------------------------
    # ---------------------------------------- ETL --------------------------------------
    # -----------------------------------------------------------------------------------
    
    #
    # -------------------------------------- Limpieza -----------------------------------
    #
    for year in range(0,period): 
        # Borrado de columnas inútiles
        #
        # Agentes libres:
        if any(name in df_free_agents_copy[year].columns for name in ['Rank','Pos','Year','Team From To']):
            df_free_agents_copy[year].drop('Rank', axis = 1, inplace = True)
            df_free_agents_copy[year].drop('Year', axis = 1, inplace = True)
            df_free_agents_copy[year].drop('Pos', axis = 1, inplace = True)
            df_free_agents_copy[year].drop('Team From To', axis = 1, inplace = True)
        # Salarios:
        if 'Rank' in df_salary_copy[year].columns:
            df_salary_copy[year].drop('Rank', axis = 1, inplace = True)
        # Bateadores:
        if any(name in df_hitting_copy[year].columns for name in ['Rank','Year','Cash2023','Team','Pos']):
            df_hitting_copy[year].drop('Rank', axis = 1, inplace = True)
            df_hitting_copy[year].drop('Cash2023', axis = 1, inplace = True)
            df_hitting_copy[year].drop('Team', axis = 1, inplace = True)
            df_hitting_copy[year].drop('Pos', axis = 1, inplace = True)
        # Fildeadores
        if any(name in df_pitching_copy[year].columns for name in ['Rank','Year','Cash2023','Team','Pos']):
            df_pitching_copy[year].drop('Rank', axis = 1, inplace = True)
            df_pitching_copy[year].drop('Cash2023', axis = 1, inplace = True)
            df_pitching_copy[year].drop('Team', axis = 1, inplace = True)
            df_pitching_copy[year].drop('Pos', axis = 1, inplace = True)
         
        # Borrar columnas inombradas:
        #
        # Agentes libres:
        df_free_agents_copy[year].drop(df_free_agents_copy[year].columns[df_free_agents_copy[year].columns.str.contains('Unnamed',case = False)],axis = 1, inplace = True)
        # Salarios:
        df_salary_copy[year].drop(df_salary_copy[year].columns[df_salary_copy[year].columns.str.contains('Unnamed',case = False)],axis = 1, inplace = True)
        # Bateadores:
        df_hitting_copy[year].drop(df_hitting_copy[year].columns[df_hitting_copy[year].columns.str.contains('Unnamed',case = False)],axis = 1, inplace = True)
        # Fildeadores:
        df_pitching_copy[year].drop(df_pitching_copy[year].columns[df_pitching_copy[year].columns.str.contains('Unnamed',case = False)],axis = 1, inplace = True)
        
    #
    # Limpieza - Agentes libres
    #
    for year in range(0,period):
        df_free_agents_copy[year] = df_free_agents_copy[year].rename(columns = {'Player':'Jugador',
                                    'Status':'Status_agente_libre', 'Team From':'Equipo_anterior',
                                    'Value':'Valor_contrato', 'AAV':'Valor_promedio_contrato',
                                    'YRS':'Anios_de_contrato'})
        # Valor de contrato:
        df_free_agents_copy[year]['Valor_contrato'] = df_free_agents_copy[year]['Valor_contrato'].values.astype('str')
        free_agents_aux_1 = df_free_agents_copy[year]['Valor_contrato'].str.replace("$","")
        free_agents_aux_2 = free_agents_aux_1.str.replace(",","")
        # Valorpromedio de contrato:
        df_free_agents_copy[year]['Valor_promedio_contrato'] = df_free_agents_copy[year]['Valor_promedio_contrato'].values.astype('str')
        free_agents_aux_3 = df_free_agents_copy[year]['Valor_promedio_contrato'].str.replace("$","")
        free_agents_aux_4 = free_agents_aux_3.str.replace(",","")
        df_free_agents_copy[year]['Valor_contrato'] = free_agents_aux_2
        df_free_agents_copy[year]['Valor_promedio_contrato'] = free_agents_aux_4
        # COnversión a data numérica:
        df_free_agents_copy[year]['Valor_contrato'] = pd.to_numeric(df_free_agents_copy[year]['Valor_contrato'])
        df_free_agents_copy[year]['Valor_promedio_contrato'] = pd.to_numeric(df_free_agents_copy[year]['Valor_promedio_contrato'])
    
    #
    # Limpieza - Salarios
    #
    for year in range(0,period):
        # Cambio de nombres
        df_salary_copy[year] = df_salary_copy[year].rename(columns = {'Player':'Jugador',
                                'BaseSalary':'Sueldo_base', 'SigningBonus':'Bono_por_firma',
                                'Payroll Salary':'Sueldo_regular', 'Adj Salary':'Sueldo_ajustado',
                                'CONT YR':'Anios_de_contrato', 'CONT VALUE':'Valor_del_contrato',
                                'Earnings':'Ganancias', 'FA Year':'Anio_de_agente_libre',
                                'Sign Age':'Edad_al_firmar', 'Age':'Edad', 'Weight':'Peso',
                                'Height':'Altura', 'Year':'Anio', 'Pos':'Posicion',
                                'Salary%':'Sueldo_porcentual', 'Cash':'Pago_efectivo',
                                'AAV':'Valor_contrato_promedio', 'Team':'Acronimo'})

        # Tranformando al tipo de dato apropiado
        #
        # Sueldo base:
        df_salary_copy[year]['Sueldo_base'] = df_salary_copy[year]['Sueldo_base'].values.astype('str')
        salary_aux_1 = df_salary_copy[year]['Sueldo_base'].str.replace("$","")
        salary_aux_2 = salary_aux_1.str.replace(",","")
        df_salary_copy[year]['Sueldo_base'] = salary_aux_2
        df_salary_copy[year]['Sueldo_base'] = pd.to_numeric(df_salary_copy[year]['Sueldo_base'])

        # Sueldo regular:
        df_salary_copy[year]['Sueldo_regular'] = df_salary_copy[year]['Sueldo_regular'].values.astype('str')
        salary_aux_3 = df_salary_copy[year]['Sueldo_regular'].str.replace("$","")
        salary_aux_4 = salary_aux_3.str.replace(",","")
        df_salary_copy[year]['Sueldo_regular'] = salary_aux_4
        df_salary_copy[year]['Sueldo_regular'] = pd.to_numeric(df_salary_copy[year]['Sueldo_regular'])

        # Sueldo ajustado:
        df_salary_copy[year]['Sueldo_ajustado'] = df_salary_copy[year]['Sueldo_ajustado'].values.astype('str')
        salary_aux_5 = df_salary_copy[year]['Sueldo_ajustado'].str.replace("$","")
        salary_aux_6 = salary_aux_5.str.replace(",","")
        df_salary_copy[year]['Sueldo_ajustado'] = salary_aux_6
        df_salary_copy[year]['Sueldo_ajustado'] = pd.to_numeric(df_salary_copy[year]['Sueldo_ajustado'])

        # Valor del contrato:
        df_salary_copy[year]['Valor_del_contrato'] = df_salary_copy[year]['Valor_del_contrato'].values.astype('str')
        salary_aux_7 = df_salary_copy[year]['Valor_del_contrato'].str.replace("$","")
        salary_aux_8 = salary_aux_7.str.replace(",","")
        df_salary_copy[year]['Valor_del_contrato'] = salary_aux_8
        df_salary_copy[year]['Valor_del_contrato'] = pd.to_numeric(df_salary_copy[year]['Valor_del_contrato'])

        # Bono por firma:
        df_salary_copy[year]['Bono_por_firma'] = df_salary_copy[year]['Bono_por_firma'].values.astype('str')
        salary_aux_9 = df_salary_copy[year]['Bono_por_firma'].str.replace("$","")
        salary_aux_10 = salary_aux_9.str.replace(",","")
        df_salary_copy[year]['Bono_por_firma'] = salary_aux_10
        df_salary_copy[year]['Bono_por_firma'] = pd.to_numeric(df_salary_copy[year]['Bono_por_firma'])

        # Ganancias:
        df_salary_copy[year]['Ganancias'] = df_salary_copy[year]['Ganancias'].values.astype('str')
        salary_aux_11 = df_salary_copy[year]['Ganancias'].str.replace("$","")
        salary_aux_12 = salary_aux_11.str.replace(",","")
        df_salary_copy[year]['Ganancias'] = salary_aux_12
        df_salary_copy[year]['Ganancias'] = pd.to_numeric(df_salary_copy[year]['Ganancias'])

        # Pago en efectivo:
        df_salary_copy[year]['Pago_efectivo'] = df_salary_copy[year]['Pago_efectivo'].values.astype('str')
        salary_aux_13 = df_salary_copy[year]['Pago_efectivo'].str.replace("$","")
        salary_aux_14 = salary_aux_13.str.replace(",","")
        df_salary_copy[year]['Pago_efectivo'] = salary_aux_14
        df_salary_copy[year]['Pago_efectivo'] = pd.to_numeric(df_salary_copy[year]['Pago_efectivo'])

        # Valor de contrato promedio:
        df_salary_copy[year]['Valor_contrato_promedio'] = df_salary_copy[year]['Valor_contrato_promedio'].values.astype('str')
        salary_aux_15 = df_salary_copy[year]['Valor_contrato_promedio'].str.replace("$","")
        salary_aux_16 = salary_aux_15.str.replace(",","")
        df_salary_copy[year]['Valor_contrato_promedio'] = salary_aux_16
        df_salary_copy[year]['Valor_contrato_promedio'] = pd.to_numeric(df_salary_copy[year]['Valor_contrato_promedio'])

        # Altura:
        df_salary_copy[year]['Altura'] = df_salary_copy[year]['Altura'].values.astype('str')
        salary_aux_17 = df_salary_copy[year]['Altura'].str.replace("\"","")
        salary_aux_18 = salary_aux_17.str.replace("'","")
        df_salary_copy[year]['Altura'] = salary_aux_18
        df_salary_copy[year]['Altura'] = pd.to_numeric(df_salary_copy[year]['Altura'])/10

        # Sustitución de los ceros:
        height_mean = df_salary_copy[year]['Altura'].mean(skipna=True)
        df_salary_copy[year]['Altura'] = df_salary_copy[year].Altura.mask(df_salary_copy[year].Altura == 0, height_mean)

        df_salary_copy[year]['Anio_de_agente_libre'] = pd.to_numeric(df_salary_copy[year]['Anio_de_agente_libre'])
        df_salary_copy[year]['Anios_de_contrato'] = pd.to_numeric(df_salary_copy[year]['Anios_de_contrato'])
        df_salary_copy[year]['Edad'] = pd.to_numeric(df_salary_copy[year]['Edad'])
            
    # Imputación de la edad al firmar:
    for year in range (0,period):
        df_salary_copy[year]['Edad_al_firmar'] = df_salary_copy[year]['Edad_al_firmar'].map(str)

        for edad in range(0,df_salary_copy[year].shape[0]):
            # String es mayor que 0:
            if len(df_salary_copy[year]['Edad_al_firmar'].iloc[edad]) == 2:
                df_salary_copy[year].iloc[edad, df_salary_copy[year].columns.get_loc('Edad_al_firmar')] = pd.to_numeric(df_salary_copy[year]['Edad_al_firmar'].iloc[edad])

            # String es menor o igual que 0:
            elif len(df_salary_copy[year]['Edad_al_firmar'].iloc[edad]) != 2:
                # Si la columna de la edad contiene datos correctos
                if df_salary_copy[year]['Edad'].iloc[edad] > 0:
                    if df_salary_copy[year]['Anio_de_agente_libre'].iloc[edad] == 0:
                        ag_year = year + 2011 + 1
                    else:
                        ag_year = df_salary_copy[year]['Anio_de_agente_libre'].iloc[edad]
                    # Get first year of contract
                    ini_year = ag_year - df_salary_copy[year]['Anios_de_contrato'].iloc[edad]
                    # Años desde el el año inicial
                    dif_years = year + 2011 - ini_year
                    # Edad al firmar:
                    sign_age = df_salary_copy[year]['Edad'].iloc[edad] - dif_years
                    # Cambio de dato:
                    df_salary_copy[year].iloc[edad, df_salary_copy[year].columns.get_loc('Edad_al_firmar')] = pd.to_numeric(sign_age)

                # Si la columna de edad no contiene un dato coherente
                else:
                    # Cambio de dato:
                    df_salary_copy[year].iloc[edad, df_salary_copy[year].columns.get_loc('Edad_al_firmar')] = pd.to_numeric(18)    

            # Entero  menor a 0:
            if df_salary_copy[year]['Edad_al_firmar'].iloc[edad] < 0:
                # Si la columna de la edad contiene datos correctos
                if df_salary_copy[year]['Edad'].iloc[edad] > 0:
                    if df_salary_copy[year]['Anio_de_agente_libre'].iloc[edad] == 0:
                        ag_year = year + 2011 + 1
                    else:
                        ag_year = df_salary_copy[year]['Anio_de_agente_libre'].iloc[edad]
                    # Get first year of contract
                    ini_year = ag_year - df_salary_copy[year]['Anios_de_contrato'].iloc[edad]
                    # Años desde el el año inicial
                    dif_years = year + 2011 - ini_year
                    # Edad al firmar:
                    sign_age = df_salary_copy[year]['Edad'].iloc[edad] - dif_years
                    # Cambio de dato:
                    df_salary_copy[year].iloc[edad, df_salary_copy[year].columns.get_loc('Edad_al_firmar')] = pd.to_numeric(sign_age)

                # Si la columna de edad no contiene un dato coherente
                else:
                    # Cambio de dato:
                    df_salary_copy[year].iloc[edad, df_salary_copy[year].columns.get_loc('Edad_al_firmar')] = pd.to_numeric(18)

    # Transformemos los datos a enteros
    df_salary_copy[year]['Edad_al_firmar'] = pd.to_numeric(df_salary_copy[year]['Edad_al_firmar'])
    
    # Imputación de las edades negativas: 
    for year in range(0,period):
        for edad in range(0,df_salary_copy[year].shape[0]):
            # Condición para imputar:
            if df_salary_copy[year]['Edad'].iloc[edad] <= 0:
                # Si no se indica si tendrá año de agencia libre:
                if df_salary_copy[year]['Anio_de_agente_libre'].iloc[edad] == 0:
                            ag_year = year + 2011 + 1
                # Si tendrá año de agencia libre
                else:
                    ag_year = df_salary_copy[year]['Anio_de_agente_libre'].iloc[edad]
                # Get first year of contract
                ini_year = ag_year - df_salary_copy[year]['Anios_de_contrato'].iloc[edad]
                # Años desde el el año inicial
                dif_years = year + 2011 - ini_year
                # Edad en la temporada:
                seasson_age = df_salary_copy[year]['Edad_al_firmar'].iloc[edad] + dif_years
                # Asignación
                df_salary_copy[year]['Edad'].iloc[edad] = seasson_age
    
    # Creación de la columna de antiguedad:
    for year in range(0,period):
        df_salary_copy[year]['Antiguedad'] = df_salary_copy[year]['Edad'] - df_salary_copy[year]['Edad_al_firmar']
    
    # Pasar el año de la base de datos como categoría:
    for year in range(0,period):
        df_salary_copy[year]['Anio'] = df_salary_copy[year]['Anio'].map(str)
    
    #
    # Limpieza - Bateadores
    #
    for year in range(0,period):
        # Cambio de nombres
        df_hitting_copy[year] = df_hitting_copy[year].rename(columns = {'Player':'Jugador',
                                'GP':'Juegos', 'GP%':'Porcentaje_juegos',
                                'AB':'Al-bate', 'H':'Bateos', 'GS':'Juegos_iniciados',
                                'GS%':'Porcentaje_juegos_iniciados', 'RBI':'Runs-batted-in',
                                'HR':'Home-runs', 'AVG':'Bateos_promedio',
                                '2B':'Dobles', '3B':'Triples', 'OPS':'Porcentaje_On-base-plus-slugging',
                                'SLG':'Porcentaje_slugging', 'OBP':'Porcentaje_on-base'})
    
    #
    # Limpieza - Fildeadores
    #
    for year in range(0,period):
        # Cambio de nombres
        df_pitching_copy[year] = df_pitching_copy[year].rename(columns = {'Player':'Jugador',
                                 'GP':'Juegos', 'GS':'Juegos_iniciados', 'IP':'Inning_pitched',
                                 'H':'Bateos', 'R':'Carreras', 'ER':'Carreras_ganadas',
                                 'BB':'Walks', 'SO':'Strike-outs', 'W':'Wins', 'L':'Losses',
                                 'SV':'Saves'})
        
        # Introducción de medidas de desempeño derivadas:
        df_pitching_copy[year]['Dominio'] = df_pitching_copy[year]['Strike-outs']/(df_pitching_copy[year]['Inning_pitched'])
        df_pitching_copy[year]['Control'] = df_pitching_copy[year]['Walks']/(df_pitching_copy[year]['Inning_pitched'])
        df_pitching_copy[year]['Comando'] = df_pitching_copy[year]['Strike-outs']/df_pitching_copy[year]['Walks']
    
    #
    # ----------------------------------- Transformación --------------------------------
    #
    
    # Sueldos:
    for year in range(0,period):
        df_salary_copy[year]['ln_Sueldo_base'] = np.log(df_salary_copy[year]['Sueldo_base'])
        df_salary_copy[year]['ln_Sueldo_ajustado'] = np.log(df_salary_copy[year]['Sueldo_ajustado'])
        df_salary_copy[year]['ln_Sueldo_regular'] = np.log(df_salary_copy[year]['Sueldo_regular'])
    
    # Imputación de datos NaN o Null:
    for year in range(0,period):
        # Salarios:
        mean_hgt = df_salary_copy[year].loc[df_salary_copy[year]['Altura'] > 4.9].Altura.mean()
        mean_wgh = df_salary_copy[year].loc[df_salary_copy[year]['Peso'] > 0].Peso.mean()
        df_salary_copy[year]['Altura'].fillna(value = mean_hgt, inplace = True)
        df_salary_copy[year]['Altura'].mask(df_salary_copy[year]['Altura'] <= 4.9, mean_hgt, inplace = True)
        df_salary_copy[year]['Peso'].fillna(value = mean_wgh, inplace = True)
        df_salary_copy[year]['Peso'].mask(df_salary_copy[year]['Peso'] <= 0, mean_wgh, inplace = True)

        # Fildeadores:
        mean_war = df_pitching_copy[year].loc[df_pitching_copy[year]['WAR'] > 0].WAR.mean()
        mean_dom = df_pitching_copy[year].loc[df_pitching_copy[year]['Dominio'] > 0].Dominio.mean()
        mean_con = df_pitching_copy[year].loc[df_pitching_copy[year]['Control'] > 0].Control.mean()
        mean_com = df_pitching_copy[year].loc[df_pitching_copy[year]['Comando'] > 0].Comando.mean()
        df_pitching_copy[year]['WAR'].fillna(value = mean_war, inplace = True)
        df_pitching_copy[year]['WAR'].mask(df_pitching_copy[year]['WAR'] <= 0, mean_war, inplace = True)
        df_pitching_copy[year]['Dominio'].fillna(value = mean_dom, inplace = True)
        df_pitching_copy[year]['Dominio'].mask(df_pitching_copy[year]['Dominio'] <= 0, mean_dom, inplace = True)
        df_pitching_copy[year]['Control'].fillna(value = mean_con, inplace = True)
        df_pitching_copy[year]['Control'].mask(df_pitching_copy[year]['Control'] <= 0, mean_con, inplace = True)
        df_pitching_copy[year]['Comando'].fillna(value = mean_com, inplace = True)
        df_pitching_copy[year]['Comando'].mask(df_pitching_copy[year]['Comando'] <= 0, mean_com, inplace = True)

        # Bateadores:
        mean_war = df_hitting_copy[year].loc[df_hitting_copy[year]['WAR'] > 0].WAR.mean()
        df_hitting_copy[year]['WAR'].fillna(value = mean_war, inplace = True)
        df_hitting_copy[year]['WAR'].mask(df_hitting_copy[year]['WAR'] <= 0, mean_war, inplace = True)
    
    # Condiciones de imputación:
    for year in range(0,period):   
        # Condiciones
        con_dom_1 = df_pitching_copy[year]['Strike-outs'] == 0
        con_con_1 = df_pitching_copy[year]['Walks'] == 0
        con_com_1 = df_pitching_copy[year]['Strike-outs'] == 0

        # Imputación caso 0/0
        df_pitching_copy[year].loc[con_dom_1, "Dominio"] = 0
        df_pitching_copy[year].loc[con_con_1, "Control"] = 0
        df_pitching_copy[year].loc[con_com_1, "Comando"] = 0
    
    # Imputación de acuerdo a las condiciones:
    for year in range(0,period):   
        # Máximos
        max_dom = df_pitching_copy[year]['Strike-outs'].max()/9
        max_con = df_pitching_copy[year]['Walks'].max()/9
        max_com = df_pitching_copy[year]['Strike-outs'].max()

        # Cambianfdo infinitos a NaNs
        df_pitching_copy[year]["Dominio"].replace([np.inf, -np.inf], np.nan, inplace = True)
        df_pitching_copy[year]["Control"].replace([np.inf, -np.inf], np.nan, inplace = True)
        df_pitching_copy[year]["Comando"].replace([np.inf, -np.inf], np.nan, inplace = True)

        # Imputación
        df_pitching_copy[year]['Dominio'].fillna(value = max_dom, inplace = True)
        df_pitching_copy[year]['Control'].fillna(value = max_con, inplace = True)
        df_pitching_copy[year]['Comando'].fillna(value = max_com, inplace = True)
    
    # Imputación para los salarios negativos
    for year in range(0,period):
        df_salary_copy[year]['ln_Sueldo_ajustado'].mask(df_salary_copy[year]['ln_Sueldo_ajustado'] < 0,
                                                        df_salary_copy[year]['ln_Sueldo_regular'],
                                                        inplace = True)
    
    #
    # Medidas de desempeño cuadráticas
    #
    hitting_indexes = list(get_col_indices(df_hitting_copy[0], hitting_names))
    pitching_indexes = list(get_col_indices(df_pitching_copy[0], pitching_names))
    
    # Creación:
    for year in range(0,period):
        # Hitters:
        for hitter_name in hitting_indexes:
            df_hitting_copy[year][df_hitting_copy[year].columns[hitter_name] + '_2'] = np.power(df_hitting_copy[year][df_hitting_copy[year].columns[hitter_name]], 2)
        # Pitchers:
        for pitcher_name in pitching_indexes:
            df_pitching_copy[year][df_pitching_copy[year].columns[pitcher_name] + '_2'] = np.power(df_pitching_copy[year][df_pitching_copy[year].columns[pitcher_name]], 2)
    
    #
    # ------------------------------------ Unión de df ----------------------------------
    #
    # Equipos
    #
    for year in range(0,period):
        df_teams_copy[year] = pd.merge(df_teams_copy[year],
                                       acronym_state,
                                       on = ['Equipo','Acronimo'])
    # Salarios
    #
    for year in range(0,period):
        df_salary_copy[year] = pd.merge(df_teams_copy[year],
                                        df_salary_copy[year],
                                        on = 'Acronimo')
    
    # Medidas de desempeño
    #
    for year in range(0,period):
        df_hitting_copy[year] = pd.merge(df_hitting_copy[year],
                                         df_salary_copy[year],
                                         on = 'Jugador')
        df_pitching_copy[year] = pd.merge(df_pitching_copy[year],
                                          df_salary_copy[year],
                                          on = 'Jugador')
    
    # Creación de la columna de porcentaje de juegos
    for year in range(0,period):
        df_pitching_copy[year]['Porcentaje_juegos'] = df_pitching_copy[year]['Juegos']/df_pitching_copy[year]['Juegos totales']
        
    # Limpieza de índices de columna
    for year in range(0,period):
        # Ordenando alfabéticamente
        df_salary_copy[year].sort_index(axis = 1,
                                        inplace = True)
        df_hitting_copy[year].sort_index(axis = 1,
                                         inplace = True)
        df_pitching_copy[year].sort_index(axis = 1,
                                          inplace = True)
        df_free_agents_copy[year].sort_index(axis = 1,
                                             inplace = True)

        # Reiniciando los índices
        df_salary_copy[year].reset_index(drop = True,
                                         inplace = True)
        df_hitting_copy[year].reset_index(drop = True,
                                          inplace = True)
        df_pitching_copy[year].reset_index(drop = True,
                                           inplace = True)
        df_free_agents_copy[year].reset_index(drop = True,
                                              inplace = True)
        
    #
    # --------------------------------- Variables rezagadas -----------------------------
    #
    # Auxiliares
    #
    df_hitters_copy = [None]*period
    df_pitchers_copy = [None]*period
    
    for year in range(0,period):
        df_hitters_copy[year] = df_hitting_copy[year].copy()
        df_pitchers_copy[year] = df_pitching_copy[year].copy()
        
    for year in range(1,period):    
        df_hitting_copy[year] = pd.merge(df_hitters_copy[year],
                                         df_hitters_copy[year-1],
                                         on = 'Jugador')
        df_pitching_copy[year] = pd.merge(df_pitchers_copy[year],
                                          df_pitchers_copy[year-1],
                                          on = 'Jugador')
    
    # Corrección de nombres
    #
    for year in range(1,period):       
        df_pitching_copy[year].columns = df_pitching_copy[year].columns.str.replace('_x', '_t')
        df_pitching_copy[year].columns = df_pitching_copy[year].columns.str.replace('_y', '_t_1')
        df_pitching_copy[year].columns = df_pitching_copy[year].columns.str.replace('-', '_')
        df_pitching_copy[year].columns = df_pitching_copy[year].columns.str.replace(' ', '_')
        df_pitching_copy[year].drop(['ln_Sueldo_base_t_1',
                                     'ln_Sueldo_ajustado_t_1',
                                     'ln_Sueldo_regular_t_1'],
                               axis = 1, inplace = True)
        df_pitching_copy[year] = df_pitching_copy[year].sort_values(by = 'Jugador',
                                                                    ascending = True)
        df_pitching_copy[year].reset_index(drop = True,
                                           inplace = True)

        df_hitting_copy[year].columns = df_hitting_copy[year].columns.str.replace('_x', '_t')
        df_hitting_copy[year].columns = df_hitting_copy[year].columns.str.replace('_y', '_t_1')
        df_hitting_copy[year].columns = df_hitting_copy[year].columns.str.replace('-', '_')
        df_hitting_copy[year].columns = df_hitting_copy[year].columns.str.replace(' ', '_')
        df_hitting_copy[year].drop(['ln_Sueldo_base_t_1',
                                    'ln_Sueldo_ajustado_t_1',
                                    'ln_Sueldo_regular_t_1'],
                              axis = 1, inplace = True)
        df_hitting_copy[year] = df_hitting_copy[year].sort_values(by = 'Jugador',
                                                                  ascending = True)
        df_hitting_copy[year].reset_index(drop = True,
                                          inplace = True)

        # Reordenando las columnas
        df_hitting_copy[year].sort_index(axis = 1,
                                         inplace = True)
        df_pitching_copy[year].sort_index(axis = 1,
                                          inplace = True)
        
    # Quitando columnas innecesarias
    #
    for year in range(1,period):
        df_pitching_copy[year].drop(['Anio_t_1', 'Estado_t_1', 'Edad_t_1'],
                               axis = 1, inplace = True)

        df_hitting_copy[year].drop(['Anio_t_1', 'Estado_t_1', 'Edad_t_1'],
                               axis = 1, inplace = True)

        # Reordenando las columnas
        df_hitting_copy[year].sort_index(axis = 1, inplace = True)
        df_pitching_copy[year].sort_index(axis = 1, inplace = True)

        # Reiniciando índice
        df_hitting_copy[year].reset_index(drop = True, inplace = True)
        df_pitching_copy[year].reset_index(drop = True, inplace = True)
    
    # Cambiando sufijo de bases del 2011
    year = 0
    # Reiniciando los índices
    df_hitting_copy[year] = df_hitting_copy[year].add_suffix('_t')
    df_pitching_copy[year] = df_pitching_copy[year].add_suffix('_t')
    # Corrección de columna del jugador
    df_hitting_copy[year].columns = df_hitting_copy[year].columns.str.replace('Jugador_t', 'Jugador')
    df_pitching_copy[year].columns = df_pitching_copy[year].columns.str.replace('Jugador_t', 'Jugador')
    
    #
    # ------------------------------------ Segmentación --------------------------------
    #
    for year in range(0,period):
        # Filtrando los agentes libres
        df_hitters_free_agents[year] = pd.merge(df_free_agents_copy[year],
                                                df_hitting_copy[year], on = 'Jugador')
        df_pitchers_free_agents[year] = pd.merge(df_free_agents_copy[year],
                                                 df_pitching_copy[year], on = 'Jugador')
        # FIltrando los que no son agentes libres
        df_hitters_no_free_agents[year] = df_hitting_copy[year][~df_hitting_copy[year].Jugador.isin(df_hitters_free_agents[year].Jugador)]
        df_pitchers_no_free_agents[year] = df_pitching_copy[year][~df_pitching_copy[year].Jugador.isin(df_pitchers_free_agents[year].Jugador)]

        # Reiniciando el índice
        df_hitters_free_agents[year] = df_hitters_free_agents[year].reindex(sorted(df_hitters_free_agents[year].columns), axis = 1)
        df_pitchers_free_agents[year] = df_pitchers_free_agents[year].reindex(sorted(df_pitchers_free_agents[year].columns), axis = 1)
        df_hitters_no_free_agents[year] = df_hitters_no_free_agents[year].reindex(sorted(df_hitters_no_free_agents[year].columns), axis = 1)
        df_pitchers_no_free_agents[year] = df_pitchers_no_free_agents[year].reindex(sorted(df_pitchers_no_free_agents[year].columns), axis = 1)
    
    # Cambiar "case" de las columnas
    #
    for year in range(0,period):
        df_hitters_free_agents[year].rename(columns = str.lower)
        df_pitchers_free_agents[year].rename(columns = str.lower)
        df_hitters_no_free_agents[year].rename(columns = str.lower)
        df_pitchers_no_free_agents[year].rename(columns = str.lower)
    
    # Exportar 
    #
    for year in range(0,period):    
        # Exportemos los dataframes por separado
        df_hitters_free_agents[year].to_csv('ETL_Data/' + df_types[0] + '/Agent/1_Years_Contract/Period_t_1/Free_Agent/Hitters/free_agents_batters_' + str(2011 + year) + '.csv',
                                            index = False)
        df_pitchers_free_agents[year].to_csv('ETL_Data/' + df_types[0] + '/Agent/1_Years_Contract/Period_t_1/Free_Agent/Pitchers/free_agents_pitchers_' + str(2011 + year) + '.csv',
                                             index = False)
        df_hitters_no_free_agents[year].to_csv('ETL_Data/' + df_types[0] + '/Agent/1_Years_Contract/Period_t_1/No_Free_Agent/Hitters/no_free_agents_batters_' + str(2011 + year) + '.csv',
                                               index = False)
        df_pitchers_no_free_agents[year].to_csv('ETL_Data/' + df_types[0] + '/Agent/1_Years_Contract/Period_t_1/No_Free_Agent/Pitchers/no_free_agents_pitchers_' + str(2011 + year) + '.csv',
                                                index = False)
        
    #
    # --------------------------------------- Panel -----------------------------------
    #
    # Inicialización del panel
    #
    df_panel_all_hitter = df_hitting_copy[0]
    df_panel_all_pitcher = df_pitching_copy[0]

    for year in range(1,period):
        # Bateador
        df_panel_all_hitter = pd.concat([df_panel_all_hitter,
                                         df_hitting_copy[year]])

        # Fildeadores
        df_panel_all_pitcher = pd.concat([df_panel_all_pitcher,
                                          df_pitching_copy[year]])
    
    # Ordenando alfabéticamente las columnas
    #
    df_panel_all_hitter.sort_index(axis = 1,
                                   inplace = True)
    df_panel_all_pitcher.sort_index(axis = 1,
                                    inplace = True)
    
    # Exportando panel
    df_panel_all_hitter.to_csv('ETL_Data/' + df_types[0] + '/Agent/1_Years_Contract/Period_t_1/Hitters/All_Hitters/panel_hitters' + '.csv',
                               index = False)
    df_panel_all_pitcher.to_csv('ETL_Data/' + df_types[0] + '/Agent/1_Years_Contract/Period_t_1/Pitchers/All_Pitchers/panel_pitchers' + '.csv',
                                index = False)
    
    #
    # ---------------------------------------- FA -------------------------------------
    #
    # Inicialización del panel
    df_panel_fa_hitter = df_hitters_free_agents[0]
    df_panel_fa_pitcher = df_pitchers_free_agents[0]

    for year in range(1,period):
        # Bateadores:
        df_panel_fa_hitter = pd.concat([df_panel_fa_hitter,
                                        df_hitters_free_agents[year]])

        # Fildeadores:
        df_panel_fa_pitcher = pd.concat([df_panel_fa_pitcher,
                                         df_pitchers_free_agents[year]])
    
    df_panel_fa_hitter.drop('Anios_de_contrato',
                            axis = 1,
                            inplace = True)
    df_panel_fa_pitcher.drop('Anios_de_contrato',
                             axis = 1,
                             inplace = True)
    
    # Ordenando columnas
    #
    df_panel_fa_hitter.sort_index(axis = 1,
                                  inplace = True)
    df_panel_fa_pitcher.sort_index(axis = 1,
                                   inplace = True)
    
    # Exportando el panel
    #
    df_panel_fa_hitter.to_csv('ETL_Data/' + df_types[0] + '/Agent/1_Years_Contract/Period_t_1/Free_Agent/Hitters/panel_hitters' + '.csv',
                              index = False)
    df_panel_fa_pitcher.to_csv('ETL_Data/' + df_types[0] + '/Agent/1_Years_Contract/Period_t_1/Free_Agent/Pitchers/panel_pitchers' + '.csv',
                               index = False)
    
    # -----------------------------------------------------------------------------------
    # --------------------------------------- Modelo ------------------------------------
    # -----------------------------------------------------------------------------------
    # Copias del panel
    #
    empiric_panel_hitter = df_panel_fa_hitter.copy()
    empiric_panel_pitcher = df_panel_fa_pitcher.copy()
    
    # Bateador:
    empiric_panel_hitter = empiric_panel_hitter.sort_values(by = ['Jugador','Anio_t'],
                                                            ascending=True)
    empiric_panel_hitter.reset_index(drop = True,
                                     inplace = True)

    # Fildero:
    empiric_panel_pitcher = empiric_panel_pitcher.sort_values(by = ['Jugador','Anio_t'],
                                                              ascending=True)
    empiric_panel_pitcher.reset_index(drop = True,
                                      inplace = True)
    
    # Bateador:
    for stat in range(0,len(hitter_regular_stats)):
        # Variables auxiliares
        stat_name = hitter_regular_stats[stat]
        max_stat_name = stat_name + '_H'
        min_stat_name = stat_name + '_L'

        # Máximos por equipo
        max_stat = pd.DataFrame({"Acronimo_t":empiric_panel_hitter.groupby(by = "Acronimo_t")[stat_name].max().index,
                                 max_stat_name: empiric_panel_hitter.groupby(by = "Acronimo_t")[stat_name].max().values})
        # Mínimos por equipo
        min_stat = pd.DataFrame({"Acronimo_t":empiric_panel_hitter.groupby(by = "Acronimo_t")[stat_name].min().index,
                                 min_stat_name: empiric_panel_hitter.groupby(by = "Acronimo_t")[stat_name].min().values})
        empiric_panel_hitter = empiric_panel_hitter.merge(max_stat, on = "Acronimo_t",
                                                          how = "left")
        empiric_panel_hitter = empiric_panel_hitter.merge(min_stat, on = "Acronimo_t",
                                                          how = "left")
        
    

In [15]:
for name in df_panel_all_hitter.columns:
    print(name)
    if type(name) != str:
        for element in range(0,len(df_panel_all_hitter[name])):
            if math.isinf(df_panel_all_hitter[name].iloc[element]) == True:
                print(str(element) +  '  ' + str(df_panel_all_hitter[name].iloc[element]))

Acronimo_t
Al-bate_t
Al-bate_2_t
Altura_t
Anio_t
Anio_de_agente_libre_t
Anios_de_contrato_t
Antiguedad_t
Bateos_t
Bateos_2_t
Bateos_promedio_t
Bateos_promedio_2_t
Bono_por_firma_t
Cantidad de equipos_t
Cantidad_agentes_libres_t
Dobles_t
Dobles_2_t
Edad_t
Edad_al_firmar_t
Equipo_t
Estado_t
Ganancias_t
Home-runs_t
Home-runs_2_t
Juegos_t
Juegos totales_t
Juegos_iniciados_t
Juegos_iniciados_2_t
Jugador
Pago_efectivo_t
Pennants won_t
Peso_t
Playoffs_t
Porcentaje_On-base-plus-slugging_t
Porcentaje_On-base-plus-slugging_2_t
Porcentaje_juegos_t
Porcentaje_juegos_iniciados_t
Porcentaje_juegos_iniciados_2_t
Porcentaje_on-base_t
Porcentaje_on-base_2_t
Porcentaje_slugging_t
Porcentaje_slugging_2_t
Posicion_t
Promedio_victorias_t
Runs-batted-in_t
Runs-batted-in_2_t
Sueldo_ajustado_t
Sueldo_base_t
Sueldo_porcentual_t
Sueldo_regular_t
TVS_t
TVS_2_t
Triples_t
Triples_2_t
Valor_contrato_promedio_t
Valor_contrato_total_t
Valor_del_contrato_t
Victorias_t
WAR_t
WAR_2_t
WS ganadas_t
ln_Sueldo_ajustado_t
ln

In [16]:
for name in df_panel_all_pitcher.columns:
    print(name)
    if type(name) != str:
        for element in range(0,len(df_panel_all_pitcher[name])):
            if math.isinf(df_panel_all_pitcher[name].iloc[element]) == True:
                print(str(element) +  '  ' + str(df_panel_all_pitcher[name].iloc[element]))

Acronimo_t
Altura_t
Anio_t
Anio_de_agente_libre_t
Anios_de_contrato_t
Antiguedad_t
Bateos_t
Bateos_2_t
Bono_por_firma_t
Cantidad de equipos_t
Cantidad_agentes_libres_t
Carreras_t
Carreras_2_t
Carreras_ganadas_t
Carreras_ganadas_2_t
Comando_t
Comando_2_t
Control_t
Control_2_t
Dominio_t
Dominio_2_t
ERA_t
ERA_2_t
Edad_t
Edad_al_firmar_t
Equipo_t
Estado_t
Ganancias_t
Inning_pitched_t
Inning_pitched_2_t
Juegos_t
Juegos totales_t
Juegos_iniciados_t
Jugador
Losses_t
Losses_2_t
Pago_efectivo_t
Pennants won_t
Peso_t
Playoffs_t
Porcentaje_juegos_t
Posicion_t
Promedio_victorias_t
Saves_t
Saves_2_t
Strike-outs_t
Strike-outs_2_t
Sueldo_ajustado_t
Sueldo_base_t
Sueldo_porcentual_t
Sueldo_regular_t
TVS_t
TVS_2_t
Valor_contrato_promedio_t
Valor_contrato_total_t
Valor_del_contrato_t
Victorias_t
WAR_t
WAR_2_t
WHIP_t
WHIP_2_t
WS ganadas_t
Walks_t
Walks_2_t
Wins_t
Wins_2_t
ln_Sueldo_ajustado_t
ln_Sueldo_base_t
ln_Sueldo_regular_t
Acronimo_t_1
Altura_t_1
Anio_de_agente_libre_t_1
Anios_de_contrato_t_1
Antig