# Tabla players (tabla de hechos)

### Se importan las librerías a usar

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

### Carga del dataframe de players

In [2]:
# Load the raw players table from the CSV sitting next to the notebook.
players = pd.read_csv(filepath_or_buffer='player.csv')

In [3]:
# (rows, columns) of the raw players frame.
players.shape

(4831, 5)

### Visualización preliminar del dataframe

In [4]:
# Peek at the first five rows to get a feel for the columns.
players.head(n=5)

Unnamed: 0,id,full_name,first_name,last_name,is_active
0,76001,Alaa Abdelnaby,Alaa,Abdelnaby,0
1,76002,Zaid Abdul-Aziz,Zaid,Abdul-Aziz,0
2,76003,Kareem Abdul-Jabbar,Kareem,Abdul-Jabbar,0
3,51,Mahmoud Abdul-Rauf,Mahmoud,Abdul-Rauf,0
4,1505,Tariq Abdul-Wahad,Tariq,Abdul-Wahad,0


### Ver el tipo de datos del dataframe

In [5]:
# Print each column's non-null count and dtype for the raw players frame.
players.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4831 entries, 0 to 4830
Data columns (total 5 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   id          4831 non-null   int64 
 1   full_name   4831 non-null   object
 2   first_name  4825 non-null   object
 3   last_name   4831 non-null   object
 4   is_active   4831 non-null   int64 
dtypes: int64(2), object(3)
memory usage: 188.8+ KB


### Filtro por jugadores activos

In [6]:
# Keep only the rows flagged as active (is_active >= 1); the original row
# labels from `players` are preserved.
players_active = players.loc[players['is_active'].ge(1)]

In [7]:
# Display the active-players subset using the notebook's rich repr.
players_active

Unnamed: 0,id,full_name,first_name,last_name,is_active
10,1630173,Precious Achiuwa,Precious,Achiuwa,1
22,203500,Steven Adams,Steven,Adams,1
24,1628389,Bam Adebayo,Bam,Adebayo,1
29,1630534,Ochai Agbaji,Ochai,Agbaji,1
41,1630583,Santi Aldama,Santi,Aldama,1
...,...,...,...,...,...
4806,201152,Thaddeus Young,Thaddeus,Young,1
4808,1629027,Trae Young,Trae,Young,1
4809,1630209,Omer Yurtseven,Omer,Yurtseven,1
4812,203469,Cody Zeller,Cody,Zeller,1


### Revisamos los valores duplicados y nulos

In [8]:
# PORCENTAJE DE VALORES NULOS
# Per-column null report for the active players: absolute count of missing
# values and the share of rows they represent, expressed in percent.
nulos_total = players_active.isna().sum()
porcentaje_nulos = nulos_total.div(len(players_active)).mul(100)

# Put both series side by side, one row per column of `players_active`.
resumen_nulos = pd.concat(
    [
        nulos_total.rename('Valores Nulos'),
        porcentaje_nulos.rename('Porcentaje Nulos'),
    ],
    axis=1,
)

print(resumen_nulos)

            Valores Nulos  Porcentaje Nulos
id                      0               0.0
full_name               0               0.0
first_name              0               0.0
last_name               0               0.0
is_active               0               0.0


In [9]:
# Boolean mask marking every row that has an exact copy elsewhere in the frame
# (keep=False flags all members of a duplicate group, not just the repeats).
duplicados = players_active.duplicated(keep=False)

# Select the flagged rows; an empty frame means there are no duplicated rows.
datos_duplicados = players_active.loc[duplicados]
print(datos_duplicados)

Empty DataFrame
Columns: [id, full_name, first_name, last_name, is_active]
Index: []


### Cambio de nombre de columna 

In [10]:
# Rename the key column so it matches the 'player_id' name used as the join
# key further down.
players_active = players_active.rename({'id': 'player_id'}, axis='columns')

In [11]:
# Display the frame again to confirm the column is now called 'player_id'.
players_active

Unnamed: 0,player_id,full_name,first_name,last_name,is_active
10,1630173,Precious Achiuwa,Precious,Achiuwa,1
22,203500,Steven Adams,Steven,Adams,1
24,1628389,Bam Adebayo,Bam,Adebayo,1
29,1630534,Ochai Agbaji,Ochai,Agbaji,1
41,1630583,Santi Aldama,Santi,Aldama,1
...,...,...,...,...,...
4806,201152,Thaddeus Young,Thaddeus,Young,1
4808,1629027,Trae Young,Trae,Young,1
4809,1630209,Omer Yurtseven,Omer,Yurtseven,1
4812,203469,Cody Zeller,Cody,Zeller,1


In [12]:
# Load the salary table from the CSV sitting next to the notebook.
salario = pd.read_csv(filepath_or_buffer='salary_final.csv')

### Se eliminan las columnas que no se van a usar

In [13]:
# Discard the two columns that are not needed downstream in a single call.
salario = salario.drop(columns=['Unnamed: 0', 'Temp'])

In [14]:
# List the columns that remain in the salary table.
salario.columns

Index(['player_id', 'Player', 'Salary'], dtype='object')

In [15]:
# (rows, columns) of the salary table.
salario.shape

(306, 3)

In [16]:
# List the columns of the active-players frame before joining.
players_active.columns

Index(['player_id', 'full_name', 'first_name', 'last_name', 'is_active'], dtype='object')

In [17]:
# (rows, columns) of the active-players frame; the row count is the baseline
# to compare against after the merge.
players_active.shape

(582, 5)

### Se realiza un merge entre jugadores y salarios 

In [18]:
# Left join: every active player is kept, and 'Salary' is attached where the
# salary table has a matching 'player_id' (NaN otherwise).
# NOTE(review): a repeated 'player_id' in `salario` would duplicate player rows;
# compare the resulting row count with `players_active`.
df_merged = players_active.merge(
    salario[['player_id', 'Salary']],
    how='left',
    on='player_id',
)

In [19]:
# (rows, columns) of the merged frame.
df_merged.shape

(582, 6)

In [20]:
# PORCENTAJE DE VALORES NULOS
# Per-column null report for the merged frame: absolute count of missing
# values and the share of rows they represent, expressed in percent.
nulos_total = df_merged.isna().sum()
porcentaje_nulos = nulos_total.div(len(df_merged)).mul(100)

# Put both series side by side, one row per column of `df_merged`.
resumen_nulos = pd.concat(
    [
        nulos_total.rename('Valores Nulos'),
        porcentaje_nulos.rename('Porcentaje Nulos'),
    ],
    axis=1,
)

print(resumen_nulos)

            Valores Nulos  Porcentaje Nulos
player_id               0          0.000000
full_name               0          0.000000
first_name              0          0.000000
last_name               0          0.000000
is_active               0          0.000000
Salary                292         50.171821


In [21]:
# Players with no match in the salary table come out of the left merge with
# NaN in 'Salary'. Fill ONLY that column: a frame-wide fillna(0) would also
# turn any missing text value (e.g. a null first_name) into the integer 0.
# Note that a 0 here therefore means "no salary record", not a zero salary.
df_merged = df_merged.fillna({'Salary': 0})

In [22]:
# PORCENTAJE DE VALORES NULOS
# Re-run the null report after filling, to confirm no missing values remain.
nulos_total = df_merged.isna().sum()
porcentaje_nulos = nulos_total.div(len(df_merged)).mul(100)

# Put both series side by side, one row per column of `df_merged`.
resumen_nulos = pd.concat(
    [
        nulos_total.rename('Valores Nulos'),
        porcentaje_nulos.rename('Porcentaje Nulos'),
    ],
    axis=1,
)

print(resumen_nulos)

            Valores Nulos  Porcentaje Nulos
player_id               0               0.0
full_name               0               0.0
first_name              0               0.0
last_name               0               0.0
is_active               0               0.0
Salary                  0               0.0


In [23]:
# With the NaNs gone, cast 'Salary' to an integer dtype.
df_merged['Salary'] = df_merged['Salary'].astype(int)

### Visualización final

In [24]:
# Display the final merged frame (active players plus their 'Salary').
df_merged

Unnamed: 0,player_id,full_name,first_name,last_name,is_active,Salary
0,1630173,Precious Achiuwa,Precious,Achiuwa,1,0
1,203500,Steven Adams,Steven,Adams,1,0
2,1628389,Bam Adebayo,Bam,Adebayo,1,30351780
3,1630534,Ochai Agbaji,Ochai,Agbaji,1,3918360
4,1630583,Santi Aldama,Santi,Aldama,1,0
...,...,...,...,...,...,...
577,201152,Thaddeus Young,Thaddeus,Young,1,8000000
578,1629027,Trae Young,Trae,Young,1,37096500
579,1630209,Omer Yurtseven,Omer,Yurtseven,1,1752638
580,203469,Cody Zeller,Cody,Zeller,1,0


### Exportamos los datos.

In [25]:
# Write the final table to disk; index=False keeps the positional row index
# out of the exported file.
df_merged.to_csv(path_or_buf='player_filtrado.csv', index=False)