# Pandas para Psicólogos
![](https://numfocus.org/wp-content/uploads/2016/07/pandas-logo-300.png)

[pandas - Python Data Analysis Library](https://pandas.pydata.org/docs/)

# Numpy para Psicólogos
![](https://user-images.githubusercontent.com/50221806/85190325-2b3f4400-b26c-11ea-93f3-81a101614a88.png)

[Numerical Python](https://numpy.org/)

## Introducción a los DataSets

In [29]:
import pandas as pd
import numpy as np
# librería "glob" — Busca nombres de archivos que coincidan con un patrón.
from glob import glob as gg
# Librería "tqdm" para ver el progreso del o de los procesos
from tqdm import tqdm
# Para pausar brevemente cada iteración (time.sleep) y ver avanzar la barra de "tqdm"
import time

In [2]:
# Locate the Excel workbooks in the working directory.
# glob() returns matches in arbitrary, filesystem-dependent order, and later
# cells index this list by position, so sort it to keep that order stable.
files_xlsx = sorted(gg('*.xlsx'))
files_xlsx

['Ejemplo1.xlsx', 'Ejemplo2.xlsx', 'Ejemplo3.xlsx']

In [3]:
# Load the first workbook without choosing an index column and preview it.
xlsx1 = pd.read_excel(files_xlsx[0])
xlsx1.head()
# Do we want the ID column as the index?

Unnamed: 0,ID,Var1,Var2,Var3,Var4,Var5,Var6,Var7,Var8,Var9,Var10
0,1,-313,-850.0,590.0,-171,-839.0,-277,770,-790,0.959602,699
1,2,473,473.0,-146.0,-292,-652.0,-774,-569,330,0.354079,-263
2,3,84,,207.0,-56,-67.0,Falsedades,597,-752,0.820334,420
3,4,Nada,,47.0,3,-422.0,0,-820,-699,0.428466,1983
4,5,322,859.0,,-346,,85,-627,835,0.467556,-273


In [4]:
# Reload the same workbook, this time using the 'ID' column as the row index.
xlsx1 = pd.read_excel(files_xlsx[0], index_col='ID')
xlsx1.head()

Unnamed: 0_level_0,Var1,Var2,Var3,Var4,Var5,Var6,Var7,Var8,Var9,Var10
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1,-313,-850.0,590.0,-171,-839.0,-277,770,-790,0.959602,699
2,473,473.0,-146.0,-292,-652.0,-774,-569,330,0.354079,-263
3,84,,207.0,-56,-67.0,Falsedades,597,-752,0.820334,420
4,Nada,,47.0,3,-422.0,0,-820,-699,0.428466,1983
5,322,859.0,,-346,,85,-627,835,0.467556,-273


In [5]:
# Number of observations in the dataset
xlsx1.shape # (rows, columns)

(20, 10)

In [6]:
xlsx1.shape[0] # rows only

20

In [7]:
xlsx1.shape[1] # columns only

10

In [8]:
# But I want the complete summary
xlsx1.info() # How should this output be interpreted?

<class 'pandas.core.frame.DataFrame'>
Int64Index: 20 entries, 1 to 20
Data columns (total 10 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Var1    17 non-null     object 
 1   Var2    18 non-null     object 
 2   Var3    18 non-null     object 
 3   Var4    20 non-null     object 
 4   Var5    19 non-null     object 
 5   Var6    20 non-null     object 
 6   Var7    20 non-null     object 
 7   Var8    14 non-null     object 
 8   Var9    20 non-null     float64
 9   Var10   20 non-null     int64  
dtypes: float64(1), int64(1), object(8)
memory usage: 1.7+ KB


In [9]:
# Only the row labels (the index)
xlsx1.index

Int64Index([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
            20],
           dtype='int64', name='ID')

In [10]:
# Only the column names
xlsx1.columns

Index(['Var1', 'Var2', 'Var3', 'Var4', 'Var5', 'Var6', 'Var7', 'Var8', 'Var9',
       'Var10'],
      dtype='object')

In [11]:
# Distinct values of Var1: first as returned by unique(), then converted to a plain list.
print(xlsx1.Var1.unique())
print(' ')
print(list(xlsx1.Var1.unique()))

[-313 473 84 'Nada' 322 -860 700 247 -512 -386 610 -237 207 nan -671 977
 -865 -545]
 
[-313, 473, 84, 'Nada', 322, -860, 700, 247, -512, -386, 610, -237, 207, nan, -671, 977, -865, -545]


In [12]:
# Which values force the Var1 column to dtype 'object'?  Collect the distinct
# values that are text.  isinstance() is the idiomatic type test and, unlike
# type(x) == str, it also recognises subclasses of str.
objetos = [tipo for tipo in xlsx1.Var1.unique() if isinstance(tipo, str)]
objetos

['Nada']

In [13]:
# Explicit-loop version: gather the distinct text values found in Var1.
objetos = []
for tipo in xlsx1.Var1.unique():
    # isinstance() is the idiomatic type check and also accepts str subclasses.
    if isinstance(tipo, str):
        objetos.append(tipo)
objetos

['Nada']

In [14]:
# Distinct text values found in Var5 (list-comprehension form).
# isinstance() is the idiomatic type test and also accepts str subclasses.
objetos = [tipo for tipo in xlsx1.Var5.unique() if isinstance(tipo, str)]
objetos

['errores', 'Falsedades']

In [15]:
# Explicit-loop version: gather the distinct text values found in Var5.
objetos = []
for tipo in xlsx1.Var5.unique():
    # isinstance() is the idiomatic type check and also accepts str subclasses.
    if isinstance(tipo, str):
        objetos.append(tipo)
objetos

['errores', 'Falsedades']

In [16]:
# Checking one column at a time costs too much time; scan every column instead.
# Each column name is printed, followed by the text values found in it, and all
# text values are accumulated (duplicates included) in `objetos`.
objetos = []
for columna in xlsx1.columns:
    print('En la columna {}'.format(columna))
    for tipo in xlsx1[columna].unique():
        # isinstance() is the idiomatic type check and also accepts str subclasses.
        if isinstance(tipo, str):
            print(tipo)
            print(' ')
            objetos.append(tipo)

objetos

En la columna Var1
Nada
 
En la columna Var2
Nada
 
En la columna Var3
Mentiras
 
En la columna Var4
Falsedades
 
En la columna Var5
errores
 
Falsedades
 
En la columna Var6
Falsedades
 
errores
 
En la columna Var7
Mentiras
 
En la columna Var8
Falsedades
 
En la columna Var9
En la columna Var10


['Nada',
 'Nada',
 'Mentiras',
 'Falsedades',
 'errores',
 'Falsedades',
 'Falsedades',
 'errores',
 'Mentiras',
 'Falsedades']

In [17]:
# A set keeps each text value only once.
objetos = set(objetos)
objetos

{'Falsedades', 'Mentiras', 'Nada', 'errores'}

In [18]:
# Convert the de-duplicated values back into a list.
objetos = list(set(objetos))
objetos

['Falsedades', 'Mentiras', 'errores', 'Nada']

In [19]:
lista1 = objetos
# np.empty allocates the array without initialising it, so the values printed here are arbitrary.
lista2 = np.empty(len(lista1))
print(lista2)

[ 1.49166815e-154 -1.49457443e-154  6.93633997e-310  3.47497632e-309]


In [20]:
# Overwrite every element of the array, in place, with NaN.
lista2[:] = np.nan
print(lista2)

[nan nan nan nan]


In [21]:
# Pair each text value with a NaN and turn the pairs into a {text: nan} dictionary.
objetos3 = zip(lista1, lista2)
objetos4 = dict(objetos3)
objetos4

{'Falsedades': nan, 'Mentiras': nan, 'errores': nan, 'Nada': nan}

In [22]:
# How the whole procedure should look in a single cell.
# Step 1: collect every text value found in any column of xlsx1.
objetos = []
for columna in xlsx1.columns:
    # isinstance() is the idiomatic type check and also accepts str subclasses.
    objetos.extend(
        tipo for tipo in xlsx1[columna].unique() if isinstance(tipo, str)
    )

# Step 2: de-duplicate the values and map each one to NaN.
lista1 = list(set(objetos))
# np.full builds the NaN array directly instead of allocating uninitialised
# memory with np.empty and overwriting it afterwards.
lista2 = np.full(len(lista1), np.nan)
objetos = dict(zip(lista1, lista2))
objetos

{'Falsedades': nan, 'Mentiras': nan, 'errores': nan, 'Nada': nan}

In [23]:
# Show the table as loaded (the replacement is applied in the next cell).
xlsx1

Unnamed: 0_level_0,Var1,Var2,Var3,Var4,Var5,Var6,Var7,Var8,Var9,Var10
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1,-313,-850,590,-171,-839,-277,770,-790,0.959602,699
2,473,473,-146,-292,-652,-774,-569,330,0.354079,-263
3,84,,207,-56,-67,Falsedades,597,-752,0.820334,420
4,Nada,,47,3,-422,0,-820,-699,0.428466,1983
5,322,859,,-346,,85,-627,835,0.467556,-273
6,-860,727,816,504,-830,-868,-756,Falsedades,0.724022,750
7,700,-507,-544,-789,-864,129,418,716,0.508727,477
8,247,-677,841,-458,errores,-814,742,-620,0.110713,-280
9,-512,573,344,-262,-595,-67,304,298,0.572773,-651
10,-386,507,876,Falsedades,38,-267,-8,712,0.422481,2020


In [24]:
# Replace every value that appears as a key of `objetos` with its mapped value (NaN).
xlsx1 = xlsx1.replace(objetos)
xlsx1

Unnamed: 0_level_0,Var1,Var2,Var3,Var4,Var5,Var6,Var7,Var8,Var9,Var10
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1,-313.0,-850.0,590.0,-171.0,-839.0,-277.0,770.0,-790.0,0.959602,699
2,473.0,473.0,-146.0,-292.0,-652.0,-774.0,-569.0,330.0,0.354079,-263
3,84.0,,207.0,-56.0,-67.0,,597.0,-752.0,0.820334,420
4,,,47.0,3.0,-422.0,0.0,-820.0,-699.0,0.428466,1983
5,322.0,859.0,,-346.0,,85.0,-627.0,835.0,0.467556,-273
6,-860.0,727.0,816.0,504.0,-830.0,-868.0,-756.0,,0.724022,750
7,700.0,-507.0,-544.0,-789.0,-864.0,129.0,418.0,716.0,0.508727,477
8,247.0,-677.0,841.0,-458.0,,-814.0,742.0,-620.0,0.110713,-280
9,-512.0,573.0,344.0,-262.0,-595.0,-67.0,304.0,298.0,0.572773,-651
10,-386.0,507.0,876.0,,38.0,-267.0,-8.0,712.0,0.422481,2020


## ¿Se acuerdan de las demás bases?

In [25]:
# Recall the list of workbooks found by glob.
print(files_xlsx)

['Ejemplo1.xlsx', 'Ejemplo2.xlsx', 'Ejemplo3.xlsx']


### ¿Qué función aplicamos y cómo lo aplicamos para que haga todo de una buena vez?
#### ¿Se acuerdan del "Ciclo FOR"?

In [37]:
# Preview each workbook by hand: file name, first rows, and a blank separator.
# The same lines are repeated once per file.
print(files_xlsx[0])
print(pd.read_excel(files_xlsx[0], index_col='ID').head())
print(' ')
print(files_xlsx[1])
print(pd.read_excel(files_xlsx[1], index_col='ID').head())
print(' ')
print(files_xlsx[2])
print(pd.read_excel(files_xlsx[2], index_col='ID').head())

Ejemplo1.xlsx
    Var1  Var2  Var3  Var4  Var5        Var6  Var7  Var8      Var9  Var10
ID                                                                       
1   -313  -850   590  -171  -839        -277   770  -790  0.959602    699
2    473   473  -146  -292  -652        -774  -569   330  0.354079   -263
3     84   NaN   207   -56   -67  Falsedades   597  -752  0.820334    420
4   Nada   NaN    47     3  -422           0  -820  -699  0.428466   1983
5    322   859   NaN  -346   NaN          85  -627   835  0.467556   -273
 
Ejemplo2.xlsx
       Var21     Var22     Var23     Var24     Var25     Var26      Var27  \
ID                                                                          
1   8.226895  2.938617  8.440861  2.633110  8.948942  5.390803  10.484627   
2   7.758571  2.675094  7.454070  2.176030  8.239736  5.073068   9.785837   
3   7.238483  2.648811  7.091948  2.015311  7.673448  4.830851   9.722650   
4   6.685766  1.681903  6.849771  1.677720  7.180970  4.156083   9.

In [41]:
# A for loop visits each file name in turn.
for base in files_xlsx:
    print(base)

Ejemplo1.xlsx
Ejemplo2.xlsx
Ejemplo3.xlsx


In [38]:
# For each workbook: read it, collect its text values and build a {text: nan} dictionary.
for mi_base in tqdm(files_xlsx):
    print('Se arreglará el archivo {} como usted lo pidió'.format(mi_base))
    xlsx = pd.read_excel(mi_base, index_col='ID')
    objetos = []  # a new list for the current file
    for columna in xlsx.columns:
        for tipo in xlsx[columna].unique():
            if type(tipo) == str:
                objetos.append(tipo)

    # De-duplicate the text values and pair each one with NaN.
    lista1 = list(set(objetos))
    lista2 = np.empty(len(lista1))
    lista2[:] = np.nan
    objetos = dict(zip(lista1, lista2))
    time.sleep(0.01)  # short pause on each iteration

100%|██████████| 3/3 [00:00<00:00, 42.58it/s]

Se arreglará el archivo Ejemplo1.xlsx como usted lo pidió
Se arreglará el archivo Ejemplo2.xlsx como usted lo pidió
Se arreglará el archivo Ejemplo3.xlsx como usted lo pidió





In [34]:
# Are these all the text values from all the workbooks?
objetos

{'Desvalorización': nan,
 'La': nan,
 'Maldita': nan,
 'Te ': nan,
 'Hora': nan,
 'Abrazos': nan,
 'Burlas': nan,
 'Conocí': nan,
 'No': nan,
 'Soberbia': nan,
 'Balazos': nan,
 'Que': nan,
 'Guerras': nan}

In [39]:
# Why does this show only the file "Ejemplo3.xlsx"?
# What about the others?
xlsx

Unnamed: 0_level_0,Var11,Var12,Var13,Var14,Var15,Var16,Var17,Var18,Var19,Var20
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1,1961,1925,1982,1939,1986,1920,1950,1935,1929,1935
2,1966,2000,1930,Abrazos,No,Balazos,1963,1982,1964,1894
3,1903,1936,1899,1921,1965,1948,2012,1996,1893,1906
4,1929,2011,1951,2004,1954,1949,1919,1986,1913,1915
5,1925,2000,1924,1997,1904,1901,1998,1936,1961,1948
6,1930,1908,2014,1961,1924,1957,1897,1967,1945,1919
7,1971,2000,1984,1926,1963,1896,2011,1985,1943,1904
8,1937,1907,1911,Maldita,La,Hora,Que,Te,Conocí,1993
9,1980,1912,1917,2009,2006,2007,1920,1939,1941,1940
10,1949,1946,2018,2011,1893,2010,1944,1905,1930,2009


In [40]:
# Apply the mapping; the result is only displayed here, it is not assigned back to xlsx.
xlsx.replace(objetos)

Unnamed: 0_level_0,Var11,Var12,Var13,Var14,Var15,Var16,Var17,Var18,Var19,Var20
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1,1961.0,1925,1982.0,1939.0,1986.0,1920.0,1950.0,1935.0,1929.0,1935.0
2,1966.0,2000,1930.0,,,,1963.0,1982.0,1964.0,1894.0
3,1903.0,1936,1899.0,1921.0,1965.0,1948.0,2012.0,1996.0,1893.0,1906.0
4,1929.0,2011,1951.0,2004.0,1954.0,1949.0,1919.0,1986.0,1913.0,1915.0
5,1925.0,2000,1924.0,1997.0,1904.0,1901.0,1998.0,1936.0,1961.0,1948.0
6,1930.0,1908,2014.0,1961.0,1924.0,1957.0,1897.0,1967.0,1945.0,1919.0
7,1971.0,2000,1984.0,1926.0,1963.0,1896.0,2011.0,1985.0,1943.0,1904.0
8,1937.0,1907,1911.0,,,,,,,1993.0
9,1980.0,1912,1917.0,2009.0,2006.0,2007.0,1920.0,1939.0,1941.0,1940.0
10,1949.0,1946,2018.0,2011.0,1893.0,2010.0,1944.0,1905.0,1930.0,2009.0


## Vámonos despacito

In [50]:
## This cell only gathers the text values

objetos = []  # accumulates the unusable text values from every workbook

for mi_base in tqdm(files_xlsx):
    print('Se arreglará el archivo {} como usted lo pidió'.format(mi_base))
    xlsx = pd.read_excel(mi_base, index_col='ID')
    for columna in xlsx.columns:
        # isinstance() is the idiomatic type check and also accepts str subclasses.
        objetos.extend(
            tipo for tipo in xlsx[columna].unique() if isinstance(tipo, str)
        )
    # No artificial sleep is needed: tqdm advances once per iteration of the
    # wrapped iterable on its own.

100%|██████████| 3/3 [00:00<00:00, 46.39it/s]

Se arreglará el archivo Ejemplo1.xlsx como usted lo pidió
Se arreglará el archivo Ejemplo2.xlsx como usted lo pidió
Se arreglará el archivo Ejemplo3.xlsx como usted lo pidió





In [51]:
# Build the dictionary of text values to remove: each distinct value maps to NaN.
lista1 = list(set(objetos))
# np.full creates the NaN-filled array in one step instead of allocating
# uninitialised memory with np.empty and overwriting it afterwards.
lista2 = np.full(len(lista1), np.nan)
objetos = dict(zip(lista1, lista2))

[6.93634403e-310 6.93634400e-310 6.93634403e-310 6.93634403e-310
 6.93634403e-310 6.93634407e-310 6.93634407e-310 6.93634400e-310
 6.93634408e-310 6.93634401e-310 6.93634403e-310 6.93634407e-310
 6.93634407e-310 6.93634407e-310 6.93634407e-310 6.93634407e-310
 6.93634407e-310]


In [52]:
# Show the resulting {text: nan} dictionary.
objetos

{'Desvalorización': nan,
 'Mentiras': nan,
 'La': nan,
 'Maldita': nan,
 'Te ': nan,
 'Hora': nan,
 'Abrazos': nan,
 'errores': nan,
 'Burlas': nan,
 'Conocí': nan,
 'No': nan,
 'Soberbia': nan,
 'Balazos': nan,
 'Que': nan,
 'Falsedades': nan,
 'Guerras': nan,
 'Nada': nan}

In [57]:
## Concatenating the workbooks
# Read each workbook with 'ID' as the index and print its column names.
x1 = pd.read_excel(files_xlsx[0], index_col='ID')
print(x1.columns)
x2 = pd.read_excel(files_xlsx[1], index_col='ID')
print(x2.columns)
x3 = pd.read_excel(files_xlsx[2], index_col='ID')
print(x3.columns)

Index(['Var1', 'Var2', 'Var3', 'Var4', 'Var5', 'Var6', 'Var7', 'Var8', 'Var9',
       'Var10'],
      dtype='object')
Index(['Var21', 'Var22', 'Var23', 'Var24', 'Var25', 'Var26', 'Var27', 'Var28',
       'Var29', 'Var30'],
      dtype='object')
Index(['Var11', 'Var12', 'Var13', 'Var14', 'Var15', 'Var16', 'Var17', 'Var18',
       'Var19', 'Var20'],
      dtype='object')


In [56]:
# Concatenate the three tables without passing `axis` (pd.concat then uses its default, axis=0).
xlsx = pd.concat([x1, x2, x3])
xlsx
## What happened here?

Unnamed: 0_level_0,Var1,Var2,Var3,Var4,Var5,Var6,Var7,Var8,Var9,Var10,...,Var11,Var12,Var13,Var14,Var15,Var16,Var17,Var18,Var19,Var20
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,-313,-850,590,-171,-839,-277,770,-790,0.959602,699.0,...,,,,,,,,,,
2,473,473,-146,-292,-652,-774,-569,330,0.354079,-263.0,...,,,,,,,,,,
3,84,,207,-56,-67,Falsedades,597,-752,0.820334,420.0,...,,,,,,,,,,
4,Nada,,47,3,-422,0,-820,-699,0.428466,1983.0,...,,,,,,,,,,
5,322,859,,-346,,85,-627,835,0.467556,-273.0,...,,,,,,,,,,
6,-860,727,816,504,-830,-868,-756,Falsedades,0.724022,750.0,...,,,,,,,,,,
7,700,-507,-544,-789,-864,129,418,716,0.508727,477.0,...,,,,,,,,,,
8,247,-677,841,-458,errores,-814,742,-620,0.110713,-280.0,...,,,,,,,,,,
9,-512,573,344,-262,-595,-67,304,298,0.572773,-651.0,...,,,,,,,,,,
10,-386,507,876,Falsedades,38,-267,-8,712,0.422481,2020.0,...,,,,,,,,,,


In [58]:
# Concatenate along axis=1 so the tables are placed side by side, aligned on the ID index.
xlsx = pd.concat([x1, x2, x3], axis=1)
xlsx
## Why does the "axis=1" argument have to be given?

Unnamed: 0_level_0,Var1,Var2,Var3,Var4,Var5,Var6,Var7,Var8,Var9,Var10,...,Var11,Var12,Var13,Var14,Var15,Var16,Var17,Var18,Var19,Var20
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,-313,-850,590,-171,-839,-277,770,-790,0.959602,699,...,1961,1925,1982,1939,1986,1920,1950,1935,1929,1935
2,473,473,-146,-292,-652,-774,-569,330,0.354079,-263,...,1966,2000,1930,Abrazos,No,Balazos,1963,1982,1964,1894
3,84,,207,-56,-67,Falsedades,597,-752,0.820334,420,...,1903,1936,1899,1921,1965,1948,2012,1996,1893,1906
4,Nada,,47,3,-422,0,-820,-699,0.428466,1983,...,1929,2011,1951,2004,1954,1949,1919,1986,1913,1915
5,322,859,,-346,,85,-627,835,0.467556,-273,...,1925,2000,1924,1997,1904,1901,1998,1936,1961,1948
6,-860,727,816,504,-830,-868,-756,Falsedades,0.724022,750,...,1930,1908,2014,1961,1924,1957,1897,1967,1945,1919
7,700,-507,-544,-789,-864,129,418,716,0.508727,477,...,1971,2000,1984,1926,1963,1896,2011,1985,1943,1904
8,247,-677,841,-458,errores,-814,742,-620,0.110713,-280,...,1937,1907,1911,Maldita,La,Hora,Que,Te,Conocí,1993
9,-512,573,344,-262,-595,-67,304,298,0.572773,-651,...,1980,1912,1917,2009,2006,2007,1920,1939,1941,1940
10,-386,507,876,Falsedades,38,-267,-8,712,0.422481,2020,...,1949,1946,2018,2011,1893,2010,1944,1905,1930,2009


In [59]:
# Column names of the combined table.
xlsx.columns

Index(['Var1', 'Var2', 'Var3', 'Var4', 'Var5', 'Var6', 'Var7', 'Var8', 'Var9',
       'Var10', 'Var21', 'Var22', 'Var23', 'Var24', 'Var25', 'Var26', 'Var27',
       'Var28', 'Var29', 'Var30', 'Var11', 'Var12', 'Var13', 'Var14', 'Var15',
       'Var16', 'Var17', 'Var18', 'Var19', 'Var20'],
      dtype='object')

In [60]:
# Summary (non-null counts and dtypes) of the combined table.
xlsx.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 20 entries, 1 to 20
Data columns (total 30 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Var1    17 non-null     object 
 1   Var2    18 non-null     object 
 2   Var3    18 non-null     object 
 3   Var4    20 non-null     object 
 4   Var5    19 non-null     object 
 5   Var6    20 non-null     object 
 6   Var7    20 non-null     object 
 7   Var8    14 non-null     object 
 8   Var9    20 non-null     float64
 9   Var10   20 non-null     int64  
 10  Var21   19 non-null     float64
 11  Var22   13 non-null     float64
 12  Var23   19 non-null     float64
 13  Var24   19 non-null     float64
 14  Var25   20 non-null     float64
 15  Var26   19 non-null     float64
 16  Var27   20 non-null     float64
 17  Var28   19 non-null     float64
 18  Var29   19 non-null     float64
 19  Var30   19 non-null     float64
 20  Var11   20 non-null     object 
 21  Var12   20 non-null     int64  
 22  Var1

In [61]:
## Replace the text values using the {text: nan} dictionary
xlsx = xlsx.replace(objetos)
xlsx

Unnamed: 0_level_0,Var1,Var2,Var3,Var4,Var5,Var6,Var7,Var8,Var9,Var10,...,Var11,Var12,Var13,Var14,Var15,Var16,Var17,Var18,Var19,Var20
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,-313.0,-850.0,590.0,-171.0,-839.0,-277.0,770.0,-790.0,0.959602,699,...,1961.0,1925,1982.0,1939.0,1986.0,1920.0,1950.0,1935.0,1929.0,1935.0
2,473.0,473.0,-146.0,-292.0,-652.0,-774.0,-569.0,330.0,0.354079,-263,...,1966.0,2000,1930.0,,,,1963.0,1982.0,1964.0,1894.0
3,84.0,,207.0,-56.0,-67.0,,597.0,-752.0,0.820334,420,...,1903.0,1936,1899.0,1921.0,1965.0,1948.0,2012.0,1996.0,1893.0,1906.0
4,,,47.0,3.0,-422.0,0.0,-820.0,-699.0,0.428466,1983,...,1929.0,2011,1951.0,2004.0,1954.0,1949.0,1919.0,1986.0,1913.0,1915.0
5,322.0,859.0,,-346.0,,85.0,-627.0,835.0,0.467556,-273,...,1925.0,2000,1924.0,1997.0,1904.0,1901.0,1998.0,1936.0,1961.0,1948.0
6,-860.0,727.0,816.0,504.0,-830.0,-868.0,-756.0,,0.724022,750,...,1930.0,1908,2014.0,1961.0,1924.0,1957.0,1897.0,1967.0,1945.0,1919.0
7,700.0,-507.0,-544.0,-789.0,-864.0,129.0,418.0,716.0,0.508727,477,...,1971.0,2000,1984.0,1926.0,1963.0,1896.0,2011.0,1985.0,1943.0,1904.0
8,247.0,-677.0,841.0,-458.0,,-814.0,742.0,-620.0,0.110713,-280,...,1937.0,1907,1911.0,,,,,,,1993.0
9,-512.0,573.0,344.0,-262.0,-595.0,-67.0,304.0,298.0,0.572773,-651,...,1980.0,1912,1917.0,2009.0,2006.0,2007.0,1920.0,1939.0,1941.0,1940.0
10,-386.0,507.0,876.0,,38.0,-267.0,-8.0,712.0,0.422481,2020,...,1949.0,1946,2018.0,2011.0,1893.0,2010.0,1944.0,1905.0,1930.0,2009.0


In [64]:
## We want the variables ordered by the number they carry
# NOTE(review): the column labels are strings, so sort_index(axis=1) orders them
# as text; the next cell inspects the resulting order.
xlsx = xlsx.sort_index(axis = 1)
xlsx

Unnamed: 0_level_0,Var1,Var10,Var11,Var12,Var13,Var14,Var15,Var16,Var17,Var18,...,Var28,Var29,Var3,Var30,Var4,Var5,Var6,Var7,Var8,Var9
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,-313.0,699,1961.0,1925,1982.0,1939.0,1986.0,1920.0,1950.0,1935.0,...,3.029325,7.640019,590.0,2.622703,-171.0,-839.0,-277.0,770.0,-790.0,0.959602
2,473.0,-263,1966.0,2000,1930.0,,,,1963.0,1982.0,...,2.958129,7.441848,-146.0,1.985383,-292.0,-652.0,-774.0,-569.0,330.0,0.354079
3,84.0,420,1903.0,1936,1899.0,1921.0,1965.0,1948.0,2012.0,1996.0,...,2.722054,6.483875,207.0,1.627964,-56.0,-67.0,,597.0,-752.0,0.820334
4,,1983,1929.0,2011,1951.0,2004.0,1954.0,1949.0,1919.0,1986.0,...,1.794001,5.69519,47.0,0.778558,3.0,-422.0,0.0,-820.0,-699.0,0.428466
5,322.0,-273,1925.0,2000,1924.0,1997.0,1904.0,1901.0,1998.0,1936.0,...,1.727131,5.653015,,,-346.0,,85.0,-627.0,835.0,0.467556
6,-860.0,750,1930.0,1908,2014.0,1961.0,1924.0,1957.0,1897.0,1967.0,...,0.839837,5.632503,816.0,6.123552,504.0,-830.0,-868.0,-756.0,,0.724022
7,700.0,477,1971.0,2000,1984.0,1926.0,1963.0,1896.0,2011.0,1985.0,...,,4.788923,-544.0,5.813818,-789.0,-864.0,129.0,418.0,716.0,0.508727
8,247.0,-280,1937.0,1907,1911.0,,,,,,...,5.354321,4.514902,841.0,5.279663,-458.0,,-814.0,742.0,-620.0,0.110713
9,-512.0,-651,1980.0,1912,1917.0,2009.0,2006.0,2007.0,1920.0,1939.0,...,4.852605,3.734873,344.0,4.900165,-262.0,-595.0,-67.0,304.0,298.0,0.572773
10,-386.0,2020,1949.0,1946,2018.0,2011.0,1893.0,2010.0,1944.0,1905.0,...,4.420036,3.609155,876.0,4.13215,,38.0,-267.0,-8.0,712.0,0.422481


In [65]:
xlsx.columns
# What happened here?

Index(['Var1', 'Var10', 'Var11', 'Var12', 'Var13', 'Var14', 'Var15', 'Var16',
       'Var17', 'Var18', 'Var19', 'Var2', 'Var20', 'Var21', 'Var22', 'Var23',
       'Var24', 'Var25', 'Var26', 'Var27', 'Var28', 'Var29', 'Var3', 'Var30',
       'Var4', 'Var5', 'Var6', 'Var7', 'Var8', 'Var9'],
      dtype='object')

In [4]:
# Locate the real data files
# NOTE(review): absolute, user-specific path; it presumably only resolves on the
# author's machine. Consider a configurable data directory.
files_txt = gg('/Users/javier/Google Drive/Curso Python Pandas 2020/Scripts/*.txt')