# Seleccionar datos de DataFrames con pandas y numpy

In [30]:
import pandas as pd
import numpy as np

In [31]:
# Build a 5x5 matrix holding the integers 0..24 in row-major order.
lista_valores = np.arange(0, 25).reshape((5, 5))
lista_valores

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19],
       [20, 21, 22, 23, 24]])

In [32]:
lista_indice = ['i1', 'i2', 'i3', 'i4', 'i5']
lista_indice

['i1', 'i2', 'i3', 'i4', 'i5']

In [33]:
lista_columnas = ['c1', 'c2', 'c3', 'c4', 'c5']
lista_columnas

['c1', 'c2', 'c3', 'c4', 'c5']

In [34]:
dataframe = pd.DataFrame(lista_valores, index=lista_indice, columns=lista_columnas)
dataframe

Unnamed: 0,c1,c2,c3,c4,c5
i1,0,1,2,3,4
i2,5,6,7,8,9
i3,10,11,12,13,14
i4,15,16,17,18,19
i5,20,21,22,23,24


In [36]:
dataframe['c2']

i1     1
i2     6
i3    11
i4    16
i5    21
Name: c2, dtype: int64

In [39]:
# Scalar at row 'i5', column 'c3', fetched with one label-based lookup
# instead of chained indexing (select the column, then index the result).
dataframe.loc['i5', 'c3']

22

In [41]:
dataframe[['c2','c4']]

Unnamed: 0,c2,c4
i1,1,3
i2,6,8
i3,11,13
i4,16,18
i5,21,23


In [52]:
# Keep only the rows whose 'c2' value is greater than 15 (boolean-mask selection).
dataframe.loc[dataframe['c2'].gt(15)]

Unnamed: 0,c1,c2,c3,c4,c5
i4,15,16,17,18,19
i5,20,21,22,23,24


In [53]:
# Alias, not a copy: `df` and `dataframe` now refer to the same DataFrame object.
df = dataframe
df

Unnamed: 0,c1,c2,c3,c4,c5
i1,0,1,2,3,4
i2,5,6,7,8,9
i3,10,11,12,13,14
i4,15,16,17,18,19
i5,20,21,22,23,24


In [57]:
df > 20

Unnamed: 0,c1,c2,c3,c4,c5
i1,False,False,False,False,False
i2,False,False,False,False,False
i3,False,False,False,False,False
i4,False,False,False,False,False
i5,False,True,True,True,True


In [60]:
df

Unnamed: 0,c1,c2,c3,c4,c5
i1,0,1,2,3,4
i2,5,6,7,8,9
i3,10,11,12,13,14
i4,15,16,17,18,19
i5,20,21,22,23,24


In [61]:
df.loc['i3']

c1    10
c2    11
c3    12
c4    13
c5    14
Name: i3, dtype: int64

In [62]:
# Scalar at row 'i3', column 'c4', read with a single .loc[row, column] lookup
# rather than indexing the intermediate row Series a second time.
df.loc['i3', 'c4']

13

# Operaciones sobre Series y DataFrames

In [1]:
import pandas as pd
import numpy as np

In [3]:
# Three-element integer Series labelled 'a', 'b', 'c'.
s1 = pd.Series(data=[0, 1, 2], index=list('abc'))
s1

a    0
b    1
c    2
dtype: int64

In [4]:
s2 = pd.Series([3,4,5,6], index=['a', 'b', 'c', 'd'])
s2

a    3
b    4
c    5
d    6
dtype: int64

In [5]:
# Addition aligns on index labels; 'd' exists only in s2, so its result is NaN
# and the resulting dtype is float64.
s1+s2

a    3.0
b    5.0
c    7.0
d    NaN
dtype: float64

In [6]:
lista_valores = np.arange(4).reshape(2,2)
lista_valores

array([[0, 1],
       [2, 3]])

In [7]:
lista_indices = list('ab')
lista_indices

['a', 'b']

In [8]:
lista_columnas = list('12')
lista_columnas

['1', '2']

In [10]:
df = pd.DataFrame(lista_valores, index=lista_indices, columns=lista_columnas)
df

Unnamed: 0,1,2
a,0,1
b,2,3


In [11]:
listaValores = np.arange(9).reshape(3,3)

In [12]:
listaIndices = list('abc')
listaColumnas = list('123')

In [13]:
df2 = pd.DataFrame(listaValores, index=listaIndices, columns=listaColumnas)
df2

Unnamed: 0,1,2,3
a,0,1,2
b,3,4,5
c,6,7,8


In [14]:
# Element-wise sum aligned on both row and column labels; a position that
# exists in only one of the two frames yields NaN.
df + df2

Unnamed: 0,1,2,3
a,0.0,2.0,
b,5.0,7.0,
c,,,


In [16]:
# Label-aligned sum where fill_value=0 substitutes 0 for a value missing from
# one of the two frames before adding.
df.add(df2, fill_value=0)

Unnamed: 0,1,2,3
a,0.0,2.0,2.0
b,5.0,7.0,5.0
c,6.0,7.0,8.0


# Ordenar y clasificar Series

In [17]:
import pandas as pd
import numpy as np

In [19]:
range( 4)

range(0, 4)

In [20]:
lista_valores = range(4)

In [27]:
lista_indices = list('CABD')

In [28]:
lista_indices

['C', 'A', 'B', 'D']

In [29]:
serie = pd.Series(lista_valores, index=lista_indices)
serie

C    0
A    1
B    2
D    3
dtype: int64

In [30]:
serie.sort_index()

A    1
B    2
C    0
D    3
dtype: int64

In [32]:
serie.sort_values()

C    0
A    1
B    2
D    3
dtype: int64

In [33]:
serie.rank()

C    1.0
A    2.0
B    3.0
D    4.0
dtype: float64

In [39]:
# Ten draws from np.random.randn; no seed is set in the visible cells, so the
# values (and every output derived from them) change on each run.
serie2 = pd.Series(np.random.randn(10))
serie2

0    0.497775
1    0.613514
2    1.406727
3    0.302398
4    1.877099
5   -0.331372
6    0.641607
7   -0.672114
8    0.682540
9    1.066553
dtype: float64

In [40]:
serie2.rank()

0     4.0
1     5.0
2     9.0
3     3.0
4    10.0
5     2.0
6     6.0
7     1.0
8     7.0
9     8.0
dtype: float64

In [41]:
serie2.sort_values()

7   -0.672114
5   -0.331372
3    0.302398
0    0.497775
1    0.613514
6    0.641607
8    0.682540
9    1.066553
2    1.406727
4    1.877099
dtype: float64

In [42]:
serie2.sort_index()

0    0.497775
1    0.613514
2    1.406727
3    0.302398
4    1.877099
5   -0.331372
6    0.641607
7   -0.672114
8    0.682540
9    1.066553
dtype: float64

In [43]:
serie2

0    0.497775
1    0.613514
2    1.406727
3    0.302398
4    1.877099
5   -0.331372
6    0.641607
7   -0.672114
8    0.682540
9    1.066553
dtype: float64

# Estadísticas en DataFrame

In [44]:
import pandas as pd
import numpy as np

In [45]:
array = np.array([[1,8,3], [5,6,7]])
array

array([[1, 8, 3],
       [5, 6, 7]])

In [49]:
df = pd.DataFrame(array, index=['a', 'b'], columns=list('123'))
df

Unnamed: 0,1,2,3
a,1,8,3
b,5,6,7


In [55]:
df.sum()

1     6
2    14
3    10
dtype: int64

In [58]:
df

Unnamed: 0,1,2,3
a,1,8,3
b,5,6,7


In [59]:
df.sum(axis=1)

a    12
b    18
dtype: int64

In [61]:
df.min()

1    1
2    6
3    3
dtype: int64

In [62]:
# Find the maximum value in each row

In [64]:
df

Unnamed: 0,1,2,3
a,1,8,3
b,5,6,7


In [65]:
df.max(axis=1)

a    8
b    7
dtype: int64

In [66]:
# Get the index label of the minimum value in each column

In [68]:
df

Unnamed: 0,1,2,3
a,1,8,3
b,5,6,7


In [69]:
df.idxmin()

1    a
2    b
3    a
dtype: object

In [70]:
 df.describe()

Unnamed: 0,1,2,3
count,2.0,2.0,2.0
mean,3.0,7.0,5.0
std,2.828427,1.414214,2.828427
min,1.0,6.0,3.0
25%,2.0,6.5,4.0
50%,3.0,7.0,5.0
75%,4.0,7.5,6.0
max,5.0,8.0,7.0


# Valores Nulos con Numpy

In [71]:
import pandas as pd
import numpy as np

In [72]:
lista_valores = ['1','2',np.nan, '4']
lista_valores

['1', '2', nan, '4']

In [73]:
serie = pd.Series(lista_valores, index=list('abcd'))
serie

a      1
b      2
c    NaN
d      4
dtype: object

In [74]:
serie.isnull()

a    False
b    False
c     True
d    False
dtype: bool

In [75]:
serie.dropna()

a    1
b    2
d    4
dtype: object

In [76]:
serie

a      1
b      2
c    NaN
d      4
dtype: object

# Valores Nulos con Pandas

In [77]:
listaValores = [[1,2,3], [4,np.nan,5],[6,7, np.nan]]
listaValores

[[1, 2, 3], [4, nan, 5], [6, 7, nan]]

In [80]:
listaValores

[[1, 2, 3], [4, nan, 5], [6, 7, nan]]

In [81]:
listaIndices = list('123')
listaIndices

['1', '2', '3']

In [82]:
listaColumnas = list('abc')
listaColumnas

['a', 'b', 'c']

In [83]:
df = pd.DataFrame(listaValores, index=listaIndices, columns=listaColumnas)
df

Unnamed: 0,a,b,c
1,1,2.0,3.0
2,4,,5.0
3,6,7.0,


In [84]:
df.isnull()

Unnamed: 0,a,b,c
1,False,False,False
2,False,True,False
3,False,False,True


In [85]:
# Returns a new frame without any row that contains a NaN; `df` itself is left untouched.
df.dropna()

Unnamed: 0,a,b,c
1,1,2.0,3.0


In [86]:
df

Unnamed: 0,a,b,c
1,1,2.0,3.0
2,4,,5.0
3,6,7.0,


In [87]:
# Returns a new frame with every NaN replaced by 0.
df.fillna(0)

Unnamed: 0,a,b,c
1,1,2.0,3.0
2,4,0.0,5.0
3,6,7.0,0.0


# Jerarquía en los índices

In [107]:
import pandas as pd
import numpy as np

In [108]:
# Six draws from np.random.randn; no seed is set in the visible cells, so the
# values shown below will differ on a re-run.
listaValores = np.random.randn(6)
listaValores

array([ 0.79466758, -0.5386715 , -0.16171622,  1.15927696,  0.11995435,
        0.5480753 ])

In [109]:
# Two parallel label lists for a two-level index:
# outer level 1,1,1,2,2,2 and inner level a,b,c repeated twice.
nivel_externo = [1] * 3 + [2] * 3
nivel_interno = list('abc') * 2
listaIndices = [nivel_externo, nivel_interno]
listaIndices

[[1, 1, 1, 2, 2, 2], ['a', 'b', 'c', 'a', 'b', 'c']]

In [110]:
serie =  pd.Series(listaValores, index=listaIndices)
serie

1  a    0.794668
   b   -0.538672
   c   -0.161716
2  a    1.159277
   b    0.119954
   c    0.548075
dtype: float64

In [111]:
# Select every entry whose outer index label is 2. Using .loc makes the lookup
# explicitly label-based; a bare integer key in [] can be read as a position.
serie.loc[2]

a    1.159277
b    0.119954
c    0.548075
dtype: float64

In [112]:
# Scalar at outer label 1, inner label 'b', read with a single MultiIndex
# lookup instead of two chained selections.
serie.loc[(1, 'b')]

-0.5386715044654986

In [113]:
serie

1  a    0.794668
   b   -0.538672
   c   -0.161716
2  a    1.159277
   b    0.119954
   c    0.548075
dtype: float64

In [114]:
# unstack() pivots the inner index level ('a', 'b', 'c') into columns,
# turning the two-level Series into a DataFrame.
df = serie.unstack()
df

Unnamed: 0,a,b,c
1,0.794668,-0.538672,-0.161716
2,1.159277,0.119954,0.548075


In [115]:
lista_Valores = np.arange(16).reshape(4,4)
lista_Valores

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])

In [116]:
lista_indices = list('1234')
lista_columnas = list('abcd')

In [117]:
df = pd.DataFrame(lista_Valores, index=lista_indices, columns=lista_columnas)
df

Unnamed: 0,a,b,c,d
1,0,1,2,3
2,4,5,6,7
3,8,9,10,11
4,12,13,14,15


In [118]:
# stack() moves the column labels into an inner index level, producing a
# Series with a two-level (row label, column label) index.
serie2 = df.stack()

In [119]:
serie2

1  a     0
   b     1
   c     2
   d     3
2  a     4
   b     5
   c     6
   d     7
3  a     8
   b     9
   c    10
   d    11
4  a    12
   b    13
   c    14
   d    15
dtype: int64

# Ejercicio_1

In [127]:
import pandas as pd
import numpy as np

In [128]:
# Draw `num` random student counts, one per class, from the half-open
# interval [mini, maxi). No seed is set, so the values vary between runs.
mini: int = 10
maxi: int = 40
num: int = 3
alumnos = np.random.randint(low=mini, high=maxi, size=num)
alumnos

array([35, 27, 23])

In [129]:
clases = ['clase1', 'clase2', 'clase3']
clases

['clase1', 'clase2', 'clase3']

In [130]:
serie = pd.Series(alumnos, index=clases)
serie

clase1    35
clase2    27
clase3    23
dtype: int64

In [None]:
# Look up the student count for 'clase2'. The index labels are 'clase1',
# 'clase2' and 'clase3'; the previous key 'clase' is not among them and
# raises KeyError.
serie['clase2']

27