# PANDAS - ESTRUCTURAS DE DATOS




---


### Crear una Serie


---



**A partir de un arreglo de Numpy**

In [0]:
import numpy as np
import pandas as pd

# No index is passed here, so the Series gets the default integer index.
s = pd.Series(data=np.random.randn(5))
print(s, s.index, sep="\n")

0   -0.849116
1    0.038673
2   -1.347996
3   -0.166234
4   -0.600857
dtype: float64
RangeIndex(start=0, stop=5, step=1)


In [0]:
# Explicit labels: one index entry per generated value.
s = pd.Series(np.random.randn(5), index=list("abcde"))
print(s, s.index, sep="\n")

a    0.879262
b    0.335051
c   -0.465252
d    0.586817
e    1.857740
dtype: float64
Index(['a', 'b', 'c', 'd', 'e'], dtype='object')


**A partir de un diccionario**

In [0]:
# The dictionary keys become the index labels (in insertion order) and the
# dictionary values become the data.
d = dict(b=1, a=0, c=2)
s1 = pd.Series(data=d)
print(s1, s1.index, sep="\n")


b    1
a    0
c    2
dtype: int64
Index(['b', 'a', 'c'], dtype='object')


**A partir de un escalar**

In [0]:
# A scalar is repeated once for every label in the index.
s2 = pd.Series(data=8.0, index=list("abcde"))
print(s2)

a    8.0
b    8.0
c    8.0
d    8.0
e    8.0
dtype: float64




---

### Operando Series como arreglos de Numpy


---



In [0]:
# Array-style (positional) access on a Series with string labels.
# Positions go through .iloc: using plain s[1] / s[[4, 3, 1]] with integers
# on a non-integer index relies on a deprecated positional fallback
# (FutureWarning since pandas 2.1) and is treated as a label lookup once
# that fallback is removed.
s = pd.Series(np.random.randn(5), index=['a', 'b', 'c', 'd', 'e'])
print("s=\n",s)
print(s.iloc[1])           # single element, by position
print(s.iloc[:3])          # first three elements
print(s.iloc[[4, 3, 1]])   # arbitrary positions, in the requested order

s=
 a    0.786928
b   -0.195473
c   -0.638175
d    1.690536
e    2.650397
dtype: float64
-0.1954734462385708
a    0.786928
b   -0.195473
c   -0.638175
dtype: float64
e    2.650397
d    1.690536
b   -0.195473
dtype: float64


In [0]:
# Element-wise arithmetic and NumPy ufuncs applied directly to the Series.
print("s=\n",s)
print(s + s, s*2, np.exp(s), np.sin(s), sep="\n")

In [0]:
# Median of the values, followed by a boolean mask that keeps only the
# entries above it (filtering is covered in more detail later on).
print(s.median())
print(s.loc[s > s.median()])

In [0]:
# Series.append was removed in pandas 2.0; pd.concat builds the same new
# Series with the extra element. `s` itself is not modified.
print(pd.concat([s, pd.Series(7, index=['f'])]))

a    0.786928
b   -0.195473
c   -0.638175
d    1.690536
e    2.650397
f    7.000000
dtype: float64




---
### Operando Series como Diccionarios


---




In [0]:
# Dictionary-style use of a Series: lookup by label, membership tests on the
# labels, and assignment to a label.
print("s=\n",s)
print(s['a'], 'a' in s, 'f' in s, sep="\n")
s['g']=50
print("s=\n",s)




---
### Salvando a un csv


---




In [0]:
# Write the Series to 'serie.csv' (path relative to the working directory).
s.to_csv('serie.csv')



---


### Crear un DataFrame a partir de un diccionario


---



In [0]:
# Each key is a column name; each list holds that column's values, one per row.
d = {
    'Código': [20152300120, 20153300123, 20172400322, 20172400436],
    'Nota1': [3.3, 4.1, 1.5, 2.0],
    'Nota2': [2.1, 3.8, 3.5, 3.6],
    'Nota3': [3.3, 4.1, 1.5, 4.1],
}
df = pd.DataFrame(d)
print(df)

        Código  Nota1  Nota2  Nota3
0  20152300120    3.3    2.1    3.3
1  20153300123    4.1    3.8    4.1
2  20172400322    1.5    3.5    1.5
3  20172400436    2.0    3.6    4.1


**Estableciendo índices**

In [0]:
# Promote the 'Código' column to the row index.
# The guard keeps this cell re-runnable: once the column has been moved into
# the index, calling set_index('Código') again would raise KeyError.
if 'Código' in df.columns:
    df = df.set_index('Código')
print(df)

             Nota1  Nota2  Nota3
Código                          
20152300120    3.3    2.1    3.3
20153300123    4.1    3.8    4.1
20172400322    1.5    3.5    1.5
20172400436    2.0    3.6    4.1


In [0]:
# Row labels first, then column labels.
print(df.index, df.columns, sep="\n")

Int64Index([20152300120, 20153300123, 20172400322, 20172400436], dtype='int64', name='Código')
Index(['Nota1', 'Nota2', 'Nota3'], dtype='object')


**Agregando índices**

In [0]:
d = dict(one=[1., 2., 3., 4.], two=[4., 3., 2., 1.])

# No index given at construction time.
df1 = pd.DataFrame(d)
print(df1)

# Labels supplied at construction time.
df2 = pd.DataFrame(d, index=list('abcd'))
print(df2)

# Labels attached afterwards: the inner list is taken as the label values
# themselves rather than as column names to look up.
df1 = df1.set_index([list('abcd')])
print(df1)

**Creando un DF de un diccionario de Series**

In [0]:
# Two Series with different label sets: 'one' has a-c, 'two' has a-d.
# The resulting frame is indexed by the union of the labels, so 'one' has
# no value for row 'd'.
d = {
    'one': pd.Series([1., 2., 3.], index=list('abc')),
    'two': pd.Series([1., 2., 3., 4.], index=list('abcd')),
}
df = pd.DataFrame(d)
print(df)

In [0]:
# Same dictionary, but only rows 'd', 'b', 'a' are requested, in that order.
df1 = pd.DataFrame(data=d, index=list('dba'))
print(df1)

In [0]:
# Rows and columns are both selected explicitly. A requested column that is
# not a key of `d` (here 'three', assuming `d` is the one/two dictionary
# defined earlier) comes back filled with NaN.
df2 = pd.DataFrame(
    data=d,
    index=list('dba'),
    columns=['two', 'three'],
)
print(df2)



---

### Crear un DataFrame a partir de una lista de diccionarios


---



In [0]:
# One dictionary per row; keys are column names. The first row has no 'c',
# so that cell is NaN in the resulting frame.
data2 = [
    dict(a=1, b=2),
    dict(a=5, b=10, c=20),
]
df = pd.DataFrame(data2)
print(df)

   a   b     c
0  1   2   NaN
1  5  10  20.0


In [0]:
# Same records, with explicit row labels instead of the default positions.
df1 = pd.DataFrame(data=data2, index=['first', 'second'])
print(df1)

        a   b     c
first   1   2   NaN
second  5  10  20.0


In [0]:
# Explicit row labels, and only columns 'a' and 'b' are kept.
df2 = pd.DataFrame(
    data=data2,
    index=['first', 'second'],
    columns=['a', 'b'],
)
print(df2)



---

### Crear un DataFrame a partir de un arreglo de Numpy


---



In [0]:
# 5x5 matrix of random integers in [0, 10), wrapped as a DataFrame without
# passing row or column labels.
a = np.random.randint(0, 10, size=(5, 5))
print(a)
df2 = pd.DataFrame(a)
print(df2)

**Definiendo las Columnas**

In [0]:
# Same array, now with named columns punt1..punt5.
df3 = pd.DataFrame(a, columns=[f'punt{i}' for i in range(1, 6)])
df3

**Definiendo los Índices**

In [0]:
# Named columns (punt1..punt5) and named rows (est1..est5).
df3 = pd.DataFrame(
    a,
    index=[f'est{i}' for i in range(1, 6)],
    columns=[f'punt{i}' for i in range(1, 6)],
)
df3

**Renombrando Índices y Columnas**

In [0]:
# Map column labels 0..4 to 'a'..'e'. The result is only displayed, not
# assigned, so df2 keeps its original labels.
df2.rename(columns=dict(enumerate("abcde")))

In [0]:
# Map row labels 0..4 to 'a'..'e'. The result is only displayed, not
# assigned, so df2 keeps its original labels.
df2.rename(index=dict(enumerate("abcde")))

In [0]:
# rename() only touches labels that are present in the frame. With df3's
# columns built as punt1..punt5 in the earlier cells, only 'punt2' -> 'b'
# matches; the integer keys are silently ignored.
# NOTE(review): confirm whether the integer keys are intentional.
df3.rename(
    columns={0: "a", 'punt2': "b", 2: "c", 3: "d", 4: "e"},
)



---
### Salvar a CSV


---




In [0]:
# Write df3 to 'notas.csv' (path relative to the working directory).
df3.to_csv('notas.csv')



---

### Crear un DataFrame desde un CSV


---



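Fuente de los datos: https://www.datos.gov.co/ -> Descubre -> Docentes de planta. Descargue el conjunto de datos en formato CSV y guárdelo como `Docentes_De_Planta_2017-2.csv` en la carpeta de trabajo del cuaderno antes de ejecutar las celdas siguientes; de lo contrario, `pd.read_csv` produce `FileNotFoundError`.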

In [0]:
# Load the CSV into a DataFrame. The path is relative, so the file must be
# present in the working directory; otherwise read_csv raises
# FileNotFoundError and `docentes` is never defined for the cells below.
docentes = pd.read_csv('Docentes_De_Planta_2017-2.csv')

FileNotFoundError: ignored

In [0]:
# Bare expression: the notebook displays the DataFrame as the cell output.
docentes

**Conociendo el Data Set importado**

In [0]:
# First rows of the data set (5 by default).
docentes.head()

In [0]:
# Last rows of the data set (5 by default).
docentes.tail()

In [0]:
# Column labels of the data set.
docentes.columns

In [0]:
# Row index of the data set. Raises NameError if the read_csv cell failed,
# because `docentes` was never assigned.
docentes.index

NameError: ignored

In [0]:
# Data type of each column.
docentes.dtypes

In [0]:
# Summary statistics of the data set.
docentes.describe()

**Configurar un índice**

In [0]:
# Reload the same CSV, this time using the 'Programa Académico' column as the
# row index, and display the result. The file must be present in the working
# directory (relative path).
docentes = pd.read_csv('Docentes_De_Planta_2017-2.csv', index_col = 'Programa Académico')
docentes

FileNotFoundError: ignored