# Sección de **Introducción**: *Series*

In [1]:
import pandas as pd 
import numpy as np 

In [3]:
# Build a Series from a plain list; no index is given, so pandas assigns the default 0..4 labels.
sr = pd.Series(data=[10, 9, 8, 7, 6])

In [4]:
# The stored data as a NumPy array (the index labels are not part of it).
sr.values

array([10,  9,  8,  7,  6], dtype=int64)

In [5]:
# The index labels; since none were passed at construction this is the default RangeIndex.
sr.index

RangeIndex(start=0, stop=5, step=1)

In [7]:
# Shape tuple; a Series is one-dimensional, so it holds a single entry (the length).
sr.shape

(5,)

In [8]:
sr[[0, 4, 2]]  # double brackets: the inner list selects several index labels at once, in the order given

0    10
4     6
2     8
dtype: int64

In [13]:
# Same values, now labelled with the letters 'a'..'e' instead of the default integers.
sr = pd.Series(data=[10, 9, 8, 7, 6], index=list('abcde'))
# Slicing by label includes BOTH endpoints ('b' and 'e').
sr['b':'e']

b    9
c    8
d    7
e    6
dtype: int64

In [12]:
# Plain dictionary mapping a two-letter code to a number; used below to build a Series.
dict_data = dict(CO=100, MX=200, AR=300)
dict_data

{'CO': 100, 'MX': 200, 'AR': 300}

In [15]:
# View of the dictionary's keys, in insertion order.
dict_data.keys()

dict_keys(['CO', 'MX', 'AR'])

In [16]:
# Look up a single value by its key.
dict_data['MX']

200

In [22]:
# Build a Series from the dict but request the labels CO, MX and PE:
# dict keys are matched against `index`, so 'AR' is left out and 'PE' (absent from the dict) becomes NaN.
sr = pd.Series(data=dict_data, index=['CO', 'MX', 'PE'])

In [31]:
# NumPy's "not a number" value, which pandas uses to mark missing data.
np.nan

nan

In [21]:
# Arithmetic with NaN propagates: the result is still NaN.
np.nan + 10

nan

In [30]:
# Boolean mask flagging missing entries; only 'PE' is True because the dict had no such key.
sr.isnull()

CO    False
MX    False
PE     True
dtype: bool

# Trabajando con DataFrames (DF)

In [33]:
# One equal-length list per key; each key will become a DataFrame column.
dict_data = {
    'CH': [100, 800, 200],
    'CO': [100, 200, 300],
    'MX': [300, 500, 400],
}

In [34]:
# Each dict key becomes a column and each list its values; rows get a default integer index.
df = pd.DataFrame(dict_data)
df

Unnamed: 0,CH,CO,MX
0,100,100,300
1,800,200,500
2,200,300,400


In [35]:
# Column-oriented sample data: six columns with seven entries each.
# Q1 deliberately holds one np.nan (fourth entry), which makes that column float when loaded.
dict_data = {
    'edad':   [10, 9, 13, 14, 12, 11, 12],
    'cm':     [115, 110, 130, 155, 125, 120, 125],
    'pais':   ['co', 'mx', 'co', 'mx', 'mx', 'ch', 'ch'],
    'genero': ['M', 'F', 'F', 'M', 'M', 'M', 'F'],
    'Q1':     [5, 10, 8, np.nan, 7, 8, 3],
    'Q2':     [7, 9, 9, 8, 8, 8, 9],
}
dict_data

{'edad': [10, 9, 13, 14, 12, 11, 12],
 'cm': [115, 110, 130, 155, 125, 120, 125],
 'pais': ['co', 'mx', 'co', 'mx', 'mx', 'ch', 'ch'],
 'genero': ['M', 'F', 'F', 'M', 'M', 'M', 'F'],
 'Q1': [5, 10, 8, nan, 7, 8, 3],
 'Q2': [7, 9, 9, 8, 8, 8, 9]}

In [39]:
# Build the frame with one name per row as the index instead of the default integers.
df = pd.DataFrame(
    data=dict_data,
    index=['ana', 'benito', 'camilo', 'daniel', 'erika', 'fabian', 'gabriela'],
)
df

Unnamed: 0,edad,cm,pais,genero,Q1,Q2
ana,10,115,co,M,5.0,7
benito,9,110,mx,F,10.0,9
camilo,13,130,co,F,8.0,9
daniel,14,155,mx,M,,8
erika,12,125,mx,M,7.0,8
fabian,11,120,ch,M,8.0,8
gabriela,12,125,ch,F,3.0,9


In [40]:
# Label-based selection: row 'ana', columns edad, cm and Q1.
# The result is a Series with dtype object because the picked columns mix int and float values.
df.loc['ana', ['edad', 'cm', 'Q1']]

edad     10
cm      115
Q1      5.0
Name: ana, dtype: object

In [41]:
# Show the whole frame again as a reference for the positional (iloc) examples that follow.
df

Unnamed: 0,edad,cm,pais,genero,Q1,Q2
ana,10,115,co,M,5.0,7
benito,9,110,mx,F,10.0,9
camilo,13,130,co,F,8.0,9
daniel,14,155,mx,M,,8
erika,12,125,mx,M,7.0,8
fabian,11,120,ch,M,8.0,8
gabriela,12,125,ch,F,3.0,9


In [42]:
# Position-based selection (0-based): third row, second column -> camilo's cm value.
df.iloc[2,1]

130

In [44]:
# All rows (:) and the columns at positions 1 and 3, i.e. cm and genero.
df.iloc[:,[1,3]]

Unnamed: 0,cm,genero
ana,115,M
benito,110,F
camilo,130,F
daniel,155,M
erika,125,M
fabian,120,M
gabriela,125,F


In [45]:
# Bracket access with a column name returns that column as a Series.
df['edad']

ana         10
benito       9
camilo      13
daniel      14
erika       12
fabian      11
gabriela    12
Name: edad, dtype: int64

In [46]:
# A boolean mask inside the brackets keeps only the rows where edad >= 12.
df[df['edad']>=12]

Unnamed: 0,edad,cm,pais,genero,Q1,Q2
camilo,13,130,co,F,8.0,9
daniel,14,155,mx,M,,8
erika,12,125,mx,M,7.0,8
gabriela,12,125,ch,F,3.0,9


In [47]:
# Combine two masks element-wise with &; each comparison needs its own parentheses
# because & binds more tightly than >= and ==.
df[(df['edad']>=12) & (df['pais']=='mx')]

Unnamed: 0,edad,cm,pais,genero,Q1,Q2
daniel,14,155,mx,M,,8
erika,12,125,mx,M,7.0,8


In [48]:
# Row filter written as a query string: keeps the rows where edad >= 12.
df.query('edad>=12')

Unnamed: 0,edad,cm,pais,genero,Q1,Q2
camilo,13,130,co,F,8.0,9
daniel,14,155,mx,M,,8
erika,12,125,mx,M,7.0,8
gabriela,12,125,ch,F,3.0,9


In [49]:
# Row-by-row comparison of two columns. daniel's Q1 is NaN, and a comparison against NaN yields False.
df['Q2']>=df['Q1']

ana          True
benito      False
camilo       True
daniel      False
erika        True
fabian       True
gabriela     True
dtype: bool

In [50]:
# Keep the rows where Q2 >= Q1; benito (Q2 < Q1) and daniel (Q1 is NaN) are dropped.
df[df['Q2']>=df['Q1']]

Unnamed: 0,edad,cm,pais,genero,Q1,Q2
ana,10,115,co,M,5.0,7
camilo,13,130,co,F,8.0,9
erika,12,125,mx,M,7.0,8
fabian,11,120,ch,M,8.0,8
gabriela,12,125,ch,F,3.0,9


# Manejo de archivos CSV

In [59]:
# NOTE(review): absolute, machine-specific Windows path; the relative 'test.csv' used below resolves against it.
%cd "C:\Users\furruchurtu\OneDrive - LITOPLAS SA\Documentos\python"

C:\Users\furruchurtu\OneDrive - LITOPLAS SA\Documentos\python


In [62]:
# Write the frame to CSV; index=False leaves the row labels (ana, benito, ...) out of the file.
df.to_csv('test.csv', index=False)

# Read the file back: with no labels stored, the new frame gets a default 0..6 integer index.
df_read = pd.read_csv('test.csv')
df_read

Unnamed: 0,edad,cm,pais,genero,Q1,Q2
0,10,115,co,M,5.0,7
1,9,110,mx,F,10.0,9
2,13,130,co,F,8.0,9
3,14,155,mx,M,,8
4,12,125,mx,M,7.0,8
5,11,120,ch,M,8.0,8
6,12,125,ch,F,3.0,9


# Conectando con PostgreSQL o con MySQL

## Para conectar con PostgreSQL (haz doble clic en la celda para copiar el código)

import pandas as pd
import psycopg2

### Se hace la conexión: No se ejecuta nada por seguridad
### **Código:**

conn_sql = psycopg2.connect(  # open the database connection; every value below is a placeholder to replace
    user = 'user_name',
    password = 'password',  # NOTE(review): do not commit a real password here; load it from the environment instead
    host = 'xxx.xxx.xxx.xxx',
    port = '5432',
    database = 'postgres data base'
)

query_sql = '''
select *
from table_name
limit 10
'''  # placeholder query: all columns of `table_name`, at most 10 rows

### Se crea el dataframe:
df = pd.read_sql(query_sql, conn_sql)  # use the psycopg2 connection created above (it is named `conn_sql`, not `sql_conn`)
df.head(5)  # preview the first five rows of the result

## Conectando con MySQL:

import pandas as pd
import sqlalchemy as sql

### Escogemos nuestra base de datos (Oracle, MySQL o la de nuestra preferencia); se debe dejar una sola asignación:

database_type = 'mysql'  # pick exactly ONE dialect; write 'oracle' here instead when targeting Oracle (a second assignment would silently override this one)

### Luego creamos el elemento de conexión con el siguiente código:

from urllib.parse import quote  # stdlib helper used to percent-encode the credentials

user = 'user_name'
password = 'password'
host = 'xxx.xxx.xxx.xxx:port'
database = 'database_name'

conn_string = '{}://{}:{}@{}/{}'.format(
    database_type,
    quote(user, safe=''),      # characters such as '@', ':' or '/' inside the credentials
    quote(password, safe=''),  # would otherwise be parsed as URL delimiters
    host,
    database,
)

sql_conn = sql.create_engine(conn_string)  # `sql` is the sqlalchemy module imported above

### Seguido simplemente definimos nuestra query en SQL:

query_sql = '''
select *
from table_name
limit 10
'''  # placeholder query: all columns of `table_name`, at most 10 rows

### Y creamos nuestro dataframe con:

df = pd.read_sql(query_sql, sql_conn)  # run the query through the SQLAlchemy engine `sql_conn`
df.head(5)  # preview the first five rows of the result

# Manejo de pandas