# Pandas

In [1]:
import pandas as pd

In [2]:
print(pd.__version__)

1.3.5


## Series

### Crear

In [3]:
s = pd.Series([1,2,3,4,5], index = ['a', 'b', 'c', 'd', 'e'])
s

a    1
b    2
c    3
d    4
e    5
dtype: int64

### Explorar

In [4]:
s.values

array([1, 2, 3, 4, 5])

In [5]:
s.index

Index(['a', 'b', 'c', 'd', 'e'], dtype='object')

In [6]:
type(s)

pandas.core.series.Series

In [7]:
type(s.values)

numpy.ndarray

In [8]:
s[1]

2

In [9]:
s['b']

2

In [10]:
s['b':'d']

b    2
c    3
d    4
dtype: int64

In [11]:
s[1:3]

b    2
c    3
dtype: int64

In [12]:
serie1 = pd.Series({'a':78, 'b':79, 'c':80})
serie1

a    78
b    79
c    80
dtype: int64

In [13]:
serie2 = pd.Series({'a':10, 'b':11, 'c':12})

## Data Frame

### Crear

In [14]:
dataframe1 = pd.DataFrame({'Serie 1':serie1, 'Serie 2':serie2})
dataframe1

Unnamed: 0,Serie 1,Serie 2
a,78,10
b,79,11
c,80,12


In [15]:
dataframe1.T

Unnamed: 0,a,b,c
Serie 1,78,79,80
Serie 2,10,11,12


In [16]:
dataframe1.values

array([[78, 10],
       [79, 11],
       [80, 12]])

In [17]:
dataframe1.index

Index(['a', 'b', 'c'], dtype='object')

In [18]:
dataframe1.columns

Index(['Serie 1', 'Serie 2'], dtype='object')

In [19]:
dataframe1.values[1,0]

79

### Agregar Nueva Columna

In [20]:
dataframe1['Total Series'] = dataframe1['Serie 1'] + dataframe1['Serie 2']

In [21]:
dataframe1

Unnamed: 0,Serie 1,Serie 2,Total Series
a,78,10,88
b,79,11,90
c,80,12,92


### Eliminar Columna

In [22]:
del dataframe1['Total Series']
dataframe1

Unnamed: 0,Serie 1,Serie 2
a,78,10
b,79,11
c,80,12


### Filtros

In [23]:
dataframe1[dataframe1['Serie 1'] >= 79]

Unnamed: 0,Serie 1,Serie 2
b,79,11
c,80,12


### loc - iloc

In [24]:
dataframe1.loc['a']

Serie 1    78
Serie 2    10
Name: a, dtype: int64

In [25]:
dataframe1.loc[['a','c']]

Unnamed: 0,Serie 1,Serie 2
a,78,10
c,80,12


In [26]:
dataframe1.iloc[0]

Serie 1    78
Serie 2    10
Name: a, dtype: int64

### Ejercicios

In [40]:
minutes = pd.Series([60,45,30,45,45,4500], index = [0,1,2,3,4,5])
rate = pd.Series([4,4,8,7,10], index= [0,1,3,4,5])
ask = pd.Series([1, 'cero',2,2,3], index= [1,2,3,4,5])
dfClass = pd.DataFrame({'Time':minutes, 'Rate':rate, 'Ask':ask})

In [33]:
dfClass

Unnamed: 0,Time,Rate,Ask
0,60,4.0,
1,45,4.0,1
2,30,,cero
3,45,8.0,2
4,45,7.0,2
5,4500,10.0,3


### Manipular errores

In [29]:
dfNewClass = dfClass.dropna()#Elimina registros con valores nulos
dfNewClass

Unnamed: 0,Time,Rate,Ask
1,45,4.0,1
3,45,8.0,2
4,45,7.0,2
5,4500,10.0,3


In [31]:
dfClass.dropna(inplace=True)#Actualiza el df actual
dfClass

Unnamed: 0,Time,Rate,Ask
1,45,4.0,1
3,45,8.0,2
4,45,7.0,2
5,4500,10.0,3


In [36]:
dfClass.fillna(333, inplace=True)
dfClass

Unnamed: 0,Time,Rate,Ask
0,60,4.0,333
1,45,4.0,1
2,30,333.0,cero
3,45,8.0,2
4,45,7.0,2
5,4500,10.0,3


In [39]:
dfClass['Rate'].fillna(7, inplace=True)
dfClass

Unnamed: 0,Time,Rate,Ask
0,60,4.0,
1,45,4.0,1
2,30,7.0,cero
3,45,8.0,2
4,45,7.0,2
5,4500,10.0,3


In [41]:
mean = dfClass['Rate'].mean()
mean

6.6

In [42]:
median = dfClass['Rate'].median()
median

7.0

In [43]:
mode = dfClass['Rate'].mode()#Valor mas frecuente
mode

0    4.0
dtype: float64

In [46]:
dfClass.Ask[dfClass.Ask == 'cero'] = 0
dfClass

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfClass.Ask[dfClass.Ask == 'cero'] = 0


Unnamed: 0,Time,Rate,Ask
0,60,4.0,
1,45,4.0,1.0
2,30,,0.0
3,45,8.0,2.0
4,45,7.0,2.0
5,4500,10.0,3.0


In [48]:
dfClass.loc[5, 'Time'] = 45
dfClass

Unnamed: 0,Time,Rate,Ask
0,60,4.0,
1,45,4.0,1.0
2,30,,0.0
3,45,8.0,2.0
4,45,7.0,2.0
5,45,10.0,3.0


In [49]:
dfClass.duplicated()

0    False
1    False
2    False
3    False
4    False
5    False
dtype: bool

In [50]:
dfClass.drop_duplicates(inplace=True)
dfClass

Unnamed: 0,Time,Rate,Ask
0,60,4.0,
1,45,4.0,1.0
2,30,,0.0
3,45,8.0,2.0
4,45,7.0,2.0
5,45,10.0,3.0
