# 10 Minutes to pandas

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

## Object Creation

In [2]:
# Create a Series by passing a list of values; pandas assigns a default
# integer index (0..5). The np.nan entry marks a missing value.
s = pd.Series([1, 3, 5, np.nan, 6, 8])

In [3]:
# Show the Series: values alongside the default integer index; the recorded
# output reports dtype float64 and renders the missing entry as NaN.
s

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64

In [4]:
# Create a DataFrame from a NumPy array, with a datetime index and labeled
# columns. This cell builds the index first: six consecutive dates starting
# at 2013-01-01.
dates = pd.date_range(start='20130101', periods=6)

In [5]:
# Show the index just built: a DatetimeIndex of six daily timestamps
# (freq='D' in the recorded output).
dates

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [6]:
# Build a 6x4 frame of standard-normal samples, indexed by `dates`, with
# columns labeled A-D.
# A locally seeded RandomState makes Restart & Run All reproduce the same
# values without touching NumPy's global random state. The outputs recorded
# in this notebook predate the seed; re-run the notebook to refresh them.
df = pd.DataFrame(np.random.RandomState(42).randn(6, 4), index=dates, columns=list('ABCD'))

In [7]:
# Show the whole frame: six date-indexed rows by four columns (A-D), small
# enough to display in full.
df

Unnamed: 0,A,B,C,D
2013-01-01,-0.78278,0.824447,0.800016,0.685825
2013-01-02,1.207186,-2.083171,0.245769,-0.98331
2013-01-03,1.22849,0.829829,3.11471,-0.889629
2013-01-04,2.342937,0.114875,0.847913,-0.757473
2013-01-05,-0.655326,0.00597,-0.60976,-0.597101
2013-01-06,-1.778144,0.557904,-1.927171,0.218821


In [8]:
# Build a DataFrame from a dict of column specs. The scalar entries (A, B, F)
# are repeated on every row; C, D and E each supply four values and carry
# their own dtypes (float32, int32, category).
df2 = pd.DataFrame(
    {
        'A': 1.,
        'B': pd.Timestamp('20130102'),
        'C': pd.Series(1, index=list(range(4)), dtype='float32'),
        'D': np.array([3] * 4, dtype='int32'),
        'E': pd.Categorical(["test", "train", "test", "train"]),
        'F': 'foo',
    }
)

In [9]:
# Show df2: the scalar inputs (A, B, F) appear on all four rows next to the
# per-row values of C, D and E.
df2

Unnamed: 0,A,B,C,D,E,F
0,1.0,2013-01-02,1.0,3,test,foo
1,1.0,2013-01-02,1.0,3,train,foo
2,1.0,2013-01-02,1.0,3,test,foo
3,1.0,2013-01-02,1.0,3,train,foo


In [10]:
# Inspect the per-column dtypes: each column of df2 keeps its own type.
df2.dtypes

A           float64
B    datetime64[ns]
C           float32
D             int32
E          category
F            object
dtype: object

## Viewing Data

In [11]:
# View the top rows of the frame; with no argument, the recorded output shows
# the first five.
df.head()

Unnamed: 0,A,B,C,D
2013-01-01,-0.78278,0.824447,0.800016,0.685825
2013-01-02,1.207186,-2.083171,0.245769,-0.98331
2013-01-03,1.22849,0.829829,3.11471,-0.889629
2013-01-04,2.342937,0.114875,0.847913,-0.757473
2013-01-05,-0.655326,0.00597,-0.60976,-0.597101


In [12]:
# View the last three rows of the frame.
df.tail(3)

Unnamed: 0,A,B,C,D
2013-01-04,2.342937,0.114875,0.847913,-0.757473
2013-01-05,-0.655326,0.00597,-0.60976,-0.597101
2013-01-06,-1.778144,0.557904,-1.927171,0.218821


In [13]:
# Quick statistical summary of each column: count, mean, std, min, quartiles
# and max.
df.describe()

Unnamed: 0,A,B,C,D
count,6.0,6.0,6.0,6.0
mean,0.260394,0.041642,0.411913,-0.387145
std,1.565439,1.097453,1.684551,0.679351
min,-1.778144,-2.083171,-1.927171,-0.98331
25%,-0.750916,0.033196,-0.395878,-0.85659
50%,0.27593,0.33639,0.522892,-0.677287
75%,1.223164,0.757811,0.835939,0.01484
max,2.342937,0.829829,3.11471,0.685825


In [14]:
# Transpose the frame: the column labels A-D become the rows and the dates
# become the columns.
df.T

Unnamed: 0,2013-01-01 00:00:00,2013-01-02 00:00:00,2013-01-03 00:00:00,2013-01-04 00:00:00,2013-01-05 00:00:00,2013-01-06 00:00:00
A,-0.78278,1.207186,1.22849,2.342937,-0.655326,-1.778144
B,0.824447,-2.083171,0.829829,0.114875,0.00597,0.557904
C,0.800016,0.245769,3.11471,0.847913,-0.60976,-1.927171
D,0.685825,-0.98331,-0.889629,-0.757473,-0.597101,0.218821


In [15]:
# Sort along axis 1, i.e. by column label, in descending order (D, C, B, A).
# The result is displayed only, not assigned, so `df` keeps its column order.
df.sort_index(axis=1, ascending=False)

Unnamed: 0,D,C,B,A
2013-01-01,0.685825,0.800016,0.824447,-0.78278
2013-01-02,-0.98331,0.245769,-2.083171,1.207186
2013-01-03,-0.889629,3.11471,0.829829,1.22849
2013-01-04,-0.757473,0.847913,0.114875,2.342937
2013-01-05,-0.597101,-0.60976,0.00597,-0.655326
2013-01-06,0.218821,-1.927171,0.557904,-1.778144


In [16]:
# Sort the rows by the values in column B (ascending in the recorded output).
# The result is displayed only, not assigned, so `df` keeps its row order.
df.sort_values(by='B')

Unnamed: 0,A,B,C,D
2013-01-02,1.207186,-2.083171,0.245769,-0.98331
2013-01-05,-0.655326,0.00597,-0.60976,-0.597101
2013-01-04,2.342937,0.114875,0.847913,-0.757473
2013-01-06,-1.778144,0.557904,-1.927171,0.218821
2013-01-01,-0.78278,0.824447,0.800016,0.685825
2013-01-03,1.22849,0.829829,3.11471,-0.889629
