# Pandas intro docs

http://pandas.pydata.org/pandas-docs/stable/10min.html

In [4]:
import pandas as pd
import numpy as np

In [5]:
df2 = pd.DataFrame({ 'A' : 1.,
    'B' : pd.Timestamp('20130102'),
    'C' : pd.Series(1,index=list(range(4)),dtype='float32'),
    'D' : np.array([3] * 4,dtype='int32'),
    'E' : pd.Categorical(["test","train","test","train"]),
    'F' : 'foo' })

In [6]:
df2

Unnamed: 0,A,B,C,D,E,F
0,1.0,2013-01-02,1.0,3,test,foo
1,1.0,2013-01-02,1.0,3,train,foo
2,1.0,2013-01-02,1.0,3,test,foo
3,1.0,2013-01-02,1.0,3,train,foo


In [7]:
df2.dtypes

A           float64
B    datetime64[ns]
C           float32
D             int32
E          category
F            object
dtype: object

In [8]:
dates = pd.date_range('20130101', periods=6)

In [9]:
dates

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [10]:
df = pd.DataFrame(np.random.randn(6,4), index=dates, columns=list('ABCD'))

In [11]:
df

Unnamed: 0,A,B,C,D
2013-01-01,-0.155164,1.561588,0.253941,-0.147139
2013-01-02,0.199103,2.517964,-0.493267,-0.560932
2013-01-03,0.073386,-0.463081,0.993935,1.072128
2013-01-04,-1.124602,0.041528,-1.068246,0.625864
2013-01-05,0.300916,-0.415203,0.045063,1.703552
2013-01-06,0.659381,-0.130537,-0.3386,0.104141


In [13]:
df.tail(3)

Unnamed: 0,A,B,C,D
2013-01-04,-1.124602,0.041528,-1.068246,0.625864
2013-01-05,0.300916,-0.415203,0.045063,1.703552
2013-01-06,0.659381,-0.130537,-0.3386,0.104141


In [14]:
df.index

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [15]:
df.columns

Index(['A', 'B', 'C', 'D'], dtype='object')

In [16]:
df.describe()

Unnamed: 0,A,B,C,D
count,6.0,6.0,6.0,6.0
mean,-0.00783,0.51871,-0.101196,0.466269
std,0.609721,1.230442,0.705988,0.834979
min,-1.124602,-0.463081,-1.068246,-0.560932
25%,-0.098026,-0.344036,-0.4546,-0.084319
50%,0.136244,-0.044505,-0.146769,0.365002
75%,0.275463,1.181573,0.201721,0.960562
max,0.659381,2.517964,0.993935,1.703552


In [17]:
df.T

Unnamed: 0,2013-01-01 00:00:00,2013-01-02 00:00:00,2013-01-03 00:00:00,2013-01-04 00:00:00,2013-01-05 00:00:00,2013-01-06 00:00:00
A,-0.155164,0.199103,0.073386,-1.124602,0.300916,0.659381
B,1.561588,2.517964,-0.463081,0.041528,-0.415203,-0.130537
C,0.253941,-0.493267,0.993935,-1.068246,0.045063,-0.3386
D,-0.147139,-0.560932,1.072128,0.625864,1.703552,0.104141


In [18]:
left = pd.DataFrame({'key': ['foo', 'foo'], 'lval': [1, 2]})

In [20]:
right = pd.DataFrame({'key': ['foo', 'foo'], 'rval': [4, 5]})

In [22]:
left

Unnamed: 0,key,lval
0,foo,1
1,foo,2


In [23]:
right

Unnamed: 0,key,rval
0,foo,4
1,foo,5


In [24]:
pd.merge(left, right, on='key')

Unnamed: 0,key,lval,rval
0,foo,1,4
1,foo,1,5
2,foo,2,4
3,foo,2,5


In [25]:
df.to_csv('playground_df1.csv')

In [26]:
pd.read_csv('playground_df1.csv')

Unnamed: 0.1,Unnamed: 0,A,B,C,D
0,2013-01-01,-0.155164,1.561588,0.253941,-0.147139
1,2013-01-02,0.199103,2.517964,-0.493267,-0.560932
2,2013-01-03,0.073386,-0.463081,0.993935,1.072128
3,2013-01-04,-1.124602,0.041528,-1.068246,0.625864
4,2013-01-05,0.300916,-0.415203,0.045063,1.703552
5,2013-01-06,0.659381,-0.130537,-0.3386,0.104141


In [27]:
df.to_hdf('playground_df1.h5', 'df')

ImportError: HDFStore requires PyTables, "No module named 'tables'" problem importing

In [28]:
df.to_excel('playground_df1.xlsx', sheet_name='Sheet1')

ModuleNotFoundError: No module named 'openpyxl'

In [30]:
df.to_json('playground_df1.columns.json', 'columns')

In [32]:
df.to_json('playground_df1.split.json', 'split')

In [33]:
df.to_json('playground_df1.records.json', 'records')

In [34]:
df.to_json('playground_df1.index.json', 'index')