In [9]:
import pandas as pd
import numpy as np

In [4]:
df = pd.DataFrame([2,4,6,8])

In [6]:
df.columns.name = "Index"
df

Index,0
0,2
1,4
2,6
3,8


In [7]:
dates = pd.date_range('20150125', periods=7)
dates

DatetimeIndex(['2015-01-25', '2015-01-26', '2015-01-27', '2015-01-28',
               '2015-01-29', '2015-01-30', '2015-01-31'],
              dtype='datetime64[ns]', freq='D')

In [10]:
df = pd.DataFrame(np.random.randn(7,5), index=dates, columns={'Adam','Bob','Carla','Dave','Eve'})
df

Unnamed: 0,Eve,Adam,Dave,Bob,Carla
2015-01-25,-0.594398,0.354574,-1.800184,1.102192,-1.335609
2015-01-26,0.859395,-0.15473,-0.600227,0.486813,-0.9874
2015-01-27,3.055485,-1.191046,-0.545422,-0.558445,0.696324
2015-01-28,-0.839616,-0.06584,-1.552635,-0.083895,0.495785
2015-01-29,0.108804,0.295297,-0.827451,1.499309,2.043015
2015-01-30,-0.32529,-0.308536,0.933446,-1.37599,1.589074
2015-01-31,1.817642,0.041496,-0.359412,-0.539146,0.759547


In [12]:
df2 = pd.DataFrame({ 'A' : np.random.random_sample(4), # 4 random numbers
                     'B' : pd.Timestamp('20130102'), # 4 dates, note pandas autofills
                     'C' : pd.date_range('20150125',periods = 4), # 4 dates in a range
                     'D' : ['a','b','c','d'], # letters
                     'E' : ["cat","dog","mouse","parrot"], # text/string
                     'F' : 'copy'}) # note pandas autofills
df2

Unnamed: 0,A,B,C,D,E,F
0,0.459838,2013-01-02,2015-01-25,a,cat,copy
1,0.664234,2013-01-02,2015-01-26,b,dog,copy
2,0.075784,2013-01-02,2015-01-27,c,mouse,copy
3,0.305348,2013-01-02,2015-01-28,d,parrot,copy


In [13]:
df2.dtypes

A           float64
B    datetime64[ns]
C    datetime64[ns]
D            object
E            object
F            object
dtype: object

In [14]:
df2.index

RangeIndex(start=0, stop=4, step=1)

In [15]:
df2.columns

Index(['A', 'B', 'C', 'D', 'E', 'F'], dtype='object')

In [16]:
df2.values

array([[0.4598377526045867, Timestamp('2013-01-02 00:00:00'),
        Timestamp('2015-01-25 00:00:00'), 'a', 'cat', 'copy'],
       [0.6642338855994059, Timestamp('2013-01-02 00:00:00'),
        Timestamp('2015-01-26 00:00:00'), 'b', 'dog', 'copy'],
       [0.07578382085467139, Timestamp('2013-01-02 00:00:00'),
        Timestamp('2015-01-27 00:00:00'), 'c', 'mouse', 'copy'],
       [0.30534817865643427, Timestamp('2013-01-02 00:00:00'),
        Timestamp('2015-01-28 00:00:00'), 'd', 'parrot', 'copy']],
      dtype=object)

In [17]:
df2.describe()

Unnamed: 0,A
count,4.0
mean,0.376301
std,0.248481
min,0.075784
25%,0.247957
50%,0.382593
75%,0.510937
max,0.664234


In [18]:
df2.T

Unnamed: 0,0,1,2,3
A,0.459838,0.664234,0.0757838,0.305348
B,2013-01-02 00:00:00,2013-01-02 00:00:00,2013-01-02 00:00:00,2013-01-02 00:00:00
C,2015-01-25 00:00:00,2015-01-26 00:00:00,2015-01-27 00:00:00,2015-01-28 00:00:00
D,a,b,c,d
E,cat,dog,mouse,parrot
F,copy,copy,copy,copy


In [19]:
df.sort_values(by = 'Bob')

Unnamed: 0,Eve,Adam,Dave,Bob,Carla
2015-01-30,-0.32529,-0.308536,0.933446,-1.37599,1.589074
2015-01-27,3.055485,-1.191046,-0.545422,-0.558445,0.696324
2015-01-31,1.817642,0.041496,-0.359412,-0.539146,0.759547
2015-01-28,-0.839616,-0.06584,-1.552635,-0.083895,0.495785
2015-01-26,0.859395,-0.15473,-0.600227,0.486813,-0.9874
2015-01-25,-0.594398,0.354574,-1.800184,1.102192,-1.335609
2015-01-29,0.108804,0.295297,-0.827451,1.499309,2.043015


In [20]:
df[df['Eve'] > 0]

Unnamed: 0,Eve,Adam,Dave,Bob,Carla
2015-01-26,0.859395,-0.15473,-0.600227,0.486813,-0.9874
2015-01-27,3.055485,-1.191046,-0.545422,-0.558445,0.696324
2015-01-29,0.108804,0.295297,-0.827451,1.499309,2.043015
2015-01-31,1.817642,0.041496,-0.359412,-0.539146,0.759547


In [22]:
nonneg_only = df[df > 0]
nonneg_only

Unnamed: 0,Eve,Adam,Dave,Bob,Carla
2015-01-25,,0.354574,,1.102192,
2015-01-26,0.859395,,,0.486813,
2015-01-27,3.055485,,,,0.696324
2015-01-28,,,,,0.495785
2015-01-29,0.108804,0.295297,,1.499309,2.043015
2015-01-30,,,0.933446,,1.589074
2015-01-31,1.817642,0.041496,,,0.759547


In [23]:
nonneg_only.dropna()

Unnamed: 0,Eve,Adam,Dave,Bob,Carla


In [24]:
# set them to zero instead
nonneg_only.fillna(value = 0)

Unnamed: 0,Eve,Adam,Dave,Bob,Carla
2015-01-25,0.0,0.354574,0.0,1.102192,0.0
2015-01-26,0.859395,0.0,0.0,0.486813,0.0
2015-01-27,3.055485,0.0,0.0,0.0,0.696324
2015-01-28,0.0,0.0,0.0,0.0,0.495785
2015-01-29,0.108804,0.295297,0.0,1.499309,2.043015
2015-01-30,0.0,0.0,0.933446,0.0,1.589074
2015-01-31,1.817642,0.041496,0.0,0.0,0.759547


In [25]:
df2 = df.copy()
df2['color']=['blue', 'green','red','blue','green','red','blue']
df2

Unnamed: 0,Eve,Adam,Dave,Bob,Carla,color
2015-01-25,-0.594398,0.354574,-1.800184,1.102192,-1.335609,blue
2015-01-26,0.859395,-0.15473,-0.600227,0.486813,-0.9874,green
2015-01-27,3.055485,-1.191046,-0.545422,-0.558445,0.696324,red
2015-01-28,-0.839616,-0.06584,-1.552635,-0.083895,0.495785,blue
2015-01-29,0.108804,0.295297,-0.827451,1.499309,2.043015,green
2015-01-30,-0.32529,-0.308536,0.933446,-1.37599,1.589074,red
2015-01-31,1.817642,0.041496,-0.359412,-0.539146,0.759547,blue


In [26]:
df2[(df2['color'] == 'green') | (df2['color'] == 'blue')]

df2[df2['color'].isin(['green','blue'])]

Unnamed: 0,Eve,Adam,Dave,Bob,Carla,color
2015-01-25,-0.594398,0.354574,-1.800184,1.102192,-1.335609,blue
2015-01-26,0.859395,-0.15473,-0.600227,0.486813,-0.9874,green
2015-01-28,-0.839616,-0.06584,-1.552635,-0.083895,0.495785,blue
2015-01-29,0.108804,0.295297,-0.827451,1.499309,2.043015,green
2015-01-31,1.817642,0.041496,-0.359412,-0.539146,0.759547,blue


In [27]:
frame_one = pd.DataFrame(np.random.randn(5, 4))
frame_two = pd.DataFrame(np.random.randn(5, 4))
pd.concat([frame_one, frame_two])

Unnamed: 0,0,1,2,3
0,0.098458,0.352647,-0.226132,-0.212292
1,-0.802403,0.812487,0.05191,-0.419202
2,0.382539,-0.691836,-1.897864,-0.352557
3,-0.317451,0.342897,0.377241,0.742086
4,-0.490925,0.049263,-0.235367,0.034409
0,0.385179,0.491085,-0.805751,0.095701
1,-1.483061,-0.103381,-0.29497,-0.795574
2,-2.229632,-1.255831,1.186378,0.343793
3,0.890929,0.402436,0.496771,-0.347197
4,-0.815891,0.990842,-0.5439,0.456994


In [28]:
left = pd.DataFrame({'key': ['foo','far'], 'lval':[1,2]})
right = pd.DataFrame({'key':['foo', 'foo', 'bar'], 'rval':[3,4,5]})

pd.merge(left,right, on = 'key')

Unnamed: 0,key,lval,rval
0,foo,1,3
1,foo,1,4


In [29]:
foo_bar = pd.DataFrame({'A' : ['foo', 'bar', 'foo', 'bar','foo', 'bar', 'foo', 'foo'],
                            'B' : ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'],
                            'C' : np.random.randn(8),
                            'D' : np.random.randn(8)})

foo_bar

Unnamed: 0,A,B,C,D
0,foo,one,0.745589,0.889573
1,bar,one,0.522196,1.166998
2,foo,two,0.094883,-0.152321
3,bar,three,-0.666723,1.947593
4,foo,two,-0.75867,-0.190475
5,bar,two,0.261167,-0.976714
6,foo,one,-0.251056,-0.418574
7,foo,three,0.426163,0.330458


In [30]:
foo_bar.groupby('A').sum()

Unnamed: 0_level_0,C,D
A,Unnamed: 1_level_1,Unnamed: 2_level_1
bar,0.116641,2.137877
foo,0.256908,0.458662
