In [1]:
import numpy as np
import pandas as pd


In [2]:
# Five draws from the standard normal distribution, labelled 'a'..'e'.
# NOTE: no seed is set, so the values differ on every run.
s = pd.Series(np.random.randn(5), index=list('abcde'))


In [3]:
s

a   -1.837805
b   -0.138648
c    0.877286
d    0.992542
e   -0.151829
dtype: float64

In [4]:
s.index

Index([u'a', u'b', u'c', u'd', u'e'], dtype='object')

In [5]:
pd.Series(np.random.randn(5))


0    0.748973
1    0.129120
2    1.180487
3    0.710603
4    0.632768
dtype: float64

In [6]:
# Plain label -> float mapping, used below as input to pd.Series.
d = dict(a=0.0, b=1.0, c=2.0)


In [7]:
d

{'a': 0.0, 'b': 1.0, 'c': 2.0}

In [8]:
pd.Series(d, index=['b', 'c', 'd', 'a'])


b    1.0
c    2.0
d    NaN
a    0.0
dtype: float64

In [9]:
pd.Series(d)


a    0.0
b    1.0
c    2.0
dtype: float64

In [10]:
pd.Series(5., index=['a', 'b', 'c', 'd', 'e'])

a    5.0
b    5.0
c    5.0
d    5.0
e    5.0
dtype: float64

In [11]:
s


a   -1.837805
b   -0.138648
c    0.877286
d    0.992542
e   -0.151829
dtype: float64

In [12]:
# TypeError: parentheses try to call the Series; element access uses [].
s(0)

TypeError: 'Series' object is not callable

In [14]:
s(0)

TypeError: 'Series' object is not callable

In [15]:
s(0)

TypeError: 'Series' object is not callable

In [16]:
# First element by position. `.iloc` is explicit about positional access;
# plain `s[0]` on a label-indexed Series relies on a deprecated fallback.
s.iloc[0]

-1.8378052286655704

In [17]:
s[:3]

a   -1.837805
b   -0.138648
c    0.877286
dtype: float64

In [18]:
s[:]

a   -1.837805
b   -0.138648
c    0.877286
d    0.992542
e   -0.151829
dtype: float64

In [20]:
s[s > s.median()] # identify elements with a value greater than the median

c    0.877286
d    0.992542
dtype: float64

In [21]:
s[s > s.median()] # identify elements with a value greater than the median

c    0.877286
d    0.992542
dtype: float64

In [22]:
# Pick the elements at positions 4, 3 and 1, in that order. `.iloc` makes the
# positional intent explicit instead of relying on the deprecated integer
# fallback of `[]` on a label-indexed Series.
s.iloc[[4, 3, 1]]

e   -0.151829
d    0.992542
b   -0.138648
dtype: float64

In [23]:
# List elements in a different order by giving their positions explicitly.
# `.iloc` is used because integer keys passed to `[]` on a label-indexed
# Series go through a deprecated positional fallback.
s.iloc[[4, 3, 1]]

e   -0.151829
d    0.992542
b   -0.138648
dtype: float64

In [24]:
np.exp(s) # element-wise exponential; the result keeps the Series index

a    0.159166
b    0.870534
c    2.404366
d    2.698085
e    0.859135
dtype: float64

In [25]:
s['a']

-1.8378052286655704

In [27]:
# KeyError: 'a','c' is passed as the single tuple key ('a', 'c');
# selecting several labels needs a list, i.e. s[['a', 'c']].
s['a','c']

KeyError: ('a', 'c')

In [28]:
s['e'] = 12 # overwrite the value stored under label 'e'

In [29]:
s

a    -1.837805
b    -0.138648
c     0.877286
d     0.992542
e    12.000000
dtype: float64

In [30]:
s['f']

KeyError: 'f'

In [31]:
'f' in s

False

In [34]:
# Guarded lookup: show s['f'] when that label exists, otherwise fall back to s['e'].
print(s['f'] if 'f' in s else s['e'])
    

12.0


In [35]:
s.get('f')

In [36]:
s.get('e')

12.0

In [37]:
s.get('f')  # returns the value if the label exists; 'f' is missing, so the result is None and nothing is displayed

In [38]:
s.get('f', 'NaN'')  # will only bring it back if its available otherwise it returns NaN

SyntaxError: EOL while scanning string literal (<ipython-input-38-029af81da48b>, line 1)

In [39]:
# Missing label: return the supplied default (NaN) instead of None.
# `np.nan` is the canonical spelling; the `np.NaN` alias was removed in NumPy 2.0.
s.get('f', np.nan)

nan

In [40]:
s

a    -1.837805
b    -0.138648
c     0.877286
d     0.992542
e    12.000000
dtype: float64

In [41]:
s+s

a    -3.675610
b    -0.277297
c     1.754573
d     1.985084
e    24.000000
dtype: float64

In [42]:
s *3

a    -5.513416
b    -0.415945
c     2.631859
d     2.977627
e    36.000000
dtype: float64

In [43]:
np.exp(s)

a         0.159166
b         0.870534
c         2.404366
d         2.698085
e    162754.791419
dtype: float64

In [44]:
s[1:]

b    -0.138648
c     0.877286
d     0.992542
e    12.000000
dtype: float64

In [46]:
s[1:]

b    -0.138648
c     0.877286
d     0.992542
e    12.000000
dtype: float64

# s[:-1]

In [47]:
s[:-1]

a   -1.837805
b   -0.138648
c    0.877286
d    0.992542
dtype: float64

In [48]:
s[1:] + s[:-1]

a         NaN
b   -0.277297
c    1.754573
d    1.985084
e         NaN
dtype: float64

In [49]:
s[1:] + s[:-1]  # outputs NaN for labels that are present in only one of the two slices

a         NaN
b   -0.277297
c    1.754573
d    1.985084
e         NaN
dtype: float64

In [50]:
s[1:] + s[:-1]  # returns NaN as certain elements of vector are missing, i.e. both arrays do not align when added

a         NaN
b   -0.277297
c    1.754573
d    1.985084
e         NaN
dtype: float64

In [51]:
s = pd.Series(np.random.randn(5), name='something')  # assign a name to object

In [52]:
s


0   -0.132575
1    1.409691
2    1.312414
3    0.701974
4    3.345616
Name: something, dtype: float64

In [53]:
# NameError: 'something' is only the Series' name attribute (s.name),
# not a variable in the notebook namespace.
something

NameError: name 'something' is not defined

In [54]:
s['f'] = 0  # add new element with a value to the series

In [55]:
s


0   -0.132575
1    1.409691
2    1.312414
3    0.701974
4    3.345616
f    0.000000
Name: something, dtype: float64

In [56]:
# Two float Series with overlapping but unequal label sets, keyed by the
# column name each one will get when passed to pd.DataFrame.
d = dict(
    one=pd.Series([1.0, 2.0, 3.0], index=list('abc')),
    two=pd.Series([1.0, 2.0, 3.0, 4.0], index=list('abcd')),
)

In [57]:
d


{'one': a    1.0
 b    2.0
 c    3.0
 dtype: float64, 'two': a    1.0
 b    2.0
 c    3.0
 d    4.0
 dtype: float64}

In [58]:
d = {'one' : pd.Series([1., 2., 3.], index=['a', 'b', 'c']), 
     'two' : pd.Series([1., 2., 3., 4.], index=['a', 'b', 'c', 'd'])}  #  Set up two pandas series with two sets of data 

In [59]:
d


{'one': a    1.0
 b    2.0
 c    3.0
 dtype: float64, 'two': a    1.0
 b    2.0
 c    3.0
 d    4.0
 dtype: float64}

In [60]:
# AttributeError: attribute lookup is case-sensitive; the class is pd.DataFrame.
df = pd.dataframe(d)  # Create a dataframe using series in d

AttributeError: 'module' object has no attribute 'dataframe'

In [61]:
df = pd.DataFrame(d)  # Create a dataframe using series in d

In [62]:
df

Unnamed: 0,one,two
a,1.0,1.0
b,2.0,2.0
c,3.0,3.0
d,,4.0


In [63]:
pd.DataFrame(d, index=['a', 'b', 'c'])

Unnamed: 0,one,two
a,1.0,1.0
b,2.0,2.0
c,3.0,3.0


In [64]:
pd.DataFrame(d, index=['d', 'b', 'a'], columns=['two', 'three'])

Unnamed: 0,two,three
d,4.0,
b,2.0,
a,1.0,


In [65]:
pd.DataFrame(d, index=['d', 'b', 'a'], columns=['two', 'three']) # Set up data frame with column two and new column three with no data

Unnamed: 0,two,three
d,4.0,
b,2.0,
a,1.0,


In [66]:
df.index


Index([u'a', u'b', u'c', u'd'], dtype='object')

In [68]:
df.columns



Index([u'one', u'two'], dtype='object')

In [69]:
# Column-oriented input: each key maps to an equal-length list of floats.
d = dict(
    one=[1.0, 2.0, 3.0, 4.0],
    two=[4.0, 3.0, 2.0, 1.0],
)

In [70]:
d

{'one': [1.0, 2.0, 3.0, 4.0], 'two': [4.0, 3.0, 2.0, 1.0]}

In [71]:
# Structured array of two zero-initialised records with fields
# A (32-bit int), B (32-bit float) and C (byte string, up to 10 bytes).
# 'S10' replaces the legacy 'a10' alias, which NumPy 2.0 removed; the
# resulting dtype is the same one the recorded output shows.
data = np.zeros((2,), dtype=[('A', 'i4'), ('B', 'f4'), ('C', 'S10')])

In [72]:
data[:] = [(1,2.,'Hello'), (2,3.,"World")]

In [73]:
data

array([(1, 2.0, 'Hello'), (2, 3.0, 'World')], 
      dtype=[('A', '<i4'), ('B', '<f4'), ('C', 'S10')])

In [74]:
# Set up a structured array of two zero-initialised records and define the
# type each field accepts: A = 32-bit int, B = 32-bit float, C = byte string
# of up to 10 bytes. 'S10' replaces the legacy 'a10' alias removed in NumPy 2.0.
data = np.zeros((2,), dtype=[('A', 'i4'), ('B', 'f4'), ('C', 'S10')])

In [75]:
data[:] = [(1,2.,'Hello'), (2,3.,"World")] # inserts data into two rows

In [76]:
data

array([(1, 2.0, 'Hello'), (2, 3.0, 'World')], 
      dtype=[('A', '<i4'), ('B', '<f4'), ('C', 'S10')])

In [77]:
pd.DataFrame(data)


Unnamed: 0,A,B,C
0,1,2.0,Hello
1,2,3.0,World


In [78]:
# Re-create the structured array so it holds only zero/empty records again.
# Fields: A = 32-bit int, B = 32-bit float, C = byte string of up to 10 bytes.
# 'S10' replaces the legacy 'a10' alias removed in NumPy 2.0.
data = np.zeros((2,), dtype=[('A', 'i4'), ('B', 'f4'), ('C', 'S10')])

In [79]:
pd.DataFrame(data)

Unnamed: 0,A,B,C
0,0,0.0,
1,0,0.0,


In [80]:
data[:] = [(1,2.,'Hello'), (2,3.,"World")] # inserts data into two rows

In [81]:
pd.DataFrame(data)

Unnamed: 0,A,B,C
0,1,2.0,Hello
1,2,3.0,World


In [82]:
data[:] = [(10000,2.,'Hello'), (2,3.,"World")] # inserts data into two rows

In [83]:
pd.DataFrame(data)

Unnamed: 0,A,B,C
0,10000,2.0,Hello
1,2,3.0,World


In [84]:
data[:] = [(1,2.,'Hello'), (2,3.,"World")] # inserts data into two rows

In [85]:
pd.DataFrame(data)

Unnamed: 0,A,B,C
0,1,2.0,Hello
1,2,3.0,World


In [86]:
pd.DataFrame(data, index=['first', 'second'])


Unnamed: 0,A,B,C
first,1,2.0,Hello
second,2,3.0,World


In [87]:
pd.DataFrame(data)

Unnamed: 0,A,B,C
0,1,2.0,Hello
1,2,3.0,World


In [88]:
pd.DataFrame(data, columns=['C', 'A', 'B'])

Unnamed: 0,C,A,B
0,Hello,1,2.0
1,World,2,3.0


In [89]:
# Row-oriented input: one dict per row. A key missing from a row ('c' in the
# first one) shows up as a missing value in the resulting frame.
data2 = [
    dict(a=1, b=2),
    dict(a=5, b=10, c=20),
]

pd.DataFrame(data2)

Unnamed: 0,a,b,c
0,1,2,
1,5,10,20.0


In [90]:
pd.DataFrame(data2)

Unnamed: 0,a,b,c
0,1,2,
1,5,10,20.0


In [91]:
pd.DataFrame(data2, index=['first', 'second'])

Unnamed: 0,a,b,c
first,1,2,
second,5,10,20.0


In [92]:
pd.DataFrame(data2, columns=['a', 'b'])

Unnamed: 0,a,b
0,1,2
1,5,10


In [93]:
df

Unnamed: 0,one,two
a,1.0,1.0
b,2.0,2.0
c,3.0,3.0
d,,4.0


In [94]:
df['one']


a    1.0
b    2.0
c    3.0
d    NaN
Name: one, dtype: float64

In [95]:
df['three'] = df['one'] * df['two'] # create column three and execute a calculation


In [96]:
df

Unnamed: 0,one,two,three
a,1.0,1.0,1.0
b,2.0,2.0,4.0
c,3.0,3.0,9.0
d,,4.0,


In [97]:
df['a','b']

KeyError: ('a', 'b')

In [98]:
# KeyError: [] on a DataFrame selects a column, and 'a' is a row label here
# (row lookup is df.loc['a']).
df['a']

KeyError: 'a'

In [99]:
# TypeError: a DataFrame is not callable; indexing uses [] or .loc, not ().
df('a')

TypeError: 'DataFrame' object is not callable

In [100]:
df['flag'] = df['one']>2

In [101]:
df

Unnamed: 0,one,two,three,flag
a,1.0,1.0,1.0,False
b,2.0,2.0,4.0,False
c,3.0,3.0,9.0,True
d,,4.0,,False


In [102]:
df['flag']

a    False
b    False
c     True
d    False
Name: flag, dtype: bool

In [103]:
df['a']

KeyError: 'a'

In [104]:
 df[,'a']

SyntaxError: invalid syntax (<ipython-input-104-5349de157287>, line 1)

In [105]:
three = df.pop('three')

In [107]:
df

Unnamed: 0,one,two,flag
a,1.0,1.0,False
b,2.0,2.0,False
c,3.0,3.0,True
d,,4.0,False


In [108]:
# NameError: 'fd' is not defined — presumably a typo for 'df'.
fd.loc['a']

NameError: name 'fd' is not defined

In [109]:
df.loc['a']

one         1
two         1
flag    False
Name: a, dtype: object

In [110]:
three

a    1.0
b    4.0
c    9.0
d    NaN
Name: three, dtype: float64

In [111]:
df

Unnamed: 0,one,two,flag
a,1.0,1.0,False
b,2.0,2.0,False
c,3.0,3.0,True
d,,4.0,False


In [112]:
df['three'] = df['one'] * df['two']


In [113]:
df

Unnamed: 0,one,two,flag,three
a,1.0,1.0,False,1.0
b,2.0,2.0,False,4.0
c,3.0,3.0,True,9.0
d,,4.0,False,


In [114]:
df['foo'] = 'bar'

In [115]:
df

Unnamed: 0,one,two,flag,three,foo
a,1.0,1.0,False,1.0,bar
b,2.0,2.0,False,4.0,bar
c,3.0,3.0,True,9.0,bar
d,,4.0,False,,bar


In [116]:
df['one_trunc'] = df['one'][:2]


In [117]:
df

Unnamed: 0,one,two,flag,three,foo,one_trunc
a,1.0,1.0,False,1.0,bar,1.0
b,2.0,2.0,False,4.0,bar,2.0
c,3.0,3.0,True,9.0,bar,
d,,4.0,False,,bar,


In [118]:
df.insert(1, 'bar', df['one'])

In [119]:
df

Unnamed: 0,one,bar,two,flag,three,foo,one_trunc
a,1.0,1.0,1.0,False,1.0,bar,1.0
b,2.0,2.0,2.0,False,4.0,bar,2.0
c,3.0,3.0,3.0,True,9.0,bar,
d,,,4.0,False,,bar,


In [120]:
df['bar_two] = df['one']']

SyntaxError: invalid syntax (<ipython-input-120-a4c84947f3cb>, line 1)

In [121]:
df['bar_two] = df['one']

SyntaxError: invalid syntax (<ipython-input-121-00f577b3013e>, line 1)