In [14]:
import numpy as np
import pandas as pd

In [15]:
# Seed NumPy's legacy global RNG so that Restart & Run All reproduces the same
# numbers, both here and in the later cells that call np.random.* directly.
# (Recorded outputs that predate the seed need a re-run to refresh.)
np.random.seed(0)

# Five standard-normal draws labelled 'a'..'e'; `s` is reused by the cells below.
s = pd.Series(np.random.randn(5), index=['a', 'b', 'c', 'd', 'e'])
s

a   -0.553005
b   -0.575358
c   -0.071550
d   -0.234496
e   -0.850701
dtype: float64

In [16]:
s.index

Index(['a', 'b', 'c', 'd', 'e'], dtype='object')

In [17]:
# No index is passed here, so the Series is labelled 0..4 (see the output below).
pd.Series(np.random.rand(5))

0    0.341463
1    0.438511
2    0.304867
3    0.411635
4    0.564872
dtype: float64

### From Dict

In [18]:
# Build the Series straight from a mapping: keys become the labels and the
# insertion order (b, a, c) is kept, as the output below shows.
d = dict(b=1, a=0, c=2)
pd.Series(data=d)

b    1
a    0
c    2
dtype: int64

### From scalar value

In [19]:
# Builds a Series from the scalar 5. together with five explicit labels.
# NOTE(review): the recorded output below is a TypeError about interpreting a
# numpy dtype attribute; that looks like a NumPy/pandas version mismatch in the
# environment rather than a fault in this call -- confirm by re-running with
# mutually compatible versions.
pd.Series(5., index=['a', 'b', 'c', 'd', 'e'])

TypeError: Cannot interpret '<attribute 'dtype' of 'numpy.generic' objects>' as a data type

### Series is ndarray-like

In [20]:
# First element by position. Use .iloc explicitly: plain s[0] on a
# string-labelled Series depends on the deprecated "integer key means position"
# fallback of Series.__getitem__.
s.iloc[0]

-0.5530047758427765

In [21]:
s[:3]

a   -0.553005
b   -0.575358
c   -0.071550
dtype: float64

In [22]:
s[s > s.median()]

c   -0.071550
d   -0.234496
dtype: float64

In [23]:
# Pick positions 4, 3 and 1 (labels e, d, b) in that order. .iloc makes the
# positional intent explicit instead of relying on the deprecated
# integer-as-position fallback of s[[...]] on a string-labelled Series.
s.iloc[[4, 3, 1]]

e   -0.850701
d   -0.234496
b   -0.575358
dtype: float64

In [24]:
np.exp(s)

a    0.575219
b    0.562504
c    0.930949
d    0.790970
e    0.427115
dtype: float64

In [25]:
s.dtype

dtype('float64')

While Series is ndarray-like, if you need an actual ndarray, then use Series.to_numpy():

In [26]:
s.to_numpy()

array([-0.55300478, -0.5753576 , -0.07155038, -0.23449583, -0.85070102])

### Series is dict-like

In [27]:
s['a']

-0.5530047758427765

In [28]:
s['e']

-0.8507010192414463

In [29]:
s

a   -0.553005
b   -0.575358
c   -0.071550
d   -0.234496
e   -0.850701
dtype: float64

In [30]:
'e' in s

True

In [31]:
'f' in s

False

In [32]:
# Looking up a label that is not in the index raises KeyError, just like a dict.
# The error is the point of this cell, so catch it and show it; that way
# Restart & Run All does not stop here.
try:
    s['f']
except KeyError as err:
    print(f"KeyError: {err}")

KeyError: 'f'

### Vectorized operations

In [33]:
s + s

a   -1.106010
b   -1.150715
c   -0.143101
d   -0.468992
e   -1.701402
dtype: float64

In [34]:
s * 2

a   -1.106010
b   -1.150715
c   -0.143101
d   -0.468992
e   -1.701402
dtype: float64

In [35]:
np.exp(s)

a    0.575219
b    0.562504
c    0.930949
d    0.790970
e    0.427115
dtype: float64

A key difference between Series and ndarray is that operations between Series automatically align data based on the label. Thus, you can write computations without considering whether the Series involved have the same labels.

In [36]:
# One operand drops the first label, the other drops the last. The sum is
# aligned on labels, so 'a' and 'e' (each present on one side only) come out
# as NaN in the output below.
s1, s2 = s[1:], s[:-1]
s1.add(s2)

a         NaN
b   -1.150715
c   -0.143101
d   -0.468992
e         NaN
dtype: float64

### Name Attribute

In [37]:
# Rebind `s` to a new, default-indexed Series that carries a name; the name is
# shown in the repr footer and read back via s.name in the next cell.
s = pd.Series(data=np.random.randn(5), name='something')
s

0   -1.416899
1   -0.887527
2    0.830732
3    0.846152
4    1.196156
Name: something, dtype: float64

In [38]:
s.name

'something'

## DataFrame

### From Dictionary of Series or dictionaries

In [39]:
# Two Series with overlapping but unequal indexes: 'one' has no entry for 'd'.
# The resulting frame uses the union of the labels and leaves the gap as NaN
# (see the output below).
d = dict(
    one=pd.Series([1.0, 2.0, 3.0], index=list('abc')),
    two=pd.Series([1.0, 2.0, 3.0, 4.0], index=list('abcd')),
)
df = pd.DataFrame(data=d)
df

Unnamed: 0,one,two
a,1.0,1.0
b,2.0,2.0
c,3.0,3.0
d,,4.0


In [40]:
pd.DataFrame(d,index=['d','b','a'])

Unnamed: 0,one,two
d,,4.0
b,2.0,2.0
a,1.0,1.0


In [41]:
# Asks for rows d, b, a and columns 'two' and 'three'; 'three' is not one of the
# keys of `d` as defined two cells above ('one', 'two').
# NOTE(review): the recorded output below is an AttributeError about
# `object.dtype`; that looks like a NumPy/pandas version mismatch in the
# environment rather than a fault in this call -- confirm by re-running with
# mutually compatible versions.
pd.DataFrame(d, index=['d', 'b', 'a'], columns=['two', 'three'])

AttributeError: type object 'object' has no attribute 'dtype'

### From Dictionary of ndarrays or lists

In [42]:
# Rebind `d` to a mapping of equal-length lists; with no index given the frame
# is labelled 0..3 (see the output below).
d = dict(one=[1.0, 2.0, 3.0, 4.0], two=[4.0, 3.0, 2.0, 1.0])
pd.DataFrame(data=d)

Unnamed: 0,one,two
0,1.0,4.0
1,2.0,3.0
2,3.0,2.0
3,4.0,1.0


In [43]:
pd.DataFrame(d,index=['a', 'b', 'c', 'd'])

Unnamed: 0,one,two
a,1.0,4.0
b,2.0,3.0
c,3.0,2.0
d,4.0,1.0


### From a Series

In [44]:
pd.DataFrame(pd.Series(np.random.randn(5), name='something'))

Unnamed: 0,something
0,1.768267
1,-0.640972
2,0.859309
3,-0.142241
4,0.236315


### Column selection, addition, deletion

In [45]:
df['one']

a    1.0
b    2.0
c    3.0
d    NaN
Name: one, dtype: float64

In [46]:
# Add two derived columns in place: the element-wise product of 'one' and 'two',
# and a boolean flag for rows where 'one' exceeds 2 (the NaN row compares False,
# as the output below shows).
df['three'] = df['one'].mul(df['two'])
df['flag'] = df['one'].gt(2)
df

Unnamed: 0,one,two,three,flag
a,1.0,1.0,1.0,False
b,2.0,2.0,4.0,False
c,3.0,3.0,9.0,True
d,,4.0,,False


In [47]:
del df['two']

In [48]:
df['foo'] = 'bar'
df

Unnamed: 0,one,three,flag,foo
a,1.0,1.0,False,bar
b,2.0,4.0,False,bar
c,3.0,9.0,True,bar
d,,,False,bar


When inserting a Series that does not have the same index as the DataFrame, it will be conformed to the DataFrame's index:

In [49]:
df['one_trunc'] = df['one'][:2]
df

Unnamed: 0,one,three,flag,foo,one_trunc
a,1.0,1.0,False,bar,1.0
b,2.0,4.0,False,bar,2.0
c,3.0,9.0,True,bar,
d,,,False,bar,


### Scalar Operations

In [54]:
# Rebind `df` to an 8x3 frame of standard-normal draws with columns A, B, C,
# then scale and shift every element by scalars.
df = pd.DataFrame(data=np.random.randn(8, 3), columns=['A', 'B', 'C'])
df.mul(5).add(2)

Unnamed: 0,A,B,C
0,-1.950041,5.401718,2.650995
1,-5.633324,7.699903,-2.712734
2,1.682391,1.411245,2.040682
3,-7.793408,-2.979179,-5.236465
4,-7.995742,-7.350596,-2.863394
5,1.838838,-6.639486,4.782147
6,-2.176509,4.136013,5.572507
7,5.213413,5.13141,9.403486


In [55]:
1/df

Unnamed: 0,A,B,C
0,-1.26581,1.469845,7.680548
1,-0.655023,0.877208,-1.060955
2,-15.74262,-8.492501,122.904691
3,-0.510548,-1.004182,-0.690945
4,-0.500213,-0.534725,-1.028089
5,-31.024715,-0.578738,1.797174
6,-1.197172,2.340809,1.399578
7,1.555978,1.596725,0.675358


In [56]:
df ** 4

Unnamed: 0,A,B,C
0,0.389517,0.214246,0.0002873631
1,5.432187,1.688846,0.7892449
2,1.6e-05,0.000192,4.382543e-09
3,14.718222,0.983447,4.387588
4,15.972767,12.231427,0.8951126
5,1e-06,8.913977,0.09586056
6,0.486826,0.033307,0.2606227
7,0.170603,0.153844,4.806898


## Boolean operators are vectorized as well

And:

In [61]:
# Two small boolean frames with identical labels, used by the boolean-operator
# cells that follow.
df1 = pd.DataFrame({'a': [1, 0, 1], 'b': [0, 1, 1]}, dtype=bool)
df2 = pd.DataFrame({'a': [0, 1, 1], 'b': [1, 1, 0]}, dtype=bool)

# Show both operands. A blank line between them keeps the last row of df1 from
# running into the header of df2, which the default single-space separator does.
print(df1, df2, sep='\n\n')

# Element-wise logical AND.
df1 & df2

       a      b
0   True  False
1  False   True
2   True   True        a      b
0  False   True
1   True   True
2   True  False


Unnamed: 0,a,b
0,False,False
1,False,True
2,True,False


Or:


In [62]:
df1 | df2

Unnamed: 0,a,b
0,True,True
1,True,True
2,True,True


Exclusive or:

In [63]:
df1 ^ df2

Unnamed: 0,a,b
0,True,True
1,True,False
2,False,True


In [64]:
# Element-wise logical NOT. `~` is the boolean-inversion operator; unary minus
# is arithmetic negation and is not defined for NumPy boolean arrays, so it
# only works on a bool frame where pandas special-cases it.
~df1

Unnamed: 0,a,b
0,False,True
1,True,False
2,False,False
