In [20]:
import numpy as np
import pandas as pd

In [2]:
# Five standard-normal draws labelled 'a' through 'e'; the bare trailing
# expression displays the Series.
s = pd.Series(
    data=np.random.randn(5),
    index=list('abcde'),
)
s

a   -0.206657
b   -2.091004
c    1.809307
d    1.397411
e   -1.269874
dtype: float64

In [3]:
# The labels passed as `index=` are exposed as an Index object.
s.index

Index(['a', 'b', 'c', 'd', 'e'], dtype='object')

In [4]:
# No index is given here, so the displayed labels are the integers 0..4.
pd.Series(np.random.rand(5))

0    0.361073
1    0.497896
2    0.354834
3    0.013612
4    0.665716
dtype: float64

### From Dict

In [5]:
# The dict keys become the index labels, in insertion order (b, a, c).
d = dict(b=1, a=0, c=2)
pd.Series(d)

b    1
a    0
c    2
dtype: int64

### From scalar value

In [21]:
# A scalar value is repeated for every label in the index.
pd.Series(5.0, index=list('abcde'))

a    5.0
b    5.0
c    5.0
d    5.0
e    5.0
dtype: float64

### Series is ndarray-like

In [7]:
# Positional access goes through .iloc. With [] an integer key on a
# label-indexed Series is treated as a label, and the positional fallback
# that made s[0] work is deprecated.
s.iloc[0]

-0.20665746679867483

In [8]:
# First three elements, selected by position.
s.iloc[:3]

a   -0.206657
b   -2.091004
c    1.809307
dtype: float64

In [9]:
# Boolean mask: keep only the values strictly greater than the median.
s.loc[s > s.median()]

c    1.809307
d    1.397411
dtype: float64

In [10]:
# Pick elements by position, in the order given. .iloc is required because
# integer keys passed to [] on a label-indexed Series are treated as labels;
# the positional fallback is deprecated.
s.iloc[[4, 3, 1]]

e   -1.269874
d    1.397411
b   -2.091004
dtype: float64

In [11]:
# A NumPy ufunc applied to a Series works element-wise; the result keeps the labels.
np.exp(s)

a    0.813298
b    0.123563
c    6.106211
d    4.044716
e    0.280867
dtype: float64

In [12]:
# dtype of the values held by the Series.
s.dtype

dtype('float64')

While Series is ndarray-like, if you need an actual ndarray, then use Series.to_numpy():

In [13]:
# Returns the values as a plain NumPy array, without the labels.
s.to_numpy()

array([-0.20665747, -2.09100419,  1.80930652,  1.39741123, -1.26987358])

### Series is dict-like

In [14]:
# Look up a single value by its index label, like a dict key.
s['a']

-0.20665746679867483

In [15]:
# Same label lookup, for the last label.
s['e']

-1.2698735776891692

In [16]:
# Display the whole Series again for reference.
s

a   -0.206657
b   -2.091004
c    1.809307
d    1.397411
e   -1.269874
dtype: float64

In [17]:
# `in` tests membership among the index labels.
'e' in s

True

In [18]:
# 'f' is not one of the index labels.
'f' in s

False

In [19]:
# Indexing with a label that is not in the index raises KeyError. Catch it
# and print the message so the demonstration does not halt "Run All".
try:
    s['f']
except KeyError as err:
    print(f"KeyError: {err}")

KeyError: 'f'

### Vectorized operations

In [None]:
# Add the Series to itself.
s + s

In [None]:
# Multiply every element by a scalar.
s * 2

In [None]:
# NumPy ufuncs accept a Series directly.
np.exp(s)

A key difference between Series and ndarray is that operations between Series automatically align data based on the label. Thus, you can write computations without considering whether the Series involved have the same labels.

In [None]:
# s1 drops the first element and s2 drops the last, so the two operands do
# not carry identical labels and the addition has to align on labels.
s1 = s.iloc[1:]
s2 = s.iloc[:-1]
s1 + s2

### Name Attribute

In [None]:
# A Series can carry a name, set here with the `name=` keyword.
# Note: this rebinds `s`, replacing the labelled Series used in earlier cells.
s = pd.Series(data=np.random.randn(5), name='something')
s

In [None]:
# The name given at construction is available as an attribute.
s.name

## DataFrame

### From Dictionary of Series or dictionaries

In [None]:
# Two Series with different indexes: 'two' has an extra label 'd'.
d = {
    'one': pd.Series([1.0, 2.0, 3.0], index=list('abc')),
    'two': pd.Series([1.0, 2.0, 3.0, 4.0], index=list('abcd')),
}
df = pd.DataFrame(d)
df

In [None]:
# Build from the same dict, this time with an explicit row index.
pd.DataFrame(d, index=['d', 'b', 'a'])

In [None]:
# Explicit row index and column list; note that 'three' is not a key of d.
pd.DataFrame(d, index=['d', 'b', 'a'], columns=['two', 'three'])

### From Dictionary of ndarrays or lists

In [None]:
# Equal-length lists become the columns; no index is supplied here.
d = {
    'one': [1.0, 2.0, 3.0, 4.0],
    'two': [4.0, 3.0, 2.0, 1.0],
}
pd.DataFrame(d)

In [None]:
# Same data, with explicit row labels.
pd.DataFrame(d, index=['a', 'b', 'c', 'd'])

### From a Series

In [None]:
# Wrap a single named Series in a DataFrame.
pd.DataFrame(
    pd.Series(np.random.randn(5), name='something')
)

### Column selection, addition, deletion

In [None]:
# Select a single column by name.
df['one']

In [None]:
# Add two derived columns: an element-wise product and a boolean flag.
df['three'] = df['one'] * df['two']
df['flag'] = df['one'] > 2
df

In [None]:
# Remove the column in place.
# NOTE: not re-runnable on the same df; a second run fails because 'two' is already gone.
del df['two']

In [None]:
# Assign a scalar to a new column, then display the frame.
df['foo'] = 'bar'
df

When inserting a Series that does not have the same index as the DataFrame, it will be conformed to the DataFrame's index.

In [None]:
# Assign only the first two entries of 'one', i.e. a Series shorter than df.
df['one_trunc'] = df['one'].iloc[:2]
df

### Scalar Operations

In [None]:
# Rebind df to a fresh 8x3 frame of standard-normal draws with columns
# A, B, C, then scale and shift every element.
df = pd.DataFrame(np.random.randn(8, 3), columns=['A', 'B', 'C'])
df * 5 + 2

In [None]:
# Element-wise reciprocal.
1 / df

In [None]:
# Raise every element to the fourth power.
df ** 4

## Boolean operators are vectorized as well

And:

In [None]:
# Two 3x2 boolean frames used as operands for the logical operators.
df1 = pd.DataFrame({'a': [1, 0, 1], 'b': [0, 1, 1]}, dtype=bool)
df2 = pd.DataFrame({'a': [0, 1, 1], 'b': [1, 1, 0]}, dtype=bool)
# Separate the two printed frames with a blank line. With print's default
# single-space separator, df2's header runs onto the end of df1's last row.
print(df1, df2, sep='\n\n')
# Element-wise logical AND.
df1 & df2

Or:


In [None]:
# Element-wise logical OR.
df1 | df2

Exclusive or:

In [None]:
# Element-wise logical XOR.
df1 ^ df2

In [None]:
# Element-wise logical NOT. `~` is the inversion operator for boolean data;
# unary minus is arithmetic negation, which NumPy rejects for boolean arrays.
~df1

## dtypes

In [24]:
# One column per dtype of interest. 'A', 'E' and 'G' are length-3 containers;
# the remaining columns are given as scalars.
dft = pd.DataFrame(
    {
        'A': np.random.rand(3),
        'B': 1,
        'C': 'foo',
        'D': pd.Timestamp('20010102'),
        'E': pd.Series([1.0] * 3).astype('float32'),
        'F': False,
        'G': pd.Series([1] * 3, dtype='int8'),
    }
)
dft

Unnamed: 0,A,B,C,D,E,F,G
0,0.625848,1,foo,2001-01-02,1.0,False,1
1,0.436724,1,foo,2001-01-02,1.0,False,1
2,0.964131,1,foo,2001-01-02,1.0,False,1


In [25]:
# Per-column dtypes, returned as a Series indexed by column name.
dft.dtypes

A           float64
B             int64
C            object
D    datetime64[ns]
E           float32
F              bool
G              int8
dtype: object

In [26]:
# dtype of a single column.
dft['A'].dtype

dtype('float64')

In [27]:
# Integers, a float and a string in one Series: the displayed dtype is object.
pd.Series([1, 2, 3, 6.0, 'foo'])

0      1
1      2
2      3
3      6
4    foo
dtype: object

In [28]:
# Rebinds df1 (previously a boolean frame) to a single float32 column of
# eight standard-normal draws.
df1 = pd.DataFrame(np.random.randn(8, 1), columns=['A'], dtype='float32')
df1.dtypes

A    float32
dtype: object

In [29]:
# Cast every column to float64 and rebind df1 to the result.
df1 = df1.astype('float64')

In [30]:
# Check the column dtype after the cast.
df1.dtypes

A    float64
dtype: object

Convert certain columns to a specific dtype by passing a dict to astype():

In [31]:
# Cast only columns 'a' and 'c'; 'b' is not in the mapping and keeps its dtype.
# The builtin `bool` is used instead of `np.bool`: that alias was deprecated
# in NumPy 1.20 and removed in 1.24, where it raises AttributeError.
df1 = pd.DataFrame({'a': [1, 0, 1], 'b': [4, 5, 6], 'c': [7, 8, 9]})
df1 = df1.astype({'a': bool, 'c': np.float64})
df1

Unnamed: 0,a,b,c
0,True,4,7.0
1,False,5,8.0
2,True,6,9.0


In [32]:
# Confirm the per-column dtypes after the dict-based cast.
df1.dtypes

a       bool
b      int64
c    float64
dtype: object