In [1]:
import numpy as np
import pandas as pd
pd.__version__, np.__version__

('0.23.0', '1.14.3')

In [2]:
mylist = list('abcedfghijklmnopqrstuvwxyz')
myarr = np.arange(26)
mydict = dict(zip(mylist, myarr))

In [3]:
# Convert to series
pd.Series(mylist)
pd.Series(myarr)
pd.Series(mydict);

In [4]:
# Convert index to column in a DataFrame
mydict = dict(zip(mylist, myarr))
ser = pd.Series(mydict)
ser.to_frame().reset_index();

In [5]:
# Combine series to form DataFrame
ser1 = pd.Series(list('abcedfghijklmnopqrstuvwxyz'))
ser2 = pd.Series(np.arange(26))
pd.DataFrame({'col1': ser1, 'col2': ser2})
pd.concat([ser1, ser2], axis=1);

In [6]:
# Assign a name to series
ser = pd.Series(list('abcedfgh'))
ser.name = 'Test'
ser

0    a
1    b
2    c
3    e
4    d
5    f
6    g
7    h
Name: Test, dtype: object

In [7]:
# Get the items in A that is not in B
ser1 = pd.Series([1, 2, 3, 4, 5])
ser2 = pd.Series([4, 5, 6, 7, 8])
ser1[~ser1.isin(ser2)]

0    1
1    2
2    3
dtype: int64

In [8]:
ser = pd.Series(np.random.normal(10, 5, 25))
ser.describe()

count    25.000000
mean      9.307620
std       5.067542
min      -0.494606
25%       5.983179
50%       8.738448
75%      13.930007
max      17.350220
dtype: float64

In [9]:
np.percentile(ser, q=[0, 0.25, 0.5, 0.75, 1])

array([-0.49460606, -0.38903823, -0.28347041, -0.17790258, -0.07233476])

In [10]:
# Get freq counts
ser = pd.Series(np.take(list('abcdefgh'), np.random.randint(8, size=30)))
ser.value_counts()

a    6
f    6
h    4
g    3
b    3
d    3
e    3
c    2
dtype: int64

In [11]:
np.random.RandomState(100)
ser = pd.Series(np.random.randint(1, 5, [12]))
ser

0     2
1     1
2     1
3     3
4     3
5     4
6     4
7     1
8     1
9     3
10    3
11    3
dtype: int32

In [12]:
ser[~ser.isin(ser.value_counts()[:2])] = 'Other'
ser

0     Other
1     Other
2     Other
3     Other
4     Other
5         4
6         4
7     Other
8     Other
9     Other
10    Other
11    Other
dtype: object

In [22]:
# Discreticize based on deciles
ser = pd.Series(np.random.random(20))
pd.qcut(ser, np.linspace(0, 1, 11),
        labels=['1st', '2nd', '3rd', '4th', '5th', '6th', '7th', '8th', '9th', '10th'])

0      1st
1      9th
2      1st
3      5th
4      7th
5      2nd
6      2nd
7      5th
8      7th
9      3rd
10     8th
11     3rd
12     6th
13     8th
14     9th
15    10th
16    10th
17     4th
18     4th
19     6th
dtype: category
Categories (10, object): [1st < 2nd < 3rd < 4th ... 7th < 8th < 9th < 10th]

In [24]:
ser = pd.Series(np.random.randint(1, 10, 35))
pd.DataFrame(ser.values.reshape(7, 5))

Unnamed: 0,0,1,2,3,4
0,1,3,7,9,4
1,7,9,3,8,1
2,6,6,8,1,8
3,7,1,9,6,7
4,6,4,3,6,7
5,2,8,7,9,9
6,9,1,2,1,2


In [29]:
ser = pd.Series(np.random.randint(1, 10, 7))
np.argwhere(ser % 3 == 0)

array([[2],
       [4],
       [5]], dtype=int64)

In [30]:
ser = pd.Series(list('abcdefghijklmnopqrstuvwxyz'))
pos = [0, 4, 8, 14, 20]
ser[pos]

0     a
4     e
8     i
14    o
20    u
dtype: object

In [32]:
ser1 = pd.Series(range(5))
ser2 = pd.Series(list('abcde'))
pd.concat([ser1, ser2], axis=1)
ser1.append(ser2)

0    0
1    1
2    2
3    3
4    4
0    a
1    b
2    c
3    d
4    e
dtype: object

In [34]:
ser = pd.Series(['how', 'to', 'kick', 'ass?'])
ser.map(lambda x: x[0].upper() + x[1:])

0     How
1      To
2    Kick
3    Ass?
dtype: object

In [35]:
ser.apply(lambda x: len(x))

0    3
1    2
2    4
3    4
dtype: int64

In [36]:
# Convert string date to time series
ser = pd.Series(['01 Jan 2010', '02-02-2011', '20120303',
                 '2013/04/04', '2014-05-05', '2015-06-06T12:20'])
pd.to_datetime(ser)

0   2010-01-01 00:00:00
1   2011-02-02 00:00:00
2   2012-03-03 00:00:00
3   2013-04-04 00:00:00
4   2014-05-05 00:00:00
5   2015-06-06 12:20:00
dtype: datetime64[ns]