In [1]:
# import np and pd
import numpy as np
import pandas as pd

### 2. Create a series from a list, numpy array and dict

In [4]:
mylist = list('abcdefhijklmnopqrstuvwxyz')
myarr = np.arange(26)
mydict = dict(zip(mylist, myarr))

print('mylist:', mylist)
print('myarr:', myarr)
print('mydict:', mydict)

mylist: ['a', 'b', 'c', 'd', 'e', 'f', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
myarr: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25]
mydict: {'a': 0, 'b': 1, 'c': 2, 'd': 3, 'e': 4, 'f': 5, 'h': 6, 'i': 7, 'j': 8, 'k': 9, 'l': 10, 'm': 11, 'n': 12, 'o': 13, 'p': 14, 'q': 15, 'r': 16, 's': 17, 't': 18, 'u': 19, 'v': 20, 'w': 21, 'x': 22, 'y': 23, 'z': 24}


### 3. Convert the index of a series into a column of a dataframe

In [11]:
# series
ser = pd.Series(mydict)

print('ser:', ser.head())

# convert to df
# reset_index()는 index를 단순 정수 인덱스로 새로 생성
df = ser.to_frame().reset_index()
print(df.head())

ser: a    0
b    1
c    2
d    3
e    4
dtype: int64
  index  0
0     a  0
1     b  1
2     c  2
3     d  3
4     e  4


### 4. combine many series to form a dataframe

In [14]:
ser1 = pd.Series(list('abcdefghijklmnopqrstuvwxyz'))
ser2 = pd.Series(np.arange(26))

df1 = pd.concat([ser1,ser2], axis=1)
print('df1:\n', df1.head())

df2 = pd.DataFrame({'col1':ser1, 'col2':ser2})
print('df2:\n', df2.head())

df1:
    0  1
0  a  0
1  b  1
2  c  2
3  d  3
4  e  4
df2:
   col1  col2
0    a     0
1    b     1
2    c     2
3    d     3
4    e     4


### 5. Assign name to the series' index

In [15]:
ser = pd.Series(list('abcdefghijklmnopqrstuvwxyz'))

ser.name = 'alphabets'
ser.head()

0    a
1    b
2    c
3    d
4    e
Name: alphabets, dtype: object

### 6. Get the items of series A not present in series *B*

In [17]:
# ser1 - ser2
ser1 = pd.Series([1,2,3,4,5])
ser2 = pd.Series([4,5,6,7,8])

ser1[~ser1.isin(ser2)]

0    1
1    2
2    3
dtype: int64

### 7. Get the items not common to both series A and series B

In [19]:
ser1 = pd.Series([1,2,3,4,5])
ser2 = pd.Series([4,5,6,7,8])

# 집합더하기
ser_u = pd.Series(np.union1d(ser1,ser2))
# 교집합
ser_i = pd.Series(np.intersect1d(ser1,ser2))
# 합집합
ser_u[~ser_u.isin(ser_i)]

0    1
1    2
2    3
5    6
6    7
7    8
dtype: int64

### 8. Get the minimum, 25, 50, 75 % and max of a numeric series

In [20]:
state = np.random.RandomState(100)
ser = pd.Series(np.random.normal(10,5,25))

np.percentile(ser, q=[0, 25, 50, 75, 100])

array([-7.19372713,  6.52254704,  9.7287353 , 11.97256585, 19.00629195])

### 9. Get frequency counts of unique items of a series

In [22]:

ser = pd.Series(np.take(list('abcdefgh'), np.random.randint(8, size=30)))

ser.value_counts()

a    7
e    5
f    5
h    4
d    3
g    3
c    2
b    1
dtype: int64

### 10. Keep only top 2 most frequent values as it is and replace everything else as 'Other'

In [24]:
np.random.RandomState(100)
ser = pd.Series(np.random.randint(1, 5, [12]))

print("Top 2 Freq:", ser.value_counts())
ser[~ser.isin(ser.value_counts().index[:2])] = 'Other'
ser

Top 2 Freq: 3    6
2    3
4    2
1    1
dtype: int64


0         3
1     Other
2         2
3         3
4         3
5         3
6         2
7         3
8     Other
9         2
10    Other
11        3
dtype: object

### 11. Bin a numeric series to 10 groups of equal size

In [25]:
ser = pd.Series(np.random.random(20))
print(ser.head())

# cut은 동일 길이로 나누기, qcut은 동일 갯수(사이즈)로 나누기
pd.qcut(ser, q=[0, .10, .20, .3, .4, .5, .6, .7, .8, .9, 1],
        labels=['1st','2nd','3rd','4th','5th','6th','7th','8th','9th','10th']).head()

0    0.991360
1    0.095594
2    0.115758
3    0.856700
4    0.630930
dtype: float64


0    10th
1     1st
2     2nd
3     8th
4     7th
dtype: category
Categories (10, object): ['1st' < '2nd' < '3rd' < '4th' ... '7th' < '8th' < '9th' < '10th']

### 12. Convert a numpy array to a dataframe of given shape

In [26]:
ser = pd.Series(np.random.randint(1, 10, 35))

df = pd.DataFrame(ser.values.reshape(7,5))
print(df)

   0  1  2  3  4
0  1  6  5  7  9
1  6  8  7  3  5
2  2  8  7  1  5
3  8  6  7  1  7
4  5  4  1  2  2
5  2  5  4  7  9
6  9  6  6  7  5


### 13. Find the positions of numbers that are multiples of 3 from a series

In [38]:
nplist=np.random.randint(1,10,7)
ser = pd.Series(nplist)

print(ser)
np.argwhere(nplist%3==0)

0    8
1    3
2    9
3    9
4    6
5    4
6    4
dtype: int64


array([[1],
       [2],
       [3],
       [4]])

### 14. Extract items at given positions from a series

In [39]:
ser = pd.Series(list('abcdefghijklmnopqrstuvwxyz'))
pos = [0, 4, 8, 14, 20]

ser.take(pos)

0     a
4     e
8     i
14    o
20    u
dtype: object

### 15. Stack two series vertically and horizontally

In [43]:
ser1 = pd.Series(range(5))
ser2 = pd.Series(list('abcde'))

# Vertical
print(ser1.append(ser2))

# Horizon
df = pd.concat([ser1, ser2], axis=1)
print(df)

0    0
1    1
2    2
3    3
4    4
0    a
1    b
2    c
3    d
4    e
dtype: object
   0  1
0  0  a
1  1  b
2  2  c
3  3  d
4  4  e


### 16. Get the positions of items of series A in another serries B

In [44]:
ser1 = pd.Series([10, 9, 6, 5, 3, 1, 12, 8, 13])
ser2 = pd.Series([1, 3, 10, 13])

# sol1
[np.where(i==ser1)[0].tolist()[0] for i in ser2]

# sol2
[pd.Index(ser1).get_loc(i) for i in ser2]

[5, 4, 0, 8]