# NumPy

In [1]:
import numpy as np

In [2]:
# 2x3 integer matrix used as the running example in the following cells.
x = np.array([
    [1, 2, 3],
    [4, 5, 6],
])

## NumPy Help functions

In [3]:
# NOTE(review): np.lookfor expects a keyword string. Passing the array makes it
# search docstrings for the array's printed form (see the header
# "Search results for '[[1 2 3] [4 5 6]]'" in the output), so the hits are
# incidental. A call such as np.lookfor('reshape') is presumably what was meant.
np.lookfor(x)

Search results for '[[1 2 3] [4 5 6]]'
--------------------------------------
numpy.diagonal
    Return specified diagonals.
numpy.flip
    Reverse the order of elements in an array along the given axis.
numpy.load
    Load arrays or pickled objects from ``.npy``, ``.npz`` or pickled files.
numpy.ndim
    Return the number of dimensions of an array.
numpy.size
    Return the number of elements along a given axis.
numpy.cross
    Return the cross product of two (arrays of) vectors.
numpy.ravel
    Return a contiguous flattened array.
numpy.rot90
    Rotate an array by 90 degrees in the plane specified by axes.
numpy.append
    Append values to the end of an array.
numpy.cumsum
    Return the cumulative sum of the elements along a given axis.
numpy.cumprod
    Return the cumulative product of elements along a given axis.
numpy.reshape
    Gives a new shape to an array without changing its data.
numpy.swapaxes
    Interchange two axes of an array.
numpy.isfortran
    Returns True if the a

In [4]:
# Print low-level metadata about the array object (shape, strides, itemsize,
# memory layout flags, byte order and dtype).
np.info(x)

class:  ndarray
shape:  (2, 3)
strides:  (24, 8)
itemsize:  8
aligned:  True
contiguous:  True
fortran:  False
data pointer: 0x281b860
byteorder:  little
byteswap:  False
type: int64


## reshape & resize

In [5]:
# -1 asks NumPy to infer that dimension from the total size:
# 6 elements with 1 row -> shape (1, 6).
y = x.reshape(1, -1)

In [6]:
y

array([[1, 2, 3, 4, 5, 6]])

In [7]:
# In this run -2 behaved exactly like -1 (the result below is 2x3).
# NOTE(review): only -1 is the documented "infer this dimension" value; confirm
# that other negative values are still accepted by the NumPy version in use.
y = x.reshape(2, -2)

In [8]:
y

array([[1, 2, 3],
       [4, 5, 6]])

In [9]:
# Same experiment with -3: the result below is again the inferred shape (1, 6).
y = x.reshape(1, -3)

In [10]:
y

array([[1, 2, 3, 4, 5, 6]])

In [11]:
# Even -100 was treated as "infer this dimension" in this run (result is 2x3).
y = x.reshape(2, -100)

In [12]:
y

array([[1, 2, 3],
       [4, 5, 6]])

In [13]:
x

array([[1, 2, 3],
       [4, 5, 6]])

In [14]:
# Rebind x to a fresh 1-D array 0..26 for the resize and split examples.
x = np.arange(27)

In [15]:
# ndarray.resize changes the shape of x itself: this cell displays nothing,
# and x is 3x9 when shown in the next cell.
x.resize((3, 9))

In [16]:
x

array([[ 0,  1,  2,  3,  4,  5,  6,  7,  8],
       [ 9, 10, 11, 12, 13, 14, 15, 16, 17],
       [18, 19, 20, 21, 22, 23, 24, 25, 26]])

`resize`는 `reshape`와 달리 음수 차원(예: `-1`)을 허용하지 않는다.

In [17]:
# Intentional failure: unlike reshape, resize does not infer a dimension from
# -1 and raises ValueError (shown below).
x.resize((3, -1))

ValueError: negative dimensions not allowed

### split

In [18]:
# The tuple lists split points along axis 0, so the pieces are
# x[:2], x[2:2] (empty) and x[2:].
np.split(x, (2, 2))

[array([[ 0,  1,  2,  3,  4,  5,  6,  7,  8],
        [ 9, 10, 11, 12, 13, 14, 15, 16, 17]]),
 array([], shape=(0, 9), dtype=int64),
 array([[18, 19, 20, 21, 22, 23, 24, 25, 26]])]

In [19]:
# Split points follow slice semantics, so negatives count from the end:
# x[:-2] (first row), x[-2:5] (last two rows) and x[5:] (empty, x has 3 rows).
np.split(x, (-2, 5))

[array([[0, 1, 2, 3, 4, 5, 6, 7, 8]]),
 array([[ 9, 10, 11, 12, 13, 14, 15, 16, 17],
        [18, 19, 20, 21, 22, 23, 24, 25, 26]]),
 array([], shape=(0, 9), dtype=int64)]

In [20]:
# Both split points lie past the last row: x[:5] is the whole array, while
# x[5:5] and x[5:] are empty.
np.split(x, (5, 5))

[array([[ 0,  1,  2,  3,  4,  5,  6,  7,  8],
        [ 9, 10, 11, 12, 13, 14, 15, 16, 17],
        [18, 19, 20, 21, 22, 23, 24, 25, 26]]),
 array([], shape=(0, 9), dtype=int64),
 array([], shape=(0, 9), dtype=int64)]

In [21]:
# Split column-wise at column index 5: columns 0-4 and columns 5-8.
np.hsplit(x, (5, ))

[array([[ 0,  1,  2,  3,  4],
        [ 9, 10, 11, 12, 13],
        [18, 19, 20, 21, 22]]), array([[ 5,  6,  7,  8],
        [14, 15, 16, 17],
        [23, 24, 25, 26]])]

In [22]:
# Reset x to the small 2x3 example matrix for the vsplit demonstration.
x = np.array([
    [1, 2, 3],
    [4, 5, 6],
])

In [23]:
# An integer (instead of split points) requests that many equal parts:
# the 2 rows are split into two 1-row arrays.
np.vsplit(x, 2)

[array([[1, 2, 3]]), array([[4, 5, 6]])]

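- 결론 : 마이너스를 쓰면 헷갈리기 쉽다. `np.split(x, (a, b))`의 인덱스는 분할 지점이며, 결과는 항상 `x[:a]`, `x[a:b]`, `x[b:]` 세 조각이다.
- 이유
  1. 음수 인덱스는 일반 슬라이싱처럼 뒤에서부터 센다. 예: `(-2, 5)` → `x[:-2]`(첫 행), `x[-2:5]`(나머지 두 행), `x[5:]`(빈 배열).
  2. 분할 지점이 2개이므로 항상 3개로 나뉘며, 구간이 비어 있으면 빈 배열이 반환된다. 예: `(2, 2)`의 가운데 조각 `x[2:2]`.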

### stack

In [24]:
# Two 1-D arrays of equal length for the stacking examples: x = 0..4, y = 5..9.
x, y = np.arange(5), np.arange(5, 10)

In [25]:
# Join along a new leading axis: two length-5 vectors -> shape (2, 5).
np.stack((x, y))

array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9]])

In [26]:
# Insert the new axis at position 1 instead: each input becomes a column -> shape (5, 2).
np.stack((x, y), axis=1)

array([[0, 5],
       [1, 6],
       [2, 7],
       [3, 8],
       [4, 9]])

In [27]:
# Stack as rows; for these 1-D inputs the result equals np.stack((x, y)).
np.vstack((x, y))

array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9]])

In [28]:
# Use each 1-D input as a column; the result equals np.stack((x, y), axis=1).
np.column_stack([x, y])

array([[0, 5],
       [1, 6],
       [2, 7],
       [3, 8],
       [4, 9]])

### slicing

In [29]:
# Index-style shorthand for column-wise joining: x and y become the two columns.
np.c_[x, y]

array([[0, 5],
       [1, 6],
       [2, 7],
       [3, 8],
       [4, 9]])

In [30]:
# Index-style shorthand for concatenation: the two 1-D arrays are joined end to end.
np.r_[x, y]

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [31]:
# np.s_ just returns the index expression it is given; with two arrays that is
# the plain tuple (x, y), with no concatenation.
np.s_[x, y]

(array([0, 1, 2, 3, 4]), array([5, 6, 7, 8, 9]))

In [32]:
# An explicit slice object is what the [1:5] syntax creates behind the scenes.
[1, 2, 3, 4, 5][slice(1, 5)]

[2, 3, 4, 5]

In [33]:
# np.s_[1] evaluates to the index 1, so this selects element 1 of the array.
np.arange(10)[np.s_[1]]

1

### np.newaxis

In [34]:
# np.newaxis is an alias for None; the identity test `is` shows that directly
# and is the idiomatic way to compare against None (avoids `== None`).
np.newaxis is None

True

In [35]:
x

array([0, 1, 2, 3, 4])

In [36]:
# Add a length-1 axis in front: shape (5,) -> (1, 5).
x[np.newaxis]

array([[0, 1, 2, 3, 4]])

In [37]:
# Indexing with None gives the same result as indexing with np.newaxis.
x[None]

array([[0, 1, 2, 3, 4]])

In [38]:
# Add the new axis after the existing one: shape (5,) -> (5, 1), a column.
x[:, np.newaxis]

array([[0],
       [1],
       [2],
       [3],
       [4]])

In [39]:
# Explicit form of x[np.newaxis]: new axis first, then the original axis -> (1, 5).
x[np.newaxis, :]

array([[0, 1, 2, 3, 4]])

In [40]:
# Rebind x to its 2-D version, shape (1, 5), for the following cells.
# NOTE: this cell is not idempotent; every re-run prepends another axis to x.
x = x[np.newaxis]

In [41]:
x

array([[0, 1, 2, 3, 4]])

In [42]:
# x is now (1, 5); a new axis in the middle gives shape (1, 1, 5).
x[:, np.newaxis]

array([[[0, 1, 2, 3, 4]]])

In [43]:
# Same as x[:, np.newaxis] with the trailing axis written out: shape (1, 1, 5).
x[:, np.newaxis, :]

array([[[0, 1, 2, 3, 4]]])

In [44]:
# New axis at the end: shape (1, 5) -> (1, 5, 1).
x[:, :, np.newaxis]

array([[[0],
        [1],
        [2],
        [3],
        [4]]])

In [45]:
# Three new leading axes at once: shape (1, 5) -> (1, 1, 1, 1, 5).
x[np.newaxis, np.newaxis, np.newaxis]

array([[[[[0, 1, 2, 3, 4]]]]])

In [46]:
# Function form of x[np.newaxis]: insert a length-1 axis at position 0 -> (1, 1, 5).
np.expand_dims(x, 0)

array([[[0, 1, 2, 3, 4]]])

In [47]:
# Insert the new axis at position 1; since x is (1, 5) the result is again (1, 1, 5).
np.expand_dims(x, 1)

array([[[0, 1, 2, 3, 4]]])

In [48]:
# Insert the new axis at the end: shape (1, 5) -> (1, 5, 1).
np.expand_dims(x, 2)

array([[[0],
        [1],
        [2],
        [3],
        [4]]])

In [49]:
# NOTE(review): axis=3 is out of range for the 2-D x (valid insert positions are 0..2).
# In this run the call still returned the axis=2 result and emitted a warning
# (truncated in the output below). Newer NumPy releases are expected to raise
# an error here instead; confirm against the installed version.
np.expand_dims(x, 3)

  """Entry point for launching an IPython kernel.


array([[[0],
        [1],
        [2],
        [3],
        [4]]])

In [50]:
# NOTE(review): axis=200 is far out of range; in this run it was treated like
# "append at the end" with a warning. Do not rely on this; confirm the
# behaviour (likely an error) on the installed NumPy version.
np.expand_dims(x, 200)

  """Entry point for launching an IPython kernel.


array([[[0],
        [1],
        [2],
        [3],
        [4]]])

# Structured Arrays
[참고](https://numpy.org/doc/stable/user/basics.rec.html)

## namedtuple

In [51]:
from collections import namedtuple

In [52]:
# Lightweight record type with three positional fields, accessible by name or index.
Address = namedtuple('Address', ['name', 'age', 'weight'])

In [53]:
Address

__main__.Address

In [54]:
# Build one record; keyword arguments make the field-to-value mapping explicit.
my_addr = Address(name='nashorstyle', age=25, weight=72)

In [55]:
my_addr

Address(name='nashorstyle', age=25, weight=72)

In [56]:
my_addr.name

'nashorstyle'

In [57]:
my_addr.age

25

In [58]:
my_addr.weight

72

In [59]:
my_addr[0]

'nashorstyle'

In [60]:
my_addr[1]

25

In [61]:
my_addr[2]

72

## NumPy에서

디스크 기반이 아닌 메모리 기반으로 올라가기 때문에 속도가 빠름

In [62]:
# Structured array: every element is a record with three named, typed fields
# (name: 'U10' string, age: 'i4' integer, weight: 'f4' float).
x = np.array(
    [('Rex', 9, 81.0), ('Fido', 3, 27.0)],
    dtype=np.dtype([('name', 'U10'), ('age', 'i4'), ('weight', 'f4')]),
)

In [63]:
x

array([('Rex', 9, 81.), ('Fido', 3, 27.)],
      dtype=[('name', '<U10'), ('age', '<i4'), ('weight', '<f4')])

In [64]:
x[0]

('Rex', 9, 81.)

In [65]:
x[1]

('Fido', 3, 27.)

In [66]:
# Intentional failure: unlike a namedtuple, a structured ndarray does not expose
# its fields as attributes (AttributeError below); use x['name'] instead.
x.name

AttributeError: 'numpy.ndarray' object has no attribute 'name'

In [67]:
# Intentional failure: a single record (numpy.void) has no field attributes
# either (AttributeError below).
x[0].name

AttributeError: 'numpy.void' object has no attribute 'name'

In [68]:
'name' in dir(x)

False

In [69]:
'name' in dir(x[0])

False

In [70]:
x.dtype

dtype([('name', '<U10'), ('age', '<i4'), ('weight', '<f4')])

In [71]:
x[0].dtype

dtype([('name', '<U10'), ('age', '<i4'), ('weight', '<f4')])

In [72]:
# Fields are selected by name with [] and come back as a regular array of that field's dtype.
x['name']

array(['Rex', 'Fido'], dtype='<U10')

In [73]:
x['age']

array([9, 3], dtype=int32)

In [74]:
x['weight']

array([81., 27.], dtype=float32)

numpy에는 이미 indexing 기법이 존재하기 때문에 numpy의 structured array의 이름을 숫자로 정의할 수는 없으며, 문자열만 가능함

In [75]:
# Intentional failure: field names given as integers (0, 1, 2) are rejected
# when the dtype is built, raising the TypeError shown below.
x = np.array([('Rex', 9, 81.0), ('Fido', 3, 27.0)],
             dtype=[(0, 'U10'), (1, 'i4'), (2, 'f4')])

TypeError: data type not understood

---

# Pandas & Seaborn

In [77]:
import seaborn as sns

In [78]:
# Load the 'tips' example dataset through seaborn; the result is a pandas
# DataFrame with 244 rows and 7 columns (see tips.info() below).
tips = sns.load_dataset('tips')

In [79]:
tips.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 244 entries, 0 to 243
Data columns (total 7 columns):
total_bill    244 non-null float64
tip           244 non-null float64
sex           244 non-null category
smoker        244 non-null category
day           244 non-null category
time          244 non-null category
size          244 non-null int64
dtypes: category(4), float64(2), int64(1)
memory usage: 7.2 KB


In [80]:
# NOTE: attribute access resolves to DataFrame.size (total number of cells:
# 244 rows x 7 columns = 1708), not to the 'size' column. Use tips['size']
# to get that column.
tips.size

1708

In [81]:
tips.dtypes

total_bill     float64
tip            float64
sex           category
smoker        category
day           category
time          category
size             int64
dtype: object

In [82]:
# Label-based lookup: the row whose index label is 0, returned as a Series.
tips.loc[0]

total_bill     16.99
tip             1.01
sex           Female
smoker            No
day              Sun
time          Dinner
size               2
Name: 0, dtype: object

In [83]:
# Position-based lookup: the first row. Same result as tips.loc[0] here
# because the index is the default RangeIndex 0..243.
tips.iloc[0]

total_bill     16.99
tip             1.01
sex           Female
smoker            No
day              Sun
time          Dinner
size               2
Name: 0, dtype: object

In [84]:
tips.columns

Index(['total_bill', 'tip', 'sex', 'smoker', 'day', 'time', 'size'], dtype='object')

In [85]:
# Attribute-style column access: returns the 'tip' column as a Series.
tips.tip

0      1.01
1      1.66
2      3.50
3      3.31
4      3.61
5      4.71
6      2.00
7      3.12
8      1.96
9      3.23
10     1.71
11     5.00
12     1.57
13     3.00
14     3.02
15     3.92
16     1.67
17     3.71
18     3.50
19     3.35
20     4.08
21     2.75
22     2.23
23     7.58
24     3.18
25     2.34
26     2.00
27     2.00
28     4.30
29     3.00
       ... 
214    6.50
215    1.10
216    3.00
217    1.50
218    1.44
219    3.09
220    2.20
221    3.48
222    1.92
223    3.00
224    1.58
225    2.50
226    2.00
227    3.00
228    2.72
229    2.88
230    2.00
231    3.00
232    3.39
233    1.47
234    3.00
235    1.25
236    1.00
237    1.17
238    4.67
239    5.92
240    2.00
241    2.00
242    1.75
243    3.00
Name: tip, Length: 244, dtype: float64

In [86]:
# Bracket access with a single label: the same Series as tips.tip.
tips['tip']

0      1.01
1      1.66
2      3.50
3      3.31
4      3.61
5      4.71
6      2.00
7      3.12
8      1.96
9      3.23
10     1.71
11     5.00
12     1.57
13     3.00
14     3.02
15     3.92
16     1.67
17     3.71
18     3.50
19     3.35
20     4.08
21     2.75
22     2.23
23     7.58
24     3.18
25     2.34
26     2.00
27     2.00
28     4.30
29     3.00
       ... 
214    6.50
215    1.10
216    3.00
217    1.50
218    1.44
219    3.09
220    2.20
221    3.48
222    1.92
223    3.00
224    1.58
225    2.50
226    2.00
227    3.00
228    2.72
229    2.88
230    2.00
231    3.00
232    3.39
233    1.47
234    3.00
235    1.25
236    1.00
237    1.17
238    4.67
239    5.92
240    2.00
241    2.00
242    1.75
243    3.00
Name: tip, Length: 244, dtype: float64

In [87]:
# Indexing with a list of labels returns a DataFrame (one column here), not a Series.
tips[['tip']]

Unnamed: 0,tip
0,1.01
1,1.66
2,3.50
3,3.31
4,3.61
5,4.71
6,2.00
7,3.12
8,1.96
9,3.23


In [88]:
tips[['tip', 'smoker']]

Unnamed: 0,tip,smoker
0,1.01,No
1,1.66,No
2,3.50,No
3,3.31,No
4,3.61,No
5,4.71,No
6,2.00,No
7,3.12,No
8,1.96,No
9,3.23,No


In [89]:
# Positional slicing: all rows, every column except the last one ('size').
tips.iloc[:, :-1]

Unnamed: 0,total_bill,tip,sex,smoker,day,time
0,16.99,1.01,Female,No,Sun,Dinner
1,10.34,1.66,Male,No,Sun,Dinner
2,21.01,3.50,Male,No,Sun,Dinner
3,23.68,3.31,Male,No,Sun,Dinner
4,24.59,3.61,Female,No,Sun,Dinner
5,25.29,4.71,Male,No,Sun,Dinner
6,8.77,2.00,Male,No,Sun,Dinner
7,26.88,3.12,Male,No,Sun,Dinner
8,15.04,1.96,Male,No,Sun,Dinner
9,14.78,3.23,Male,No,Sun,Dinner


In [90]:
# .values exposes the frame's data as a NumPy ndarray.
type(tips.values)

numpy.ndarray

In [91]:
# The columns have mixed types (floats, categories, ints), so the resulting
# array has dtype=object.
tips.values

array([[16.99, 1.01, 'Female', ..., 'Sun', 'Dinner', 2],
       [10.34, 1.66, 'Male', ..., 'Sun', 'Dinner', 3],
       [21.01, 3.5, 'Male', ..., 'Sun', 'Dinner', 3],
       ...,
       [22.67, 2.0, 'Male', ..., 'Sat', 'Dinner', 2],
       [17.82, 1.75, 'Male', ..., 'Sat', 'Dinner', 2],
       [18.78, 3.0, 'Female', ..., 'Thur', 'Dinner', 2]], dtype=object)

### broadcasting

In [92]:
# Scalar broadcasting: 5 is added to every element of the 'tip' column.
tips['tip'] + 5

0       6.01
1       6.66
2       8.50
3       8.31
4       8.61
5       9.71
6       7.00
7       8.12
8       6.96
9       8.23
10      6.71
11     10.00
12      6.57
13      8.00
14      8.02
15      8.92
16      6.67
17      8.71
18      8.50
19      8.35
20      9.08
21      7.75
22      7.23
23     12.58
24      8.18
25      7.34
26      7.00
27      7.00
28      9.30
29      8.00
       ...  
214    11.50
215     6.10
216     8.00
217     6.50
218     6.44
219     8.09
220     7.20
221     8.48
222     6.92
223     8.00
224     6.58
225     7.50
226     7.00
227     8.00
228     7.72
229     7.88
230     7.00
231     8.00
232     8.39
233     6.47
234     8.00
235     6.25
236     6.00
237     6.17
238     9.67
239    10.92
240     7.00
241     7.00
242     6.75
243     8.00
Name: tip, Length: 244, dtype: float64

In [93]:
# Boolean-mask filtering: the comparison yields one True/False per row, and
# only the rows where tip > 5 are kept.
tips[tips.tip > 5]

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
23,39.42,7.58,Male,No,Sat,Dinner,4
44,30.4,5.6,Male,No,Sun,Dinner,4
47,32.4,6.0,Male,No,Sun,Dinner,4
52,34.81,5.2,Female,No,Sun,Dinner,4
59,48.27,6.73,Male,No,Sat,Dinner,4
85,34.83,5.17,Female,No,Thur,Lunch,4
88,24.71,5.85,Male,No,Thur,Lunch,2
116,29.93,5.07,Male,No,Sun,Dinner,4
141,34.3,6.7,Male,No,Thur,Lunch,6
155,29.85,5.14,Female,No,Sun,Dinner,5


### loc & iloc
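- loc: label-based selection (row/column labels; slice endpoints are inclusive)
- iloc: integer position-based selection (0-based; slice end is exclusive)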

In [94]:
# .loc slices by index label and includes both endpoints: rows 3, 4, 5 and 6.
tips.loc[3:6]

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4
5,25.29,4.71,Male,No,Sun,Dinner,4
6,8.77,2.0,Male,No,Sun,Dinner,2


In [95]:
# Label slice 0:1 returns two rows (labels 0 and 1) because the end label is included.
tips.loc[0:1]

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3


In [96]:
# Row label first, column label second: a single scalar value.
tips.loc[3, 'day']

'Sun'

In [97]:
tips[['tip', 'smoker']]

Unnamed: 0,tip,smoker
0,1.01,No
1,1.66,No
2,3.50,No
3,3.31,No
4,3.61,No
5,4.71,No
6,2.00,No
7,3.12,No
8,1.96,No
9,3.23,No


In [98]:
# Columns come back in the order given in the list ('smoker' first, then 'tip').
tips[['smoker', 'tip']]

Unnamed: 0,smoker,tip
0,No,1.01
1,No,1.66
2,No,3.50
3,No,3.31
4,No,3.61
5,No,4.71
6,No,2.00
7,No,3.12
8,No,1.96
9,No,3.23


### 성능 비교

In [99]:
%timeit tips.tip

2.9 µs ± 51.6 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [100]:
%timeit tips['tip']

1.23 µs ± 41.3 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)


In [101]:
%timeit tips[['tip']]

689 µs ± 23 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [102]:
%timeit tips.loc[:, 'tip']

34.9 µs ± 3.47 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [103]:
%timeit tips.iloc[:, 1]

51.5 µs ± 1.45 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
