In [2]:
import numpy as np

In [2]:
# Row vector: a 1-D array holding three elements
x = np.array([1,2,3])
print(x)

[1 2 3]


In [3]:
# 2-D array of shape (1, 3): the extra pair of brackets adds a leading axis
np.array([[1,2,3]])

array([[1, 2, 3]])

In [4]:
# Column vector: a 2-D array of shape (3, 1), one element per row
np.array([[1],[2],[3]])

array([[1],
       [2],
       [3]])

In [5]:
from scipy import linalg

In [6]:
# Example vector that the next cell normalizes to a unit vector
s = np.array([2,3,1])

- 단위 벡터

In [7]:
# Divide s by its Euclidean length to get the unit vector pointing the same way
s / linalg.norm(s)

array([0.53452248, 0.80178373, 0.26726124])

In [8]:
# Unit vector via the library norm: divide the vector by its Euclidean length
x = np.array([1, -3])
x_length = linalg.norm(x)
v_hat = x / x_length
print(v_hat)

[ 0.31622777 -0.9486833 ]


In [11]:
# Same normalization done by hand: the length is the square root of the sum of squares
x = np.array([-6, -8])
sum_of_squares = (x * x).sum()
v_hat2 = x / sum_of_squares ** 0.5
print(v_hat2)

[-0.6 -0.8]


- 영벡터 (zero)

In [12]:
np.zeros(5) # zero vector of length 5; the elements are floats by default

array([0., 0., 0., 0., 0.])

In [13]:
# Shape of the 1-D zero vector: a one-element tuple
np.zeros(5).shape

(5,)

In [14]:
# Passing a shape tuple creates a 2 x 2 zero matrix.
# NOTE(review): this rebinds `s`, which an earlier cell used for a vector.
s = (2,2)
np.zeros(s)

array([[0., 0.],
       [0., 0.]])

- 선형 독립, 선형 종속

In [3]:
# Rank: the number of linearly independent rows (or columns) of a matrix,
# computed here with np.linalg.matrix_rank()

x = np.array([[2,7],[5,1]])
np.linalg.matrix_rank(x)

2

In [4]:
# The second row is 1.5 times the first, so the rows are linearly dependent (rank 1)
x1 = np.array([[2,4],[3,6]])
np.linalg.matrix_rank(x1)

1

In [5]:
# Neither row is a multiple of the other, so the matrix has full rank (2)
x2 = np.array([[1,2],[3,5]])
np.linalg.matrix_rank(x2)

2

- 벡터의 덧셈

In [7]:
# Vector addition on plain Python lists: add the components position by position
x = [2, 3]
y = [3, 1]

z = [sum(pair) for pair in zip(x, y)]
print(z)

[5, 4]


- 벡터의 뺄셈

In [11]:
# Vector subtraction: component-wise difference of the lists x and y
z2 = [x_k - y_k for x_k, y_k in zip(x, y)]
print(z2)

[-1, 2]


- 벡터의 곱셈(스칼라 곱, inner product, cross product)

In [12]:
# Scalar multiplication: scale every component of x1 by the constant c
x1 = [3, 4]
c = 8
z3 = [c * component for component in x1]
print(z3)

[24, 32]


## 내적(inner product)

In [13]:
# Inner product of two 1-D arrays: 6*12 + 6*0 = 72
u = np.array([6,6])
v = np.array([12,0])
np.dot(u,v)

72

In [8]:
# zip(): pairs up the elements of its iterables position by position
list(zip([1,2,3],[4,5,6]))

[(1, 4), (2, 5), (3, 6)]

In [9]:
# With three iterables, zip() yields 3-tuples
list(zip([1,2,3],[4,5,6],[7,8,9]))

[(1, 4, 7), (2, 5, 8), (3, 6, 9)]

- 벡터의 내적 원리

In [5]:
import numpy as np

# Seven 3-D arrays, each holding the integers 0..5 and differing only in shape.
# The following cells pass them to np.dot to show which shape pairs are compatible.
A = np.arange(6).reshape(1, 2, 3)
B1 = np.arange(6).reshape(1, 2, 3)
B2 = np.arange(6).reshape(1, 3, 2)
B3 = np.arange(6).reshape(2, 1, 3)
B4 = np.arange(6).reshape(2, 3, 1)
B5 = np.arange(6).reshape(3, 1, 2)
B6 = np.arange(6).reshape(3, 2, 1)

In [6]:
np.dot(A,B1) # an error is the expected result: A's last axis (3) != B1's second-to-last axis (2)

ValueError: shapes (1,2,3) and (1,2,3) not aligned: 3 (dim 2) != 2 (dim 1)

In [7]:
# Works: A's last axis (3) matches B2's second-to-last axis (3)
np.dot(A,B2)

array([[[[10, 13]],

        [[28, 40]]]])

In [10]:
np.dot(A,B3) # an error is the expected result: A's last axis (3) != B3's second-to-last axis (1)

ValueError: shapes (1,2,3) and (2,1,3) not aligned: 3 (dim 2) != 1 (dim 1)

In [11]:
# Works: A's last axis (3) matches B4's second-to-last axis (3)
np.dot(A,B4)

array([[[[ 5],
         [14]],

        [[14],
         [50]]]])

In [12]:
# Fails: A's last axis (3) != B5's second-to-last axis (1)
np.dot(A,B5)

ValueError: shapes (1,2,3) and (3,1,2) not aligned: 3 (dim 2) != 1 (dim 1)

In [13]:
# Fails: A's last axis (3) != B6's second-to-last axis (2)
np.dot(A,B6)

ValueError: shapes (1,2,3) and (3,2,1) not aligned: 3 (dim 2) != 2 (dim 1)

In [14]:
import numpy as np

# Inner product of two 1-D arrays: 3*(-7) + (-6)*9 = -75
u, v = np.array([3, -6]), np.array([-7, 9])
uv = np.dot(u, v)
print(uv)

-75


## 외적(Cross product)
- cross() 함수 적용

In [17]:
# Cross product using a hand-written function; a and b are the two 3-component operands
a = (1,3,5)
b = (2,4,6)

def cross(a,b):
    """Return the cross product of two 3-component sequences as a list.

    Only indices 0, 1 and 2 of ``a`` and ``b`` are read. The result is a new
    three-element list ``[a1*b2 - a2*b1, a2*b0 - a0*b2, a0*b1 - a1*b0]``.
    """
    first = a[1] * b[2] - a[2] * b[1]
    second = a[2] * b[0] - a[0] * b[2]
    third = a[0] * b[1] - a[1] * b[0]
    return [first, second, third]

In [18]:
# Apply the hand-written cross() to a and b
cross(a,b)

[-2, 4, -2]

In [19]:
# The same cross product computed with NumPy
print(np.cross(a,b))

[-2  4 -2]


- 직교 벡터

In [20]:
# 1-D array of shape (2,) (a flat list gives one axis, not a 2-D array of shape (1, 2))

a = np.array([1,2])
print(a)

[1 2]


In [22]:
np.linalg.norm(a) # length of a (the default is the Euclidean norm)

2.23606797749979

In [23]:
np.linalg.norm(a, ord=1) # L1 norm: sum of the absolute values of the components

3.0

In [24]:
np.linalg.norm(a, ord=2) # L2 norm (ord=2): square root of the sum of squared components

2.23606797749979

## 벡터의 거리

- 유클리디언 거리

In [26]:
from scipy.spatial import distance

In [27]:
# Two points in 3-D space
p1 = (1,2,3)
p2 = (4,5,6)

# Euclidean (straight-line) distance between the two points
d = distance.euclidean(p1,p2)
print("Euclidean distance : ", d)

Euclidean distance :  5.196152422706632


- 맨해튼 거리

In [28]:
from math import *

In [29]:
# Two 3-D points used as input for the Manhattan distance
p1 = (1,2,3)
p2 = (4,5,6)

def manhattan_distance(x,y):
    """Return the Manhattan (L1) distance between two coordinate sequences.

    Coordinates are paired with ``zip``, so iteration stops at the end of the
    shorter sequence; the absolute differences of the pairs are summed.
    """
    total = 0
    for x_coord, y_coord in zip(x, y):
        total += abs(x_coord - y_coord)
    return total

# Distance between p1 and p2: |1-4| + |2-5| + |3-6| = 9
# NOTE(review): the printed label misspells "Manhattan"; left as-is so it matches the recorded output.
md = manhattan_distance(p1,p2)
print("Menhattan distance : ", md)

Menhattan distance :  9


- 코사인 유사도

In [30]:
from numpy import dot
from numpy.linalg import norm
import numpy as np

In [31]:
def cos_sim(A,B):
    """Return the cosine similarity of vectors ``A`` and ``B``.

    The value is the inner product of the two vectors divided by the product
    of their norms.
    """
    inner = dot(A, B)
    length_product = norm(A) * norm(B)
    return inner / length_product

# Three example vectors, each standing for one document, to compare pairwise
doc1 = np.array([1,1,1,1,0])
doc2 = np.array([1,0,1,0,1])
doc3 = np.array([2,1,1,1,1])

In [32]:
print(cos_sim(doc1, doc2)) # doc1 vs doc2
print(cos_sim(doc1, doc3)) # doc1 vs doc3: the most similar pair of the three
print(cos_sim(doc2, doc3)) # doc2 vs doc3

0.5773502691896258
0.8838834764831843
0.8164965809277259
