In [1]:
# 추천자료 
# - https://www.youtube.com/watch?v=SMpPaSpCaAo
# - http://www.deeplearning.net/tutorial/index.html
# - git clone https://github.com/lisa-lab/DeepLearningTutorials.git

In [2]:
# 실전 Theano

In [3]:
%matplotlib inline

# pip install Theano

import numpy as np
import matplotlib.pyplot as plt
import theano
import theano.tensor as T

In [4]:
# Let's do some hacking first
# 1. Symbolic variables
# x, y and z hold no particular value; they only represent a
# relationship between numbers (here: z is the sum of x and y).
x = T.scalar()
y = T.scalar()
z = x + y

In [5]:
# "Compiles" a reusable function ftnSum.
# Author's note: running this cell pauses briefly because an actual C
# compilation takes place; afterwards the compiled function can be reused.
# inputs  : a list of symbolic variables.
# outputs : the symbolic variable that holds the result.
# Author's notes: Theano optimizes the computation automatically
# (including parallelism). Whether it runs on the GPU or on several CPUs
# is chosen through Theano's configuration, set from outside (e.g. on the
# command line) before Theano is started.
ftnSum = theano.function(inputs=[x, y], outputs=z)

In [6]:
# Call the compiled function with concrete numbers (x=3, y=4).
print(ftnSum(3,4)) 

7.0


In [7]:
# Data types
# Each line rebinds x, so only the last assignment remains bound afterwards.
x = T.scalar() # default dtype (see theano.config.floatX)
x = T.iscalar() # int32
x = T.lscalar() # int64
x = T.dscalar() # float64

In [8]:
# The default data type can be checked with theano.config.floatX
print(theano.config.floatX)

float64


In [9]:
# A symbolic variable can have various dimensions
# (each line rebinds x; only the last assignment remains bound)
x = T.scalar() # 0-D
x = T.vector() # 1-D
x = T.matrix() # 2-D (a 3-D variable would be T.tensor3())
x = T.tensor4() # 4-D

In [10]:
# Two symbolic matrices and their matrix product.
x = T.matrix()
y = T.matrix()
z = T.dot(x, y)

In [11]:
# Compile the graph for z. Despite the "Sum" in its name, ftnSum_mat
# evaluates T.dot(x, y), not an element-wise sum.
ftnSum_mat = theano.function([x, y], z)

In [12]:
# The inputs are not matrices: ftnSum_mat was compiled for 2-D inputs, so
# scalar arguments are rejected with a TypeError. The failure is the point
# of this cell, so it is caught and displayed instead of being left as an
# unhandled traceback that would stop "Restart & Run All".
try:
    print(ftnSum_mat(3, 4))
except TypeError as err:
    print("TypeError: {}".format(err))

TypeError: ('Bad input argument to theano function at index 0(0-based)', 'Wrong number of dimensions: expected 2, got 0 with shape ().')

In [13]:
# 3x3 test matrix with entries drawn uniformly from [0, 1).
a = np.random.random(size=(3, 3))
a

array([[ 0.44044296,  0.60303309,  0.38584833],
       [ 0.49866465,  0.70997668,  0.52595583],
       [ 0.13384199,  0.69914047,  0.62473428]])

In [14]:
# Second 3x3 test matrix with entries drawn uniformly from [0, 1).
b = np.random.random(size=(3, 3))
b

array([[ 0.28225125,  0.20428376,  0.99418314],
       [ 0.72860981,  0.59863586,  0.5276889 ],
       [ 0.29856048,  0.55444276,  0.1331747 ]])

In [15]:
# Real 2-D arrays are accepted: prints the result of ftnSum_mat for a and b.
print(ftnSum_mat(a,b))

[[ 0.67889047  0.66490338  0.80748007]
 [ 0.81507432  0.81849899  0.94045481]
 [ 0.73369864  0.79225169  0.58519092]]


In [16]:
# Author's note: on the GPU Theano supports 32-bit floats ONLY, while
# NumPy produces 64-bit floats by default, so the arrays are cast manually.
# Casting to theano.config.floatX follows whatever precision Theano is
# currently configured with.
a = np.random.random((3,3)).astype(theano.config.floatX)
b = np.random.random((3,3)).astype(theano.config.floatX)
print(a, b, ftnSum_mat(a,b))

(array([[  6.84176443e-01,   7.05624803e-02,   8.41396742e-01],
       [  9.00853917e-01,   9.51530784e-01,   9.21344149e-01],
       [  3.53216237e-04,   4.61361717e-01,   4.60603229e-01]]), array([[ 0.92940705,  0.73033111,  0.03108234],
       [ 0.43446638,  0.18009143,  0.05902178],
       [ 0.23989826,  0.46565675,  0.69574988]]), array([[ 0.86838505,  0.90418511,  0.61083221],
       [ 1.47169698,  1.2583143 ,  0.72518677],
       [ 0.31127235,  0.29782826,  0.34770601]]))


In [17]:
# 1. data types를 잘 파악하라(int, float ..)
# 2. dimensions에 주의하라(scalar, vector, matrix)
# 3. float32를 써라 

In [18]:
# Shared Variables

# Python Variables : Have a specific value
# Theano Symbolic Variables : Are empty slots for computation
# Shared Variables : Are slots for computation
#                    and have a specific value
#                    Whose values are stored in GPU memory

# Python Variables ------- Theano Symbolic Variables
#                    | 
#                    V
#           Shared Variables

In [19]:
# You can easily create shared variables from a NumPy array
a = np.array([[1,2],[3,4]], dtype=theano.config.floatX)
x = theano.shared(a)

# (It seems) the data is sent to GPU memory at this moment

In [20]:
# Retrieve or update the stored value.
# Note: this cell is not idempotent - every run writes the stored array
# plus 1 back into the shared variable.
print(x.get_value())
x.set_value(x.get_value()+1)
print(x.get_value())

[[ 1.  2.]
 [ 3.  4.]]
[[ 2.  3.]
 [ 4.  5.]]


In [21]:
# Shared variables - complications

# 1. "Borrowing": making a soft copy
a = np.array([[1,2],[3,4]], dtype=theano.config.floatX)
x_1 = theano.shared(a)  # borrow defaults to False
x_2 = theano.shared(a, borrow=True)

# "borrow=True" is generally recommended
# - Memory saving (not the GPU's, unfortunately)
# - Faster code

# Keep in mind that it is a soft copy
# - Changing a also changes x_2

In [22]:
# 2. Shared variables cannot be used as explicit inputs
# They can only be intermediates of the computation. (The original note
# also listed outputs; only the input case is exercised by this cell.)
a = np.array([[1,2],[3,4]], dtype=theano.config.floatX)
x = theano.shared(a)
y = x ** 2
# The TypeError raised here is the demonstration itself, so it is caught
# and displayed instead of being left as an unhandled traceback that would
# stop "Restart & Run All".
try:
    ftnShared = theano.function([x], y)
except TypeError as err:
    print("TypeError: {}".format(err))

TypeError: Cannot use a shared variable (<TensorType(float64, matrix)>) as explicit input. Consider substituting a non-shared variable via the `givens` parameter

In [23]:
# II. Basic Functions

# Theano provides predefined deep learning related functions !

# Multi Layer Perceptron :
# - T.nnet.sigmoid()
# - T.nnet.categorical_crossentropy()
# - T.nnet.softmax()

# Convolutional Neural Net :
# - T.nnet.conv.conv2d()
# - T.signal.downsample.max_pool_2d()
# - and a few more..complicated ..

In [24]:
# III. Optimization

In [25]:
# Symbolic differentiation
# d/dx (x^2) => 2x

x = T.scalar()
y = x ** 2
diff = theano.function([x], T.grad(y, [x]))

# Function to differentiate (objective function)
# - Should be a scalar
# Differentiating variable
# - Can be many
# The output of T.grad() is a list here, because the variables to
# differentiate with respect to are passed as a list ([x]).

In [26]:
# The output of T.grad() is a list, so the compiled function returns a list
diff(3)

[array(6.0)]

In [27]:
# If you do not want the result wrapped in a list, take element [0] of
# the gradient list before compiling.
diff = theano.function([x], T.grad(y, [x])[0])
diff(3)

array(6.0)

In [28]:
# Evaluate & Update

x = T.scalar()
w = theano.shared(np.array(3, dtype=theano.config.floatX)
                  ,borrow=True)

In [29]:
obj = (1 - x*w)**2 # our objective
                   # Multiply x (the data) by w (the weight) and try to
                   # bring the result close to 1: the optimization target.

In [30]:
# Update rule for w: subtract 0.1 times the gradient of obj w.r.t. w.
learn_w = (w, w-0.1*T.grad(obj,w))
# From x, compute obj, and use it to compute the update
# x -> obj -> updates
# input : x
# output : obj
# updates : updates
learn = theano.function([x], obj, updates=[learn_w])

# "updates" argument
# - A list of tuples
# - [(before, after), (before, after), ...]

In [31]:
# Training step: with w initialised to 3 and x = 2,
# obj = (1 - 2*3)**2 = 25.
# The call also applies the update to w, so a repeated call sees a
# different w.
learn(2)

array(25.0)

In [None]:
# Theano를 이용한 딥러닝