In [None]:
# ozone데이터 사용

# 1. python 구현

In [77]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy import stats

def numerical_derivative(f,x):    # x : ndarray [1.0  2.0]
    
    delta_x = 1e-4
    derivative_x = np.zeros_like(x)   # derivative_x : [0.0  0.0]
    
    # iterator를 이용해서 입력변수 x에 대한 편미분을 수행!
    it = np.nditer(x, flags=['multi_index'])
    
    while not it.finished:
        
        idx = it.multi_index 
        tmp = x[idx]              # tmp : 1.0
        
        x[idx] = tmp + delta_x    # x : ndarray [1.0001  2.0]
        fx_plus_delta = f(x)
        
        x[idx] = tmp - delta_x    # x : ndarray [0.9999  2.0]  
        fx_minus_delta = f(x)
        
        derivative_x[idx] = (fx_plus_delta - fx_minus_delta) / (2 * delta_x)
        
        x[idx] = tmp              #  x : ndarray [1.0  2.0]  
        it.iternext()
        
    return derivative_x

# Raw Data Set Loading
df = pd.read_csv('./data/ozone.csv')
training_data = df[['Ozone', 'Solar.R', 'Wind' ,'Temp']]

# 결치값을 삭제
training_data.dropna(how='any',
                     inplace=True)   # how='any' : 결치가 존재하는 행을 삭제

# 데이터에 이상치가 있는지 확인하고 제거
zscore_threshold = 2.0
outlier = training_data['Ozone'][(np.abs(stats.zscore(training_data['Ozone'])) > zscore_threshold)]
training_data = training_data.loc[np.isin(training_data['Ozone'],outlier, invert=True)]

# Training Data Set
x_data = training_data.drop('Ozone', axis=1, inplace=False)
t_data = training_data['Ozone'].values.reshape(-1,1)

# Weight, bias
W = np.random.rand(3,1)
b = np.random.rand(1)

# loss function
def loss_func(input_data):
    W = input_data[0]
    b = input_data[1]
    
    y = np.dot(x_data,W) + b
    return np.mean(np.power(t_data-y,2))

# predict
def predict(x):
    y = np.dot(x,W) + b
    return y

# learning_rate
learning_rate = 1e-4

# 반복 학습
for step in range(300000):
    
    input_param = np.concatenate((W.ravel(), b.ravel()), axis=0)  # [W b]
    derivative_result = learning_rate * numerical_derivative(loss_func, input_param)

    W = W - derivative_result[0].reshape(1,1)
    b = b - derivative_result[1]
    
    if step % 30000 == 0:
        input_param = np.concatenate((W.ravel(), b.ravel()), axis=0)
        print('W : {}, b:{}, loss:{}'.format(W, b, loss_func(input_param)))


W : [[-0.08274606]
 [ 0.4858181 ]
 [ 0.45774588]], b:[0.12973681], loss:2748.167938163268
W : [[1.71272038e+12]
 [1.71272038e+12]
 [1.71272038e+12]], b:[3.50386827e+10], loss:4.72483235277104e+28
W : [[1.71272038e+12]
 [1.71272038e+12]
 [1.71272038e+12]], b:[3.50386827e+10], loss:4.72483235277104e+28
W : [[1.71272038e+12]
 [1.71272038e+12]
 [1.71272038e+12]], b:[3.50386827e+10], loss:4.72483235277104e+28
W : [[1.71272038e+12]
 [1.71272038e+12]
 [1.71272038e+12]], b:[3.50386827e+10], loss:4.72483235277104e+28
W : [[1.71272038e+12]
 [1.71272038e+12]
 [1.71272038e+12]], b:[3.50386827e+10], loss:4.72483235277104e+28
W : [[1.71272038e+12]
 [1.71272038e+12]
 [1.71272038e+12]], b:[3.50386827e+10], loss:4.72483235277104e+28
W : [[1.71272038e+12]
 [1.71272038e+12]
 [1.71272038e+12]], b:[3.50386827e+10], loss:4.72483235277104e+28
W : [[1.71272038e+12]
 [1.71272038e+12]
 [1.71272038e+12]], b:[3.50386827e+10], loss:4.72483235277104e+28
W : [[1.71272038e+12]
 [1.71272038e+12]
 [1.71272038e+12]], b:

In [78]:
# 학습종료 후 예측
predict_data = predict(np.array([[274, 10.9, 68]]))
print('오존량 : {}'.format(predict_data)) 

오존량 : [[6.0445406e+14]]


In [None]:
# 2. tensorflow 구현

In [75]:
import tensorflow as tf
import pandas as pd
import numpy as np

df = pd.read_csv('./data/ozone.csv')
df = df[['Ozone','Solar.R','Wind','Temp']]
df = df.dropna(how='any', inplace=False)

#이상치 여부 확인하기
# plt.boxplot(df["Solar.R"]) 
# plt.boxplot(df["Temp"]) 
# plt.boxplot(df["Ozone"]) 
# plt.boxplot(df["Wind"])

# ozone 이상치 제거
q1, q3 = np.percentile(df['Ozone'],[25,75]) 
iqr= q3 - q1 
upper = q3 + iqr * 1.5 
mask = df['Ozone'] > upper 
df = df.loc[~mask] 

# wind 이상치제거
q_1, q_3 = np.percentile(df['Wind'],[25,75]) 
iqr = q_3 - q_1 
up = q_3 + iqr * 1.5 
mask2 = df['Wind'] > up 
df = df.loc[~mask2]

x_data = df.drop('Ozone', axis=1, inplace=False)   # (25,3)
t_data = df['Ozone'].values.reshape(-1,1)   # (25,1)

X = tf.placeholder(shape=[None,3], dtype=tf.float32)
T = tf.placeholder(shape=[None,1], dtype=tf.float32)

W = tf.Variable(tf.random.normal([3,1]))
b = tf.Variable(tf.random.normal([1]))

H = tf.matmul(X,W) + b   

loss = tf.reduce_mean(tf.square(H-T))

train = tf.train.GradientDescentOptimizer(learning_rate=1e-7).minimize(loss)

sess = tf.Session()
sess.run(tf.global_variables_initializer())


# 반복 학습
for step in range(300000):
    
    _, W_val, b_val, loss_val = sess.run([train, W, b, loss], 
                                         feed_dict={X: x_data,
                                                    T: t_data})
    if step % 30000 == 0:
        print('W : {}, b : {}, loss : {}'.format(W_val, b_val, loss_val))
        

W : [[0.2919028 ]
 [0.48837873]
 [0.36716276]], b : [-2.1595547], loss : 3053.418701171875
W : [[0.05239572]
 [0.0617257 ]
 [0.44225323]], b : [-2.1727548], loss : 675.8026733398438
W : [[ 0.05257703]
 [-0.33299652]
 [ 0.490042  ]], b : [-2.1870599], loss : 623.0198364257812
W : [[ 0.05281532]
 [-0.6959982 ]
 [ 0.5337429 ]], b : [-2.1966984], loss : 578.3973388671875
W : [[ 0.05303305]
 [-1.0298387 ]
 [ 0.5739188 ]], b : [-2.203851], loss : 540.66455078125
W : [[ 0.0532665]
 [-1.3367454]
 [ 0.6107648]], b : [-2.2110035], loss : 508.763671875
W : [[ 0.05345909]
 [-1.6190245 ]
 [ 0.64472085]], b : [-2.218156], loss : 481.7784118652344
W : [[ 0.05362983]
 [-1.878597  ]
 [ 0.675976  ]], b : [-2.2253087], loss : 458.9566345214844
W : [[ 0.05377683]
 [-2.1170895 ]
 [ 0.7047238 ]], b : [-2.2324612], loss : 439.6710205078125
W : [[ 0.05394335]
 [-2.3368046 ]
 [ 0.73112917]], b : [-2.2396138], loss : 423.3312072753906


In [76]:
# Prediction

result = sess.run(H, 
                  feed_dict={X: np.array([[274, 10.9, 68]])})
print('오존량은 :', result)  

오존량은 : [[36.270367]]


In [None]:
# 3. sklearn 구현

In [62]:
from sklearn import linear_model 
import numpy as np 
import pandas as pd 

df = pd.read_csv('./data/ozone.csv')
df = df[['Ozone','Solar.R','Wind','Temp']]
df = df.dropna(how='any', inplace=False)

#이상치 여부 확인하기
# plt.boxplot(df["Solar.R"]) 
# plt.boxplot(df["Temp"]) 
# plt.boxplot(df["Ozone"]) 
# plt.boxplot(df["Wind"])

# ozone 이상치 제거
q1, q3 = np.percentile(df['Ozone'],[25,75]) 
iqr= q3 - q1 
upper = q3 + iqr * 1.5 
mask = df['Ozone'] > upper 
df = df.loc[~mask] 

# wind 이상치제거
q_1, q_3 = np.percentile(df['Wind'],[25,75]) 
iqr = q_3 - q_1 
up = q_3 + iqr * 1.5 
mask2 = df['Wind'] > up 
df = df.loc[~mask2]

# learning data
X = df[['Solar.R','Wind','Temp']]
Y = df['Ozone']

# learning
model = linear_model.LinearRegression()
model.fit(X,Y)

# prediction
prediction = model.predict([[274, 10.9, 68]])
print('오존량은 : {}'.format(prediction))


오존량은 : [23.71957907]


  "X does not have valid feature names, but"
