In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.contrib.learn.python.learn.datasets.mnist import read_data_sets
import datetime
from sklearn.preprocessing import normalize
import matplotlib.pyplot as plt

In [2]:
def one_hot(label):
    r = np.zeros(total_labels, dtype=int)
    r[label] = 1
    return r.tolist()

In [3]:
def batch_iter(data, batch_size, num_epochs, shuffle=False):
    data = np.array(data)
    data_size = len(data)
    # 每个epoch的num_batch
    num_batches_per_epoch = int((len(data) - 1) / batch_size) + 1
    print("num_batches_per_epoch:",num_batches_per_epoch)
    for epoch in range(num_epochs):
        # Shuffle the data at each epoch
        if shuffle:
            shuffle_indices = np.random.permutation(np.arange(data_size))
            shuffled_data = data[shuffle_indices]
        else:
            shuffled_data = data
        for batch_num in range(num_batches_per_epoch):
            start_index = batch_num * batch_size
            end_index = min((batch_num + 1) * batch_size, data_size)
            yield shuffled_data[start_index:end_index]

In [4]:
data = pd.read_csv('data/weather_data.csv', encoding = "ISO-8859-1")
index = data.columns.values
data_length = data.shape[0]

weather_type = set(data['Weather'])
weather_dict = dict(zip(weather_type,list(range(len(weather_type)))))

total_labels = len(weather_type)


data['Weather'].replace(pd.Series(weather_dict), inplace=True)

#data['Weather'] = data['Weather'].update(pd.Series(weather_dict))
data['Weather'] = data['Weather'].apply(one_hot)


In [5]:
data = data.sample(frac=1).reset_index(drop=True)

In [6]:
pd.Series(weather_dict)

Clear                        19
Cloudy                       14
Drizzle,Fog                  12
Fog                          18
Freezing Fog                  3
Mainly Clear                 17
Moderate Rain                15
Moderate Rain,Fog             7
Moderate Snow,Fog            16
Mostly Cloudy                 8
Rain                         13
Rain Showers                  0
Rain Showers,Snow Showers    11
Rain,Drizzle,Fog              6
Rain,Fog                      1
Rain,Snow                     9
Rain,Snow,Fog                 2
Snow                         10
Snow Showers                  5
Snow,Fog                      4
dtype: int64

In [7]:
print(len(pd.Series(weather_dict)))

20


In [8]:
train_data_length = int(data_length * 0.8)

train_data = data[:train_data_length]
test_data = data[train_data_length:]

In [9]:
index_x = index[:-1]
index_y = index[-1]

x_train = train_data[index_x].values
y_train = np.asarray(train_data[index_y].values.tolist())

x_test = test_data[index_x].values
y_test =  np.asarray(test_data[index_y].values.tolist())

In [10]:
batch_size = 20
num_epochs = 500

In [12]:
# define 2 placeholders
x = tf.placeholder(tf.float32, [None, x_train.shape[1]], name="input_x")
y = tf.placeholder(tf.float32, [None, y_train.shape[1]], name="input_y")

W1 = tf.Variable(tf.truncated_normal([x_train.shape[1],32], stddev=0.1), name='W1')
b1 = tf.Variable(tf.constant(0.1, shape=[32]), name='b1')
prediction1 = tf.nn.relu(tf.matmul(x, W1) + b1)

W2 = tf.Variable(tf.truncated_normal([32,32], stddev=0.1), name='W2')
b2 = tf.Variable(tf.constant(0.1, shape=[32]), name='b2')
prediction2 = tf.nn.relu(tf.matmul(prediction1, W2) + b2)


W3 = tf.Variable(tf.truncated_normal([32,total_labels], stddev=0.1), name='W3')
b3 = tf.Variable(tf.constant(0.1, shape=[total_labels]), name='b3')
prediction = tf.nn.relu(tf.matmul(prediction1, W3) + b3)

loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=prediction))

global_step = tf.Variable(0, trainable=False)
starter_learning_rate = 0.0002
decay_steps = 5000
decay_rate = 0.95
    
learning_rate = tf.train.exponential_decay(starter_learning_rate, global_step, decay_steps=decay_steps, 
                                           decay_rate=decay_rate, staircase=True)
    #optimizer = tf.GradientDescent(learning_rate)
    #optimizer.minimize(...my loss..., global_step=global_step)


add_global = global_step.assign_add(1)
with tf.control_dependencies([add_global]):
    #train_op = opt.minimise(loss)
    train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)
correct_prediction = tf.equal(tf.argmax(y,1), tf.argmax(prediction,1))

#求准确率
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

In [13]:
result_list = [[],[],[], []] #[[iterater],[training_accuracy],[testing_accuracy], [loss]]

In [25]:
tf.set_random_seed(1)
init = tf.global_variables_initializer()

saver = tf.train.Saver()

with tf.Session() as sess:
    sess.run(init)
     
    batches = batch_iter(list(zip(x_train, y_train)), batch_size, num_epochs)
    i = 0
    for batch in batches:
        i = i + 1
        x_batch, y_batch = zip(*batch)
        sess.run(train_step, feed_dict={x:x_batch, y:y_batch})
        
        if i % 1000 == 0:

            acc_train = sess.run(accuracy, feed_dict = {x:x_batch, y:y_batch})
            acc_test = sess.run(accuracy, feed_dict = {x:x_test, y:y_test})
            ts = sess.run(global_step)
            lr = sess.run(learning_rate)
            result_list[0].append(i)
            result_list[1].append(acc_train)
            result_list[2].append(acc_test)
            print("iterator " + str(i) + ", testing accuracy %.4f" % acc_test , ", training accuracy %.4f" % acc_train, 
                  ", learning rate %f" % lr )
    save_path = saver.save(sess, "/home/leo/Desktop/cmpt318_project/model/tf_model.ckpt")

num_batches_per_epoch: 527
iterator 1000, testing accuracy 0.2280 , training accuracy 0.1000 , learning rate 0.000200
iterator 2000, testing accuracy 0.1911 , training accuracy 0.1000 , learning rate 0.000200
iterator 3000, testing accuracy 0.2249 , training accuracy 0.2000 , learning rate 0.000200
iterator 4000, testing accuracy 0.2261 , training accuracy 0.3000 , learning rate 0.000200
iterator 5000, testing accuracy 0.2363 , training accuracy 0.1500 , learning rate 0.000190
iterator 6000, testing accuracy 0.2017 , training accuracy 0.2500 , learning rate 0.000190
iterator 7000, testing accuracy 0.2169 , training accuracy 0.2000 , learning rate 0.000190
iterator 8000, testing accuracy 0.2553 , training accuracy 0.2500 , learning rate 0.000190
iterator 9000, testing accuracy 0.2268 , training accuracy 0.3500 , learning rate 0.000190
iterator 10000, testing accuracy 0.1862 , training accuracy 0.1000 , learning rate 0.000180
iterator 11000, testing accuracy 0.2451 , training accuracy 0.

In [26]:
plt.plot(result_list[0], result_list[1], 'g')
plt.plot(result_list[0], result_list[2], 'b')
plt.xlabel('Iteration')
plt.ylabel('Accuracy')
plt.legend(['Train accuracy', 'Test accuracy'])
plt.savefig('tf_model.png')