# Temperature prediction
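### TensorFlow's `DNNRegressor` is used to predict future temperature values.
##### The code uses the TF 1.x `tf.contrib.learn.DNNRegressor`; the closely related `tf.estimator.DNNRegressor` is documented at https://github.com/tensorflow/docs/blob/r1.14/site/en/api_docs/python/tf/estimator/DNNRegressor.md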

In [None]:
# Import libraries
import datetime as dt
import pandas as pd
import numpy as np
# Change tensorflow version for using tensorflow.contrib
%tensorflow_version 1.x
import tensorflow as tf
# Disable tensorflow warnings and logs to have a cleaner output
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

TensorFlow 1.x selected.


In [None]:
# Load the semicolon-separated CSV file holding the dates and temperatures
DATA_PATH = 'data_2017_2020.csv'
df = pd.read_csv(DATA_PATH, sep=';')
df.head()

Unnamed: 0,Date,Temp
0,2017.01.01,-6.7
1,2017.01.02,-5.6
2,2017.01.04,0.2
3,2017.01.06,-8.4
4,2017.01.09,-8.6


In [None]:
# Parse the textual 'Date' column once so its parts can be extracted
parsed_dates = pd.to_datetime(df['Date'])

# Replace 'Date' with separate numeric Year, Month and Day columns
df = df.assign(
    Year=parsed_dates.dt.year,
    Month=parsed_dates.dt.month,
    Day=parsed_dates.dt.day,
).drop(columns=['Date'])
df.head()

Unnamed: 0,Temp,Year,Month,Day
0,-6.7,2017,1,1
1,-5.6,2017,1,2
2,0.2,2017,1,4
3,-8.4,2017,1,6
4,-8.6,2017,1,9


In [None]:
# Shuffle the rows with a fixed seed so the train/evaluate split is identical
# after every kernel restart; change SHUFFLE_SEED to draw a different split.
SHUFFLE_SEED = 42
df = df.sample(frac=1, random_state=SHUFFLE_SEED).reset_index(drop=True)

# Split shuffled data: the first 80% of the rows train the model,
# the remaining 20% are held out to evaluate it
splitting_index = round(len(df) * 0.8)
train_df = df.iloc[:splitting_index]
evaluate_df = df.iloc[splitting_index:]

In [None]:
# Dates for which the temperature should be predicted
oct28 = pd.to_datetime('2020-10-28')  # 28th of October
nov3 = pd.to_datetime('2020-11-03')   # 3rd of November
nov24 = pd.to_datetime('2020-11-24')  # 24th of November

# Create a dataframe from the entries, expand each date into
# Year, Month and Day columns and discard the original 'Date' column
test_df = pd.DataFrame({'Date': [oct28, nov3, nov24]})
test_df = test_df.assign(
    Year=test_df['Date'].dt.year,
    Month=test_df['Date'].dt.month,
    Day=test_df['Date'].dt.day,
).drop(columns=['Date'])
test_df

Unnamed: 0,Year,Month,Day
0,2020,10,28
1,2020,11,3
2,2020,11,24


In [None]:
# Input builders
# Names of the dataframe columns that are fed to the model as features
features = ['Year', 'Month', 'Day']


def input_fn(df, training=True):
    """Convert the feature columns of `df` (and optionally its label) into constant tensors.

    Args:
        df: DataFrame containing one column per entry of `features`; when
            `training` is true it must also contain the 'Temp' label column.
        training: If true, return the label together with the features (used
            for training and evaluation); if false, return the features only
            (used for prediction).

    Returns:
        `(feature_tensors, label)` when `training` is true, otherwise only
        `feature_tensors`. `feature_tensors` maps every feature name to a
        constant tensor built from that column's values; `label` is a constant
        tensor built from the 'Temp' column.
    """
    feature_tensors = {}
    for name in features:
        feature_tensors[name] = tf.constant(df[name].values)

    # Prediction has no labels: hand back the features on their own
    if not training:
        return feature_tensors

    # Training and evaluation additionally need the regression target
    label = tf.constant(df['Temp'].values)
    return feature_tensors, label

def train_input_fn():
    """Input function for fitting: feature tensors and labels built from `train_df`."""
    return input_fn(train_df, training=True)

def eval_input_fn():
    """Input function for evaluation: feature tensors and labels built from `evaluate_df`."""
    return input_fn(evaluate_df, training=True)

def test_input_fn():
    """Input function for prediction: feature tensors only (no labels), built from `test_df`."""
    return input_fn(test_df, training=False)

In [None]:
# One real-valued feature column per input feature
feature_columns = [
    tf.contrib.layers.real_valued_column(feature_name) for feature_name in features
]
# Number of hidden units in each layer
model_hidden_units = [32, 64]
# Optimizer used to train the model
model_optimizer = tf.train.AdamOptimizer(learning_rate=0.01)
# Model directory
model_dir = "tf_model_full"

In [None]:
# Build the TensorFlow DNN regressor from the feature columns, layer sizes and optimizer
# NOTE(review): `model_dir` is defined earlier in the notebook but is not passed
# here — confirm whether checkpoints were meant to be stored in that directory.
regressor = tf.contrib.learn.DNNRegressor(
    hidden_units=model_hidden_units,
    feature_columns=feature_columns,
    optimizer=model_optimizer,
)

In [None]:
# Train the regressor on the training split for a fixed number of steps
TRAIN_STEPS = 10000
regressor.fit(input_fn=train_input_fn, steps=TRAIN_STEPS)

DNNRegressor(params={'head': <tensorflow.contrib.learn.python.learn.estimators.head._RegressionHead object at 0x7f0411ff0ac8>, 'hidden_units': [32, 64], 'feature_columns': (_RealValuedColumn(column_name='Year', dimension=1, default_value=None, dtype=tf.float32, normalizer=None), _RealValuedColumn(column_name='Month', dimension=1, default_value=None, dtype=tf.float32, normalizer=None), _RealValuedColumn(column_name='Day', dimension=1, default_value=None, dtype=tf.float32, normalizer=None)), 'optimizer': <tensorflow.python.training.adam.AdamOptimizer object at 0x7f04045bbb38>, 'activation_fn': <function relu at 0x7f041bb72730>, 'dropout': None, 'gradient_clip_norm': None, 'embedding_lr_multipliers': None, 'input_layer_min_slice_size': None})

In [None]:
# Evaluate the model on the held-out split
# Loss is calculated by using mean square error
# A single step is enough: eval_input_fn supplies the whole evaluation frame at once
results = regressor.evaluate(input_fn=eval_input_fn, steps=1)
# Print every reported metric, ordered by metric name
for metric_name, metric_value in sorted(results.items()):
    print("%s: %s" % (metric_name, metric_value))

global_step: 10000
loss: 11.689581


In [None]:
# Predict test values
# test_input_fn supplies the label-free features of the test dates
predicted_output = regressor.predict(input_fn=test_input_fn)
# Materialize the predictions into a list so they can be indexed by position later
prediction = list(predicted_output)
prediction

[10.327097, 8.600786, 5.475446]

In [None]:
# Report each predicted value next to the date it belongs to
message = "The predicted temperature value on {0}. {1}. {2} is {3:.2f}°C."
for i, row in enumerate(test_df.itertuples(index=False)):
    print(message.format(row.Day, row.Month, row.Year, prediction[i]))

The predicted temperature value on 28. 10. 2020 is 10.33°C.
The predicted temperature value on 3. 11. 2020 is 8.60°C.
The predicted temperature value on 24. 11. 2020 is 5.48°C.
