In [0]:
import pandas as pd
import numpy as np
import pickle
import tensorflow.compat.v2.feature_column as fc
import tensorflow as tf
from sklearn.model_selection import train_test_split
from pprint import pprint as pp

Import data from pickled dataset

In [64]:
# Load the pickled training data. The context manager guarantees the file
# handle is closed even if unpickling raises.
# NOTE(review): pickle.load can execute arbitrary code while deserializing;
# only load 'training_data.pickle' from a trusted source.
with open('training_data.pickle', 'rb') as f:
  data = pickle.load(f)

# Show every column when a DataFrame is displayed instead of eliding the middle.
pd.set_option('display.max_columns', None)

         badminton   basketball   volleyball         temp     dewpoint  \
count  1544.000000  1544.000000  1544.000000  1544.000000  1544.000000   
mean      2.547280     4.062176     0.888601     3.186010    -1.390155   
std       3.697449     6.412582     2.242123     9.478198     9.202316   
min       0.000000     0.000000     0.000000   -23.100000   -28.500000   
25%       0.000000     1.000000     0.000000    -2.900000    -7.525000   
50%       1.000000     2.000000     0.000000     1.700000    -2.400000   
75%       4.000000     5.000000     1.000000     8.600000     3.825000   
max      32.000000    72.000000    29.000000    29.600000    23.200000   

          humidity         year        month          day         hour  
count  1544.000000  1544.000000  1544.000000  1544.000000  1544.000000  
mean     73.879534  2018.369819     7.060233    14.799223    17.937824  
std      15.609243     0.646960     4.260582     8.665071     2.924191  
min      20.000000  2017.000000     1.000

Split the data into four sets per sport: the weather feature vectors and the matching sport labels, each divided into a training and a test portion.

In [65]:
# Assemble the feature frame from the time and weather columns, in this order.
weather = pd.concat(
    [data[column] for column in ('hour', 'day', 'month', 'year', 'temp', 'dewpoint', 'humidity')],
    axis=1,
)

# One split per sport. Every split shares the same features, test fraction and
# seed; only the label column differs.
(weather_basketball_train, weather_basketball_test,
 basketball_train, basketball_test) = train_test_split(
    weather, data['basketball'], test_size=0.33, random_state=42)

(weather_volleyball_train, weather_volleyball_test,
 volleyball_train, volleyball_test) = train_test_split(
    weather, data['volleyball'], test_size=0.33, random_state=42)

(weather_badminton_train, weather_badminton_test,
 badminton_train, badminton_test) = train_test_split(
    weather, data['badminton'], test_size=0.33, random_state=42)

10 23


Next the categorical and numerical data must be split and encoded appropriately.

In [0]:
CATEGORICAL_COLUMNS = ['year', 'month', 'day','hour']
NUMERIC_COLUMNS = ['temp', 'dewpoint','humidity']

# Categorical features: the vocabulary of each column is the set of distinct
# values observed in the loaded data.
feature_columns = [
    tf.feature_column.categorical_column_with_vocabulary_list(name, data[name].unique())
    for name in CATEGORICAL_COLUMNS
]

# Numeric features are passed through as float32 values.
feature_columns.extend(
    tf.feature_column.numeric_column(name, dtype=tf.float32)
    for name in NUMERIC_COLUMNS
)


Create the input functions that feed the data to the estimators.

In [67]:
def make_input_fn(data_df, label_df, num_epochs=10, shuffle=True, batch_size=32):
  """Build a zero-argument input function yielding a batched tf.data.Dataset.

  Args:
    data_df: Features; converted with dict() so each key maps to a column.
    label_df: Labels, sliced in step with the features.
    num_epochs: Number of times the batched dataset is repeated.
    shuffle: If true, shuffle with a buffer of 1000 elements before batching.
    batch_size: Number of examples per batch.

  Returns:
    A callable taking no arguments that constructs and returns the dataset.
  """
  def input_function():
    features = dict(data_df)
    dataset = tf.data.Dataset.from_tensor_slices((features, label_df))
    dataset = dataset.shuffle(1000) if shuffle else dataset
    return dataset.batch(batch_size).repeat(num_epochs)
  return input_function


# Training pipelines rely on make_input_fn's defaults; evaluation pipelines
# make a single, unshuffled pass over the held-out split.
train_input_fn_basketball = make_input_fn(
    data_df=weather_basketball_train, label_df=basketball_train)
eval_input_fn_basketball = make_input_fn(
    data_df=weather_basketball_test, label_df=basketball_test, shuffle=False, num_epochs=1)

train_input_fn_volleyball = make_input_fn(
    data_df=weather_volleyball_train, label_df=volleyball_train)
eval_input_fn_volleyball = make_input_fn(
    data_df=weather_volleyball_test, label_df=volleyball_test, shuffle=False, num_epochs=1)

train_input_fn_badminton = make_input_fn(
    data_df=weather_badminton_train, label_df=badminton_train)
eval_input_fn_badminton = make_input_fn(
    data_df=weather_badminton_test, label_df=badminton_test, shuffle=False, num_epochs=1)


      hour  day  month  year  temp  dewpoint  humidity
13      20   15     12  2019  -3.5      -6.5      79.0
162     21   25     10  2019   3.7       2.7      93.0
1704    23   27     11  2017   0.8      -2.0      81.0
312     23   15      9  2019  17.6      16.1      91.0
321     17   13      9  2019  24.1      21.4      85.0
...    ...  ...    ...   ...   ...       ...       ...
1250    20   17      4  2018   0.0      -0.9      94.0
1422    15    9      2  2018  -6.6      -8.5      86.0
956     20   20     11  2018  -4.4      -8.9      71.0
1614    15    9     12  2017  -1.4      -3.9      83.0
1246    16   22      4  2018  15.9      -6.9      20.0

[1034 rows x 7 columns] 13      9
162     3
1704    1
312     2
321     2
       ..
1250    0
1422    4
956     2
1614    8
1246    0
Name: basketball, Length: 1034, dtype: int64


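Train one linear regression model per sport and evaluate each on its held-out test set (the estimator reports regression loss metrics rather than a classification accuracy).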

In [78]:
def _train_and_evaluate(train_input_fn, eval_input_fn):
  """Train a fresh LinearRegressor and evaluate it.

  Args:
    train_input_fn: Input function supplying the training data.
    eval_input_fn: Input function supplying the evaluation data.

  Returns:
    A (estimator, metrics) tuple, where metrics is whatever
    estimator.evaluate() returned.
  """
  estimator = tf.estimator.LinearRegressor(feature_columns=feature_columns)
  estimator.train(train_input_fn)
  return estimator, estimator.evaluate(eval_input_fn)


# Keep the metrics of every sport; previously each evaluation overwrote the
# single `result` variable, so only the last one survived and none were shown.
basketball_linear_est, basketball_result = _train_and_evaluate(
    train_input_fn_basketball, eval_input_fn_basketball)
badminton_linear_est, badminton_result = _train_and_evaluate(
    train_input_fn_badminton, eval_input_fn_badminton)
volleyball_linear_est, volleyball_result = _train_and_evaluate(
    train_input_fn_volleyball, eval_input_fn_volleyball)

evaluation_results = {
    'basketball': basketball_result,
    'badminton': badminton_result,
    'volleyball': volleyball_result,
}

# `result` keeps its original final value (the volleyball metrics) for any
# later code that still reads it.
result = volleyball_result

pp(evaluation_results)


INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': '/tmp/tmp046js26k', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7ff13002f4e0>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done c

Gather the date, time and weather values to predict for.

In [69]:
def _prompt_number(message, cast, low=None, high=None):
  """Prompt until the user enters a finite number within [low, high].

  Args:
    message: Prompt text printed before each read.
    cast: int or float; used to check that the entry parses.
    low: Inclusive lower bound, or None for no lower bound.
    high: Inclusive upper bound, or None for no upper bound.

  Returns:
    The accepted entry as a stripped string. It stays a string because later
    cells convert it themselves and also concatenate it into messages.
  """
  while True:
    print(message)
    raw = input().strip()
    try:
      value = cast(raw)
    except ValueError:
      print('Invalid number, please try again.')
      continue
    in_range = (low is None or value >= low) and (high is None or value <= high)
    # np.isfinite rejects 'nan' and 'inf', which float() would otherwise accept.
    if in_range and np.isfinite(value):
      return raw
    print('Value out of range, please try again.')


year = _prompt_number('Enter the year', int)
month = _prompt_number('Enter the month (1-12)', int, 1, 12)
day = _prompt_number('Enter the day (1-31)', int, 1, 31)
hour = _prompt_number('Enter the hour  (10-23)', int, 10, 23)
temp = _prompt_number('Enter the temperature in celsius', float)
dewpoint = _prompt_number('Enter the dewpoint in celsius', float)
humidity = _prompt_number('Enter the relative humidity (0-100)', float, 0, 100)

Enter the year
2019
Enter the month (1-12)
11
Enter the day (1-31)
22
Enter the hour  (10-23)
12
Enter the temperature in celsius
2
Enter the dewpoint in celsius
0
Enter the relative humidity (0-100)
25


Obtain prediction from all models

In [76]:
# Single-row feature frame for the requested date, time and weather. It is
# identical for every sport, so it is built once.
prediction_input = pd.DataFrame(
    data=[[int(year), int(month), int(day), int(hour),
           float(temp), float(dewpoint), float(humidity)]],
    columns=['year', 'month', 'day', 'hour', 'temp', 'dewpoint', 'humidity'])

# Trained estimator per sport, in the order the predictions are produced.
sport_estimators = {
    'basketball': basketball_linear_est,
    'volleyball': volleyball_linear_est,
    'badminton': badminton_linear_est,
}

# Human-readable timestamp (day/month/year hour:00) shared by all messages.
requested_time = day + '/' + month + '/' + year + ' ' + hour + ':00'

prediction_output = {}
for sport, estimator in sport_estimators.items():
  # num_epochs=1 makes the input stream end after the single row instead of
  # repeating forever.
  prediction = estimator.predict(input_fn=tf.estimator.inputs.pandas_input_fn(
      x=prediction_input, y=None, batch_size=1, num_epochs=1, shuffle=False))
  for p in prediction:
    # Round to the nearest whole person (int() alone truncates toward zero)
    # and clamp at zero, since a linear model can emit a negative value.
    people = max(0, int(round(float(p['predictions'][0]))))
    prediction_output[sport] = ('Number of people playing ' + sport + ' at '
                                + requested_time + ' is ' + str(people))
    break

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /tmp/tmp3c7ns0nj/model.ckpt-330
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /tmp/tmpfv2zxh8i/model.ckpt-330
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /tmp/tmpj2yys04b/model.ckpt-330
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.


Output predictions

In [77]:
# Pretty-print the per-sport prediction messages collected in prediction_output.
pp(prediction_output)

{'badminton': 'Number of people playing badminton at 22/11/2019 12:00 is 1',
 'basketball': 'Number of people playing basketball at 22/11/2019 12:00 is 0',
 'volleyball': 'Number of people playing volleyball at 22/11/2019 12:00 is 0'}
