In [1]:
import os
import sys
sys.path.append("/content/drive/MyDrive/Hackathon/code/")
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import pickle
import json

In [2]:
import tensorflow as tf
from tensorflow.keras import layers, optimizers
from tensorflow.keras.models import Model
import tensorflow.keras.backend as K

In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Importing and preprocessing the data

In [4]:
f = open("/content/drive/MyDrive/Hackathon/dataset/dataset_reoganized_v8.pkl", 'rb')
dataset = pickle.load(f)
for i in range(len(dataset)):
    dataset[i] = dataset[i].dropna()
f.close()

In [141]:
dataset[0].columns

Index(['index', 'M_SESSION_LINK_IDENTIFIER', 'M_TRACK_TEMPERATURE',
       'M_FORECAST_ACCURACY', 'M_AIR_TEMPERATURE', 'M_SESSION_TIME_LEFT',
       'M_SESSION_DURATION', 'M_WEATHER', 'M_TIME_OFFSET',
       'M_TRACK_TEMPERATURE_CHANGE', 'M_AIR_TEMPERATURE_CHANGE',
       'M_RAIN_PERCENTAGE', 'TIME_SESSION'],
      dtype='object')

Dropping unwanted columns and rearranging indices

In [5]:
for i, d in enumerate(dataset):
    dataset[i] = d.reset_index()
    dataset[i]["TIMESTAMP"] = dataset[i]["TIMESTAMP"].view(np.int64)*1e-9
    dataset[i] = dataset[i].set_index("TIMESTAMP")
    dataset[i] = dataset[i].drop(columns=["M_TRACK_LENGTH",
                                          "M_GAME_PAUSED",
                                          "M_NUM_WEATHER_FORECAST_SAMPLES",
                                          "M_TRACK_ID",
                                          "M_SESSION_TYPE",
                                          "M_WEATHER_FORECAST_SAMPLES_M_SESSION_TYPE",
                                          "M_SESSION_UID"], axis=1).copy()

Adding external weather information to our model

In [6]:
weather = pd.read_csv("/content/drive/MyDrive/Hackathon/dataset/weatherHistory.csv")

In [7]:
# Dictionary mapping from the F1 dataset weather types to the external dataset weather types
weather_type_mapping = {"0": ['Breezy', 'Breezy and Dry', 'Clear', 'Windy and Dry', 'Windy'],
                        "1": ['Breezy and Foggy', 'Breezy and Mostly Cloudy', 'Breezy and Partly Cloudy',
                              'Humid and Partly Cloudy', 'Partly Cloudy', 'Windy and Partly Cloudy', 'Dangerously Windy and Partly Cloudy'],
                        "2": ['Breezy and Mostly Cloudy', 'Breezy and Overcast', 'Dry and Mostly Cloudy',
                              'Humid and Mostly Cloudy', 'Humid and Overcast', 'Mostly Cloudy', 'Overcast',
                              'Windy and Mostly Cloudy', 'Windy and Overcast'], 
                        "3": ['Drizzle'],
                        "4": ['Rain']}

def str_to_cat(label, dict):
  """
  Simple function assigning a label to the corresponding F1 dataset class.
  Args:
    label (string): external dataset label
    dict (dictionary): weather type mapping
  """
  for k, v in dict.items():
    if label in v:
      return int(k)

Applying some changes to the external dataset...

In [8]:
weather["M_AIR_TEMPERATURE"] = (weather["Temperature (C)"]).astype(int)  # Convert to int to increase granularity
weather["M_WEATHER"] = weather["Summary"].apply(lambda x: str_to_cat(x, weather_type_mapping))  # Assign numeric category to label
weather = weather.drop(columns=['Formatted Date', 'Summary', 'Precip Type', 'Apparent Temperature (C)',
                                'Loud Cover', 'Daily Summary', 'Temperature (C)']).copy()
w = pd.DataFrame(columns=weather.columns)
for temp in np.unique(weather["M_AIR_TEMPERATURE"]):
  for wtype in np.unique(weather["M_WEATHER"]):
      # Keeping only one value for every (temperature, weather_type) pair
      w = w.append(weather[(weather["M_AIR_TEMPERATURE"] == temp) & (weather["M_WEATHER"] == wtype)].mean(), ignore_index=True)

w = w.dropna()

w.head(10)

Unnamed: 0,Humidity,Wind Speed (km/h),Wind Bearing (degrees),Visibility (km),Pressure (millibars),M_AIR_TEMPERATURE,M_WEATHER
6,0.78,11.27,20.0,0.0,1033.3,-20.0,0.0
7,0.78,3.22,200.0,4.025,1033.8,-20.0,1.0
24,0.91,3.6386,161.0,5.2003,1035.9,-17.0,0.0
25,0.9,2.7853,228.0,3.5742,1032.18,-17.0,1.0
30,0.87,11.27,160.0,7.084,1016.15,-16.0,0.0
31,0.415,5.635,190.0,4.508,1029.5,-16.0,1.0
32,0.64,7.99526,210.2,4.75594,1031.142,-16.0,2.0
36,0.494,8.8067,178.4,8.52656,1016.072,-15.0,0.0
37,0.91,6.1663,170.0,5.9731,1014.21,-15.0,1.0
38,0.543333,8.055367,197.333333,5.8282,1030.93,-15.0,2.0


Now, it is time to expand the F1 video game dataset using the external weather dataset. 

In order to do this, we merge both DataFrames on two columns: weather type and temperature. 

That way, we provide more information to the neural network.

In [9]:
session_list = []
for i, sess in enumerate(dataset):  # Iterate through our sessions
  df = sess.merge(w, on=['M_WEATHER', 'M_AIR_TEMPERATURE'], how='inner')
  session_list.append(df)

Here, we can see that the data has been augmented with new values.

In [10]:
ts = session_list[1]
ts.head(1)

Unnamed: 0,index,M_SESSION_LINK_IDENTIFIER,M_TRACK_TEMPERATURE,M_FORECAST_ACCURACY,M_AIR_TEMPERATURE,M_SESSION_TIME_LEFT,M_SESSION_DURATION,M_WEATHER,M_TIME_OFFSET,M_TRACK_TEMPERATURE_CHANGE,M_AIR_TEMPERATURE_CHANGE,M_RAIN_PERCENTAGE,TIME_SESSION,Humidity,Wind Speed (km/h),Wind Bearing (degrees),Visibility (km),Pressure (millibars)
0,186900,18385983,33,0,25,7023,7200,0,0,2,2,4,177,0.516218,9.638353,143.109244,12.197982,929.92563


In [11]:
# with open("/content/drive/MyDrive/Hackathon/dataset/dataset_reoganized_extended_v1.pkl", "wb") as f:
#   pickle.dump(mylist, f)

# Creating Dataset object

Creating training and test sets

We use a train-test split of 80%-20%, as is the practice in Data Science.

In [262]:
correct_dfs = []  # Filtering out datasets that have no predictions for t+60 values.
for l in session_list:
  if 60 in np.array(l['M_TIME_OFFSET'].values):
    correct_dfs.append(l)

training_sess = pd.concat(correct_dfs[1:80], axis=0)
print(training_sess["M_WEATHER"].value_counts(), training_sess.shape[0])
testing_sess = pd.concat(correct_dfs[81:], axis=0)
print(testing_sess["M_WEATHER"].value_counts(), testing_sess.shape[0])

0    96295
1    46304
2    35013
Name: M_WEATHER, dtype: int64 177612
0    18489
1    18105
2     6870
Name: M_WEATHER, dtype: int64 43464


Importing already created datasets because creating the dataset is a costly process that takes quite some time.

In [288]:
with open("/content/drive/MyDrive/Hackathon/dataset/train_dataset.pkl", "rb") as f:
  x_train, y_train1, y_train2 = pickle.load(f)
with open("/content/drive/MyDrive/Hackathon/dataset/test_dataset.pkl", "rb") as f:
  x_test, y_test1, y_test2 = pickle.load(f)

In [289]:
from src.weather_dataset import WeatherDataset

In [290]:
train_dataset = WeatherDataset(sequence_length=50, df=training_sess, batch_size=2, instanciate_data=False)
train_dataset.import_data(x_train, y_train1, y_train2)
train_dataset.create_dataset()
test_dataset = WeatherDataset(sequence_length=50, df=testing_sess, batch_size=1, instanciate_data=False)
test_dataset.import_data(x_test, y_test1, y_test2)
test_dataset.create_dataset()

# Creating the Model

In [291]:
params = {
    "recurrent_cell_shape": 512,
    "learning_rate": 1e-3,
    "epochs": 10,
    "batch_size": 32
}

In [313]:
from src.model import WeatherModel

In [444]:
sequence_length = train_dataset.sequence_length
n_features = train_dataset.n_features

model = WeatherModel(sequence_length, n_features, params)

In [435]:
model.model.summary()

Model: "model_47"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_48 (InputLayer)          [(None, 50, 17)]     0           []                               
                                                                                                  
 lstm_94 (LSTM)                 (None, 50, 512)      1085440     ['input_48[0][0]']               
                                                                                                  
 lstm_95 (LSTM)                 (None, 512)          2099200     ['lstm_94[0][0]']                
                                                                                                  
 dense_47 (Dense)               (None, 256)          131328      ['lstm_95[0][0]']                
                                                                                           

In [445]:
model.compile()

In [449]:
#Code for training the model
# model.fit(train_dataset.data)

We import weights from training.

In [448]:
model.model.load_weights('lstm_model_weights.h5')

We evaluate the model on the test dataset.
We report the following metrics :
* Forecast at T+5 : 0.97 weather type Categorical Accuracy
* Forecast at T+10 : 0.92 weather type Categorical Accuracy
* Forecast at T+15 : 0.88 weather type Categorical Accuracy
* Forecast at T+30 : 0.87 weather type Categorical Accuracy
* Forecast at T+60 : 0.85 weather type Categorical Accuracy
* 0.088 Rain percentage Mean Average Error (mean of predictions for the 5 future timesteps)

In [297]:
model.model.evaluate(test_dataset.data)



[4.3531012535095215,
 0.19434399902820587,
 0.6789713501930237,
 1.0377877950668335,
 1.1051759719848633,
 1.246863603591919,
 0.08883225917816162,
 0.9775831699371338,
 0.9280766248703003,
 0.8883733153343201,
 0.8782106041908264,
 0.8555322885513306,
 0.08883225917816162]

In these cells, we import the original dataset and take a line from it.

In [300]:
a = pd.read_csv("/content/drive/MyDrive/Hackathon/weather.csv")

  exec(code_obj, self.user_global_ns, self.user_ns)


We can see the output of the model.

In [446]:
single_line = a.iloc[np.random.randint(a.shape[0])]  # Selecting one line from the original dataset
pred = model.predict(single_line, w, test_dataset)
print(json.dumps(pred, indent=2))

[17]
[
  {
    "5": {
      "type": 0,
      "rain_percentage": 0.05
    },
    "10": {
      "type": 0,
      "rain_percentage": 0.01
    },
    "15": {
      "type": 0,
      "rain_percentage": 0.05
    },
    "30": {
      "type": 0,
      "rain_percentage": 0.08
    },
    "60": {
      "type": 1,
      "rain_percentage": 0.1
    }
  }
]
