In [1]:
import pandas as pd
import numpy as np
import cv2
import itertools


from cycling_manager.preprocess import split, get_data, preprocess

from cycling_manager.sequences import get_sequences, get_sequence

from tensorflow.keras.layers import TimeDistributed, LSTM, Dense, Input, Conv2D, Flatten, MaxPooling2D
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers.schedules import ExponentialDecay
from tensorflow.keras.optimizers import RMSprop, Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers.experimental.preprocessing import Rescaling


from tensorflow.keras import Model



import tensorflow as tf

In [2]:
df = preprocess(get_data())

[38;5;2mfull df shape (603619, 25)[0m
[38;5;2mfull df shape (603619, 25)[0m
[38;5;3mScore vertical meters - model: DecisionTreeRegressor() score: 0.999571943188838[0m
[38;5;3mScore vertical meters - model: DecisionTreeRegressor() score: 0.9999357017428392[0m
[38;5;3mScore vertical meters - model: KNeighborsClassifier() score: 0.9942391180462735[0m


In [16]:
train, test = split(df, start=2019, end=2021)

In [17]:
df.columns

Index(['name', 'year', 'type', 'date', 'result', 'gc', 'icon', 'race_ref',
       'race_name', 'race_detail', 'race_rank', 'distance', 'points',
       'Avg. speed winner:', 'Distance:', 'Parcours type:', 'ProfileScore:',
       'Vert. meters:', 'Startlist quality score:', 'Won how: ',
       'adjusted_points', 'Race category:', 'Points scale:', 'Arrival:',
       'Won how:', 'parcours_type_num', 'gt_binary', 'key', 'result_bin',
       'types_bin', 'icon_bin'],
      dtype='object')

In [18]:
y_dec_ls = []
tour_ls = []
season_ls = []
result_ls = []

for name, year, tour in train.values:
    y_decoder, tour_data, result, season_data = get_sequence(df, name, year, tour, img=True, binary=False)
    y_dec_ls.append(y_decoder)
    tour_ls.append(tour_data)
    result_ls.append(result)
    season_ls.append(season_data)

[31m
 dropped nan for ('jacopo-mosca', 2019, 'vuelta-a-espana')[0m
[31m
 dropped nan for ('luis-angel-mate', 2019, 'vuelta-a-espana')[0m


In [19]:
season_ls_img = []
to_drop_ls = []

for season in season_ls:
    base_path = '../raw_data/img_300/'
    season = [base_path+"_".join(race.split('/')[1:])+'.jpg' for race in season]
    
    img_ls = []
    
    for race in season:
        img = cv2.imread(race)
        try:
            img = tf.convert_to_tensor(img)
            img_ls.append(img)
        except:
            to_drop_ls.append('race/'+race.split('/')[-1].split('.')[0].replace('_', '/'))
        
    season_ls_img.append(np.array(img_ls))
    
season_ls_img = np.array(season_ls_img)
to_drop_ls = list(dict.fromkeys(to_drop_ls))
    

[ WARN:0@3609.604] global /Users/xperience/actions-runner/_work/opencv-python/opencv-python/opencv/modules/imgcodecs/src/loadsave.cpp (239) findDecoder imread_('../raw_data/img_300/nc-poland_2020_result.jpg'): can't open/read file: check file path/integrity
[ WARN:0@3609.628] global /Users/xperience/actions-runner/_work/opencv-python/opencv-python/opencv/modules/imgcodecs/src/loadsave.cpp (239) findDecoder imread_('../raw_data/img_300/nc-czech-republic-itt_2020_result.jpg'): can't open/read file: check file path/integrity
[ WARN:0@3609.628] global /Users/xperience/actions-runner/_work/opencv-python/opencv-python/opencv/modules/imgcodecs/src/loadsave.cpp (239) findDecoder imread_('../raw_data/img_300/nc-czech-republic_2020_result.jpg'): can't open/read file: check file path/integrity
[ WARN:0@3609.635] global /Users/xperience/actions-runner/_work/opencv-python/opencv-python/opencv/modules/imgcodecs/src/loadsave.cpp (239) findDecoder imread_('../raw_data/img_300/czech-cycling-tour_2020_s

In [20]:
result_ls_2 = [list(itertools.chain(*x[~x.race_ref.isin(to_drop_ls)].drop(columns='race_ref').values)) for x in result_ls]

In [21]:
def get_sequences_img(season_ls, result_ls):
    new_season_ls = []
    new_result_ls = []
    
    for season, results in zip(season_ls, result_ls):
        if season.shape[0] == 0:
            pass
        else:
            tensors_season = tf.convert_to_tensor(season, dtype=tf.float32)
            new_season_ls.append(tensors_season)
            
            tensors_y = tf.convert_to_tensor(results, dtype=tf.float32)
            new_result_ls.append(tensors_y)
        
    X_encoder_train = tf.ragged.stack(new_season_ls).to_tensor()
    y_encoder_train = tf.ragged.stack(new_result_ls).to_tensor()
    
    return X_encoder_train, y_encoder_train
        

In [22]:
X_encoder_train, y_encoder_train = get_sequences_img(season_ls_img, result_ls_2)

X_encoder_train.shape, y_encoder_train.shape

(TensorShape([921, 40, 150, 300, 3]), TensorShape([921, 40]))

In [23]:
model = Sequential()
# define CNN model
model.add(TimeDistributed(Rescaling(1./255, input_shape=(150,300,3))))

model.add(TimeDistributed(Conv2D(16, kernel_size=10, activation='relu')))
model.add(TimeDistributed(MaxPooling2D(3)))
    
model.add(TimeDistributed(Conv2D(32, kernel_size=8, activation="relu")))
model.add(TimeDistributed(MaxPooling2D(3)))

model.add(TimeDistributed(Conv2D(32, kernel_size=6, activation="relu")))
model.add(TimeDistributed(MaxPooling2D(3)))

model.add(TimeDistributed(Flatten()))

model.add(TimeDistributed(Dense(30, activation='relu')))

# define LSTM model
model.add(LSTM(units=15, dropout=0.2, return_sequences=True, activation='tanh'))
model.add(Dense(units=1, activation='linear'))

In [24]:
initial_learning_rate = 0.01

lr_schedule = ExponentialDecay(initial_learning_rate, decay_steps=1000, decay_rate=0.5)

adam = Adam(learning_rate=lr_schedule)

    
model.compile(optimizer=adam, loss='mean_absolute_error', metrics='mean_absolute_error')

In [25]:
model.summary()

ValueError: This model has not yet been built. Build the model first by calling `build()` or by calling the model on a batch of data.

In [26]:
batch_size=16
patience=20
validation_split=0.3

es = EarlyStopping(monitor="val_loss",
                       patience=patience,
                       restore_best_weights=True,
                       verbose=0)

history = model.fit(X_encoder_train, y_encoder_train,
                        validation_split=validation_split,
                        epochs=1000,
                        batch_size=batch_size,
                        callbacks=[es],
                        verbose=1)

Epoch 1/1000
 1/41 [..............................] - ETA: 15:19 - loss: 37.8737 - mean_absolute_error: 37.8737