In [12]:
import pandas as pd
import numpy as np

from sklearn.multioutput import MultiOutputRegressor
from sklearn.model_selection import train_test_split

In [2]:
RANDOM_STATE = 8675309

In [3]:
# Read the measurement workbook into a DataFrame. The path is relative, so the
# kernel's working directory must be the folder this notebook lives in.
df = pd.read_excel("../Data/bereinigte_DATEN_ano.xlsx")

In [4]:
# List every column label; the feature and target columns used later are
# picked from this index.
df.columns

Index(['Unnamed: 0', 'V1', 'athlete', 'gender', 'run', 'location', 'date',
       'TurnNr', 'MinimalRadius', 'VelocityAtMinRadius', 'VelocityAtTurnEntry',
       'VelocityAtTurnExit', 'TimeStarttoEnd', 'TimeStartToMinRad',
       'TimeMinRadToEnd', 'GlideTime_endtostart', 'TimeStarttoEnd_2',
       'Initiation', 'Completion', 'Gliding', 'Unnamed0', 'vectortonext',
       'vector2Dtonext', 'GATEDISTANCE', 'dist2Dtonext', 'vectortonextnorm',
       'vector2Dtonextnorm', 'STEEPNESS', 'azitonext', 'slopechangetonext',
       'angletonext', 'projtonext', 'projpttonext', 'offsettonext',
       'vectortonextnext', 'vector2Dtonextnext', 'dist3Dtonextnext',
       'dts2Dtonextnext', 'vectortonextnextnorm', 'vector2Dtonextnextnorm',
       'slopetonextnext', 'azitonextnext', 'HORIZONTALGATEDISTANCE',
       'VERTICALGATEDISTANCE', 'regularbs1orfs2', 'steepness_A', 'filter_$',
       'goofy2_regular1', '@2Ferse_3Zehe_4Ferse', 'Zehen1_Fersen2',
       'DistanzSchätz'],
      dtype='object')

In [133]:
# Inspect one of the vector-valued columns. Its dtype is `object` and entries
# render as bracketed, comma-separated text — presumably strings that would
# need parsing before they could be used as numeric features (TODO confirm).
df['vectortonext']

0       [-14.3678025804111,15.6543538719416,-6.1476324...
1       [-14.3678025804111,15.6543538719416,-6.1476324...
2       [-14.3678025804111,15.6543538719416,-6.1476324...
3       [-14.3678025804111,15.6543538719416,-6.1476324...
4       [-14.3678025804111,15.6543538719416,-6.1476324...
                              ...                        
6685    [17.9373525538831,13.1200019354001,-4.58418518...
6686    [17.9373525538831,13.1200019354001,-4.58418518...
6687    [17.9373525538831,13.1200019354001,-4.58418518...
6688    [17.9373525538831,13.1200019354001,-4.58418518...
6689    [17.9373525538831,13.1200019354001,-4.58418518...
Name: vectortonext, Length: 6690, dtype: object

In [78]:
# Feature matrix: one row per record in `df`, one column per selected
# predictor, returned as a plain NumPy array for scikit-learn.
# `gender` is used as-is, i.e. as the numeric code stored in the sheet.
X = df[
    [
        'gender',
        'MinimalRadius',
        'VelocityAtMinRadius',
        'VelocityAtTurnEntry',
        'VelocityAtTurnExit',
    ]
].to_numpy()

In [106]:
# Eyeball the feature array (NumPy elides the middle rows in its repr).
X

array([[ 1.  , 10.63, 53.6 , 55.7 , 55.2 ],
       [ 1.  , 11.05, 52.3 , 57.7 , 55.6 ],
       [ 1.  , 10.99, 52.8 , 57.  , 58.2 ],
       ...,
       [ 2.  , 13.37, 50.6 , 52.2 , 53.7 ],
       [ 2.  , 10.28, 47.3 , 48.1 , 49.5 ],
       [ 2.  , 12.84, 50.6 , 51.  , 53.2 ]])

In [107]:
# Target matrix: the five timing columns, predicted jointly
# (one output column per timing measurement).
y = df[
    [
        'TimeStarttoEnd',
        'TimeStartToMinRad',
        'TimeMinRadToEnd',
        'GlideTime_endtostart',
        'TimeStarttoEnd_2',
    ]
].to_numpy()

In [108]:
# Eyeball the target array.
# NOTE(review): in the rows displayed, column 0 equals column 1 + column 2 and
# column 4 equals column 0 + column 3, so the five targets look linearly
# related rather than independent — TODO confirm on the full table.
y

array([[1.28, 0.64, 0.64, 0.24, 1.52],
       [1.32, 0.6 , 0.72, 0.24, 1.56],
       [1.32, 0.6 , 0.72, 0.24, 1.56],
       ...,
       [1.36, 0.52, 0.84, 0.24, 1.6 ],
       [1.32, 0.64, 0.68, 0.  , 1.32],
       [1.36, 0.52, 0.84, 0.24, 1.6 ]])

In [109]:
# Hold out 25% of the rows for evaluation; the fixed seed keeps the
# partition reproducible between runs.
# NOTE(review): rows are shuffled individually. The table also carries
# `athlete` and `run` columns; if rows sharing those values are correlated,
# a grouped split may give a more honest estimate — TODO confirm.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=RANDOM_STATE,
)

In [110]:
# Sanity check: training features and targets must have the same row count.
print(X_train.shape, y_train.shape, sep="\n")

(5017, 5)
(5017, 5)


In [111]:
# Sanity check: held-out features and targets must have the same row count.
print(X_test.shape, y_test.shape, sep="\n")

(1673, 5)
(1673, 5)


In [112]:
from sklearn.linear_model import LinearRegression

In [113]:
# Baseline: ordinary least squares, fitted as one independent regressor per
# target column via the multi-output wrapper.
model = MultiOutputRegressor(
    estimator=LinearRegression(),
).fit(X_train, y_train)

In [114]:
# Held-out R^2 of the current `model`, averaged uniformly over the target columns.
model.score(X_test, y_test)

0.4585824276812328

In [115]:
from sklearn.linear_model import Ridge

In [116]:
# L2-regularised linear model with the library's default penalty strength,
# again one regressor per target column.
model = MultiOutputRegressor(
    estimator=Ridge(),
).fit(X_train, y_train)

In [117]:
# Held-out R^2 (uniform average over targets). The recorded value matches the
# unregularised linear fit to about six decimals, so the default penalty
# barely changes the solution here.
model.score(X_test, y_test)

0.45858267492371674

In [118]:
from sklearn.linear_model import RidgeCV

In [119]:
# Ridge with the penalty strength chosen by built-in cross-validation,
# selected separately for each target column.
model = MultiOutputRegressor(
    estimator=RidgeCV(),
).fit(X_train, y_train)

In [120]:
# Held-out R^2 (uniform average over targets). The recorded value is again
# practically the same as for the two previous linear fits.
model.score(X_test, y_test)

0.45858448440598953

In [121]:
from sklearn.tree import DecisionTreeRegressor

In [122]:
# Non-linear baseline: one depth-limited regression tree per target column.
# The depth cap and minimum leaf size restrain overfitting; the seed makes
# tie-breaking between equally good splits reproducible.
model = MultiOutputRegressor(
    estimator=DecisionTreeRegressor(
        max_depth=8,
        min_samples_leaf=15,
        random_state=RANDOM_STATE,
    ),
).fit(X_train, y_train)

In [123]:
# Held-out R^2 (uniform average over targets); the recorded value is higher
# than for the linear models scored earlier in this notebook.
model.score(X_test, y_test)

0.4994544779868342

In [124]:
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score

In [125]:
# In-sample fit quality; expects `model` to still be the tree ensemble
# fitted above. Both metrics are averaged uniformly over the target columns,
# so the first figure is the square root of the mean per-target MSE, not a
# mean of per-target RMSEs.
pred_train_tree = model.predict(X_train)
print(np.sqrt(mean_squared_error(y_true=y_train, y_pred=pred_train_tree)))
print(r2_score(y_true=y_train, y_pred=pred_train_tree))

0.13294932285868907
0.5888198736709183


In [126]:
# Same two metrics on the held-out rows; expects `model` to still be the
# tree ensemble fitted above. The R^2 printed here is the same quantity that
# `model.score(X_test, y_test)` reports.
pred_test_tree = model.predict(X_test)
print(np.sqrt(mean_squared_error(y_true=y_test, y_pred=pred_test_tree)))
print(r2_score(y_true=y_test, y_pred=pred_test_tree))

0.14950820660914718
0.4994544779868342


In [127]:
from sklearn.ensemble import RandomForestRegressor

In [128]:
# Random forest of 500 trees per target column, seeded for reproducibility.
# NOTE(review): `oob_score=True` requests an out-of-bag estimate for every
# forest, but no visible cell reads it — confirm it is used elsewhere or
# drop it to save fitting time.
model = MultiOutputRegressor(
    estimator=RandomForestRegressor(
        n_estimators=500,
        oob_score=True,
        random_state=RANDOM_STATE,
    ),
).fit(X_train, y_train)

In [129]:
# Held-out R^2 (uniform average over targets); the highest value recorded
# among the models scored in this notebook.
model.score(X_test, y_test)

0.5345299466893981

In [130]:
from sklearn.linear_model import SGDRegressor
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

In [131]:
# Linear model trained by stochastic gradient descent, one regressor per
# target column. The features are standardised first; the scaler is fitted
# inside the pipeline, i.e. on the training rows only.
model = make_pipeline(
    StandardScaler(),
    MultiOutputRegressor(
        estimator=SGDRegressor(
            max_iter=1000,
            tol=1e-3,
            random_state=RANDOM_STATE,
        ),
    ),
).fit(X_train, y_train)

In [132]:
# Held-out R^2 of the scaled SGD pipeline (uniform average over targets);
# the recorded value agrees with the closed-form linear fits to about three
# decimals.
model.score(X_test, y_test)

0.4584130690007826