In [41]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import ols
import itertools

In [42]:
def load_artembev(path='../data/1Artembev_PhC_sport_and_tripms.xls', skip_rows=11):
    """Load Artem'ev's PhC / TRIMP table from an Excel workbook.

    Parameters
    ----------
    path : str
        Workbook to read. Defaults to the file this loader has always used.
    skip_rows : int
        Number of leading data rows to discard. Defaults to 11, the cut-off
        that used to be hard-coded here.

    Returns
    -------
    pandas.DataFrame
        Columns ``TRIMP1`` .. ``TRIMP4`` followed by ``PhC``. Row labels of
        the sheet are kept as-is (no index reset).
    """
    sheet = pd.read_excel(path, header=0)
    # rename() returns a fresh frame, so no column is ever assigned onto a
    # subset of `sheet` (which is what raised SettingWithCopyWarning before).
    # Listing PhC last keeps the historical column order.
    res = sheet[
        ['TRIMP1', 'TRIMP2', 'TRIMP3', 'TRIMP4', 'Physiological Cost (PhC)']
    ].rename(columns={'Physiological Cost (PhC)': 'PhC'})
    return res.iloc[skip_rows:]

In [43]:
def _load_trimp_sheet(path):
    """Read one workbook and return PhC plus the four TRIMP columns under short names."""
    renames = {
        "t1(passive,slow)": 'TRIMP1',
        "t2(passive,fast)": 'TRIMP2',
        "t3(active,slow)": 'TRIMP3',
        't4(active,fast)': 'TRIMP4',
    }
    sheet = pd.read_excel(path, header=0)
    # Select-then-rename yields a new frame with columns PhC, TRIMP1..TRIMP4,
    # without assigning onto a subset (no SettingWithCopyWarning).
    return sheet[['PhC', *renames]].rename(columns=renames)


def load_others(prokopbev_skip=8, volkov_skip=34):
    """Load the Prokop'ev and Volkov PhC / TRIMP tables.

    Parameters
    ----------
    prokopbev_skip : int
        Leading data rows to discard from the Prokop'ev sheet (default 8).
    volkov_skip : int
        Leading data rows to discard from the Volkov sheet (default 34).

    Returns
    -------
    tuple of pandas.DataFrame
        ``(prokopbev, volkov)``, each with columns ``PhC``, ``TRIMP1`` ..
        ``TRIMP4``. Row labels of the sheets are kept as-is.
    """
    prokopbev = _load_trimp_sheet('../data/2Prokopbev_PhC_sport_and_tripms.xls')
    volkov = _load_trimp_sheet('../data/3Volkov_PhC_sport_and_tripms.xls')
    return prokopbev.iloc[prokopbev_skip:], volkov.iloc[volkov_skip:]

In [44]:
# Read the three per-cosmonaut tables once; later cells work on copies of these.
artembev = load_artembev()
prokopbev, volkov = load_others()

In [45]:
# Display name -> daily PhC/TRIMP table; this is the input of compute_interaction_p_values.
spacemen = {"Artem'ev":artembev, "Prokop'ev":prokopbev, "Volkov":volkov}

In [46]:
def compute_interaction_p_values(spacemen, verbose=False):
    """Test every pairwise factor interaction against PhC, per person.

    For each table in ``spacemen`` and each unordered pair of non-``PhC``
    columns ``(f1, f2)``, fits ``PhC ~ f1 + f2 + f1:f2`` by OLS and records
    the p-value that the Type-II ANOVA table reports for the interaction.

    Parameters
    ----------
    spacemen : dict
        Maps a display name to a DataFrame that has a ``PhC`` column and
        whose values are all convertible to float.
    verbose : bool
        When true, print the name and formula before each fit.

    Returns
    -------
    pandas.DataFrame
        One row per (person, pair) with columns ``spaceman``, ``factor#1``,
        ``factor#2`` and ``P``. Empty (but with those columns) when there is
        nothing to test.
    """
    columns = ['spaceman', 'factor#1', 'factor#2', 'P']
    # Collect plain records and build the frame once: DataFrame.append was
    # removed in pandas 2.0 and was quadratic when called in a loop.
    records = []

    for man, frame in spacemen.items():
        # Builtin float instead of the np.float alias (removed in NumPy 1.24).
        df = frame.astype(float)
        factors = df.columns.drop('PhC')

        for f1, f2 in itertools.combinations(factors, 2):
            formula = f"PhC ~ {f1} + {f2} + {f1}:{f2}"

            if verbose:
                print(man, formula)

            model = ols(formula, df).fit()

            try:
                anova_res = sm.stats.anova_lm(model, typ=2)
            except MemoryError:
                # Best effort: skip this pair and keep going with the rest.
                print('Not enough memory')
                continue

            # Third row / fourth column of the ANOVA table: with the formula
            # above the third term is the f1:f2 interaction, and the value
            # read is used as its p-value.
            interaction_p_val = anova_res.iloc[2, 3]
            records.append(
                {'spaceman': man, 'factor#1': f1, 'factor#2': f2, 'P': interaction_p_val}
            )

    return pd.DataFrame(records, columns=columns)

In [47]:
# Interaction p-values on the daily (unsmoothed) tables; the bare last line renders the result.
p_values = compute_interaction_p_values(spacemen)
p_values

Unnamed: 0,spaceman,factor#1,factor#2,P
0,Artem'ev,TRIMP1,TRIMP2,0.49456
1,Artem'ev,TRIMP1,TRIMP3,0.972688
2,Artem'ev,TRIMP1,TRIMP4,0.861727
3,Artem'ev,TRIMP2,TRIMP3,0.160832
4,Artem'ev,TRIMP2,TRIMP4,0.641701
5,Artem'ev,TRIMP3,TRIMP4,0.09659
6,Prokop'ev,TRIMP1,TRIMP2,0.236348
7,Prokop'ev,TRIMP1,TRIMP3,0.787305
8,Prokop'ev,TRIMP1,TRIMP4,0.304377
9,Prokop'ev,TRIMP2,TRIMP3,0.207375


In [48]:
OLD_PHC_COLNAME = "PhC_mean_3_days"
NEW_PHC_COLNAME = 'PhC_mean_target_3_days'


def to_3day_mean_frame(raw, horizon=3):
    """Turn a daily PhC/TRIMP table into 3-day-mean predictors plus a PhC target.

    Steps, in order:

    1. Copy ``raw`` and blank out the first ``PhC`` value.
    2. For every column add ``<col>_mean_3_days``: the mean over index labels
       ``day - 1 .. day + 1`` (NaNs are ignored by ``mean``).
    3. The target is the centred ``PhC`` mean found ``horizon`` rows further
       down; the last ``horizon`` rows therefore have no target.
    4. ``PhC_mean_3_days`` is then overwritten with the trailing mean over
       labels ``day - 2 .. day``.
    5. The daily columns are dropped, rows containing NaN are removed, and the
       target column is named ``PhC``.

    Parameters
    ----------
    raw : pandas.DataFrame
        Daily table with a ``PhC`` column and an integer index that supports
        label slicing (i.e. is sorted).
    horizon : int
        How many rows ahead the target is taken from (default 3).

    Returns
    -------
    pandas.DataFrame
        The ``*_mean_3_days`` columns in the order of ``raw``'s columns,
        followed by the target column ``PhC``.
    """
    frame = raw.copy()
    # Assignment aligns on the index, so leaving the first value out of the
    # right-hand side turns the first row's PhC into NaN.
    frame["PhC"] = frame["PhC"].iloc[1:]
    source_cols = list(frame.columns)

    def window_mean(col, back, ahead):
        # Label-based window [day - back, day + ahead] for every index label.
        return np.asarray(
            [frame.loc[day - back:day + ahead, col].mean() for day in frame.index]
        )

    for col in source_cols:
        frame[col + '_mean_3_days'] = window_mean(col, 1, 1)

    # shift() is the positional equivalent of `values[horizon:] + horizon * [nan]`
    # and also works when the frame has fewer than `horizon` rows.
    frame[NEW_PHC_COLNAME] = frame[OLD_PHC_COLNAME].shift(-horizon)
    frame[OLD_PHC_COLNAME] = window_mean("PhC", 2, 0)

    frame = frame.drop(columns=source_cols).dropna()
    # The target is the last column, so renaming it keeps PhC at the end.
    return frame.rename(columns={NEW_PHC_COLNAME: 'PhC'})


artembev3 = to_3day_mean_frame(artembev)

artembev3.head()

Unnamed: 0,TRIMP1_mean_3_days,TRIMP2_mean_3_days,TRIMP3_mean_3_days,TRIMP4_mean_3_days,PhC_mean_3_days,PhC
12,994.960748,103.188621,1260.987536,758.520146,0.395014,0.408444
13,997.482226,61.388053,1853.448052,1052.005165,0.393683,0.378842
14,770.478993,36.772901,1997.440934,1154.630564,0.393683,0.34924
15,981.762669,177.930531,1626.721428,784.5795,0.400398,0.372751
16,958.164133,173.953128,1848.569418,1049.97434,0.408444,0.396262


In [49]:
# 3-day-mean feature table for Volkov: smoothed predictors plus a PhC target
# taken three rows ahead.
OLD_PHC_COLNAME = "PhC_mean_3_days"
NEW_PHC_COLNAME = 'PhC_mean_target_3_days'

volkov3 = volkov.copy()
# Index-aligned assignment: the first PhC value is absent from the right-hand
# side, so it becomes NaN in the first row.
volkov3["PhC"] = volkov3["PhC"].iloc[1:]

cols_to_drop = list(volkov3.columns)
for raw_col in cols_to_drop:
    # Centred window over index labels day-1 .. day+1.
    centred = [volkov3.loc[day - 1:day + 1, raw_col].mean() for day in volkov3.index]
    volkov3[raw_col + '_mean_3_days'] = np.asarray(centred)

# Target: the centred PhC mean located three rows further down.
future_means = volkov3[OLD_PHC_COLNAME].iloc[3:].tolist()
volkov3[NEW_PHC_COLNAME] = future_means + [np.nan] * 3

# Predictor: trailing PhC mean over index labels day-2 .. day.
trailing = [volkov3.loc[day - 2:day, "PhC"].mean() for day in volkov3.index]
volkov3[OLD_PHC_COLNAME] = np.asarray(trailing)

# Keep only the derived columns and the rows where all of them are defined.
volkov3.drop(columns=cols_to_drop, inplace=True)
volkov3.dropna(inplace=True)

# Expose the target under the plain name 'PhC' (it ends up as the last column).
volkov3['PhC'] = volkov3[NEW_PHC_COLNAME]
volkov3.drop(columns=[NEW_PHC_COLNAME], inplace=True)

volkov3.head()

Unnamed: 0,PhC_mean_3_days,TRIMP1_mean_3_days,TRIMP2_mean_3_days,TRIMP3_mean_3_days,TRIMP4_mean_3_days,PhC
35,0.372535,1210.96677,534.391373,1705.116494,1258.114116,0.454231
36,0.42324,1250.191466,495.574388,1704.549627,1258.606351,0.461979
37,0.430988,957.759369,178.789358,1402.483608,1063.290216,0.450571
38,0.460214,946.339431,194.499908,1402.702149,1062.77782,0.441445
39,0.454231,916.326411,225.537217,1402.339838,1064.446482,0.454477


In [50]:
# 3-day-mean feature table for Prokop'ev: smoothed predictors plus a PhC
# target taken three rows ahead.
OLD_PHC_COLNAME = "PhC_mean_3_days"
NEW_PHC_COLNAME = 'PhC_mean_target_3_days'

prokopbev3 = prokopbev.copy()
# Index-aligned assignment: the first PhC value is absent from the right-hand
# side, so it becomes NaN in the first row.
prokopbev3["PhC"] = prokopbev3["PhC"].iloc[1:]

cols_to_drop = list(prokopbev3.columns)
for raw_col in cols_to_drop:
    # Centred window over index labels day-1 .. day+1.
    centred = [prokopbev3.loc[day - 1:day + 1, raw_col].mean() for day in prokopbev3.index]
    prokopbev3[raw_col + '_mean_3_days'] = np.asarray(centred)

# Target: the centred PhC mean located three rows further down.
future_means = prokopbev3[OLD_PHC_COLNAME].iloc[3:].tolist()
prokopbev3[NEW_PHC_COLNAME] = future_means + [np.nan] * 3

# Predictor: trailing PhC mean over index labels day-2 .. day.
trailing = [prokopbev3.loc[day - 2:day, "PhC"].mean() for day in prokopbev3.index]
prokopbev3[OLD_PHC_COLNAME] = np.asarray(trailing)

# Keep only the derived columns and the rows where all of them are defined.
prokopbev3.drop(columns=cols_to_drop, inplace=True)
prokopbev3.dropna(inplace=True)

# Expose the target under the plain name 'PhC' (it ends up as the last column).
prokopbev3['PhC'] = prokopbev3[NEW_PHC_COLNAME]
prokopbev3.drop(columns=[NEW_PHC_COLNAME], inplace=True)

prokopbev3.head()

Unnamed: 0,PhC_mean_3_days,TRIMP1_mean_3_days,TRIMP2_mean_3_days,TRIMP3_mean_3_days,TRIMP4_mean_3_days,PhC
18,0.368382,609.525069,536.251878,2181.624106,1167.594619,0.342096
19,0.389854,740.071903,518.124858,1660.213104,1064.253069,0.342096
21,0.376711,463.190704,278.563599,1473.378747,1693.864059,0.388945
22,0.342096,422.042549,287.030827,1273.203698,1683.720331,0.378316
23,0.342096,659.363688,247.823057,1774.827269,890.98291,0.386947


In [51]:
# Rebinds `spacemen` to the 3-day-mean tables (the daily-table mapping is no
# longer reachable under this name) and repeats the interaction test on them.
spacemen = {"Artem'ev":artembev3, "Prokop'ev":prokopbev3, "Volkov":volkov3}
p_vals3 = compute_interaction_p_values(spacemen)

In [52]:
# np.unique returns the distinct names in sorted order; show the first person's rows.
men = np.unique(p_vals3['spaceman'])
p_vals3[p_vals3['spaceman'] == men[0]]

Unnamed: 0,spaceman,factor#1,factor#2,P
0,Artem'ev,TRIMP1_mean_3_days,TRIMP2_mean_3_days,0.994864
1,Artem'ev,TRIMP1_mean_3_days,TRIMP3_mean_3_days,0.078375
2,Artem'ev,TRIMP1_mean_3_days,TRIMP4_mean_3_days,0.027527
3,Artem'ev,TRIMP1_mean_3_days,PhC_mean_3_days,0.749764
4,Artem'ev,TRIMP2_mean_3_days,TRIMP3_mean_3_days,0.063771
5,Artem'ev,TRIMP2_mean_3_days,TRIMP4_mean_3_days,0.220408
6,Artem'ev,TRIMP2_mean_3_days,PhC_mean_3_days,0.441549
7,Artem'ev,TRIMP3_mean_3_days,TRIMP4_mean_3_days,0.34602
8,Artem'ev,TRIMP3_mean_3_days,PhC_mean_3_days,0.465843
9,Artem'ev,TRIMP4_mean_3_days,PhC_mean_3_days,0.966609


In [53]:
# Rows for the second name in `men`.
p_vals3[p_vals3['spaceman'] == men[1]]

Unnamed: 0,spaceman,factor#1,factor#2,P
10,Prokop'ev,PhC_mean_3_days,TRIMP1_mean_3_days,0.031428
11,Prokop'ev,PhC_mean_3_days,TRIMP2_mean_3_days,0.652924
12,Prokop'ev,PhC_mean_3_days,TRIMP3_mean_3_days,0.030323
13,Prokop'ev,PhC_mean_3_days,TRIMP4_mean_3_days,0.626827
14,Prokop'ev,TRIMP1_mean_3_days,TRIMP2_mean_3_days,0.088894
15,Prokop'ev,TRIMP1_mean_3_days,TRIMP3_mean_3_days,0.079706
16,Prokop'ev,TRIMP1_mean_3_days,TRIMP4_mean_3_days,0.200988
17,Prokop'ev,TRIMP2_mean_3_days,TRIMP3_mean_3_days,0.0471
18,Prokop'ev,TRIMP2_mean_3_days,TRIMP4_mean_3_days,0.459477
19,Prokop'ev,TRIMP3_mean_3_days,TRIMP4_mean_3_days,0.825614


In [54]:
# Rows for the third name in `men`.
p_vals3[p_vals3['spaceman'] == men[2]]

Unnamed: 0,spaceman,factor#1,factor#2,P
20,Volkov,PhC_mean_3_days,TRIMP1_mean_3_days,0.718843
21,Volkov,PhC_mean_3_days,TRIMP2_mean_3_days,0.46648
22,Volkov,PhC_mean_3_days,TRIMP3_mean_3_days,0.371913
23,Volkov,PhC_mean_3_days,TRIMP4_mean_3_days,0.912338
24,Volkov,TRIMP1_mean_3_days,TRIMP2_mean_3_days,0.185161
25,Volkov,TRIMP1_mean_3_days,TRIMP3_mean_3_days,0.828596
26,Volkov,TRIMP1_mean_3_days,TRIMP4_mean_3_days,0.996739
27,Volkov,TRIMP2_mean_3_days,TRIMP3_mean_3_days,0.941164
28,Volkov,TRIMP2_mean_3_days,TRIMP4_mean_3_days,0.868233
29,Volkov,TRIMP3_mean_3_days,TRIMP4_mean_3_days,0.827322


In [55]:
# Move the kernel's working directory one level up (presumably so that the
# `src` package imported below resolves).
# NOTE(review): not idempotent -- every re-run moves up one more level, and the
# loaders defined earlier use '../data' paths relative to the starting directory.
%cd ..

/home/vyacheslav/Projects/course-project


In [56]:
class MeanPredictor:
    """Baseline regressor that always predicts the mean of the training targets."""

    def __init__(self):
        # Mean of the targets passed to fit(); None until fit() has been called.
        self.mean = None

    def fit(self, x, y):
        """Remember the mean of ``y``; ``x`` is accepted but not used.

        Returns ``self`` so that ``MeanPredictor().fit(x, y).predict(x)`` works.
        """
        self.mean = np.mean(y)
        return self

    def predict(self, x):
        """Return a 1-D array with one copy of the fitted mean per row of ``x``.

        ``x`` may be anything ``np.shape`` understands (array, DataFrame,
        nested list).

        Raises
        ------
        RuntimeError
            If called before ``fit``; previously this silently produced an
            array filled with ``None``.
        """
        if self.mean is None:
            raise RuntimeError("MeanPredictor.predict() called before fit()")
        return np.full(np.shape(x)[0], self.mean)

In [57]:
from src.tree_boosting import TreeBoosting
from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from src import utils
# Candidate regressors keyed by the label used in the result printouts:
# boosted-tree hybrids and plain trees (constructed with 1 and 2 respectively),
# an ordinary linear fit and the constant-mean baseline.
models = {
    **{f"hybrid-{level}": TreeBoosting(level) for level in (1, 2)},
    **{f"tree-{level}": DecisionTreeRegressor(max_depth=level) for level in (1, 2)},
    "linear": LinearRegression(fit_intercept=True),
    "mean": MeanPredictor(),
}

In [58]:
# Product columns added in place to the 3-day tables, one per factor pair.
artembev3['TRIMP14_mean_3_days'] = (
    artembev3['TRIMP1_mean_3_days'] * artembev3['TRIMP4_mean_3_days']
)

# New column -> the two columns whose product it holds.
prokopbev_products = {
    'TRIMP23_mean_3_days': ('TRIMP2_mean_3_days', 'TRIMP3_mean_3_days'),
    'TRIMP1PhC_mean_3_days': ('TRIMP1_mean_3_days', 'PhC_mean_3_days'),
    'TRIMP3PhC_mean_3_days': ('TRIMP3_mean_3_days', 'PhC_mean_3_days'),
}
for product_col, (left_col, right_col) in prokopbev_products.items():
    prokopbev3[product_col] = prokopbev3[left_col] * prokopbev3[right_col]

# One experiment per interaction term: each Prokop'ev variant keeps exactly one
# product column and drops the other two.
spacemen = {"Artem'ev 1*4": artembev3}
for label, kept_col in (
    ("Prokop'ev 2*3", 'TRIMP23_mean_3_days'),
    ("Prokop'ev 1*PhC", 'TRIMP1PhC_mean_3_days'),
    ("Prokop'ev 3*PhC", 'TRIMP3PhC_mean_3_days'),
):
    unwanted = [name for name in prokopbev_products if name != kept_col]
    spacemen[label] = prokopbev3.drop(columns=unwanted)

In [59]:
# Repeated hold-out evaluation: for every experiment and every model, collect
# one score per seed in range(ITERATIONS).
ITERATIONS = 100
TEST_SIZE = 0.2
# experiment label -> model label -> list of ITERATIONS scores
r2_scores = {}

for man, frame in spacemen.items():
    X, y = utils.convert_to_3day_mean_format(frame)
    r2_scores[man] = {
        name: [
            utils.train_and_test(
                # TEST_SIZE is now actually used (the call used to repeat the
                # literal 0.2); the seed doubles as random_state so that,
                # presumably, every model is scored on the same splits.
                estimator, X, y, test_size=TEST_SIZE, random_state=seed, draw=False)
            for seed in range(ITERATIONS)
        ]
        for name, estimator in models.items()
    }

In [60]:
# Report only the constant-mean baseline for each experiment.
for man, scores_by_model in r2_scores.items():
    print(man)
    model = 'mean'
    baseline_scores = scores_by_model.get(model)
    print(f'\t{model}, mean R²: {np.mean(baseline_scores)}')
    print(f'\t{model}, median R²: {np.median(baseline_scores)}')

Artem'ev 1*4
	mean, mean R²: -0.07023976254659171
	mean, median R²: -0.0289400646117024
Prokop'ev 2*3
	mean, mean R²: -0.12367954316555481
	mean, median R²: -0.0386654507629538
Prokop'ev 1*PhC
	mean, mean R²: -0.12367954316555481
	mean, median R²: -0.0386654507629538
Prokop'ev 3*PhC
	mean, mean R²: -0.12367954316555481
	mean, median R²: -0.0386654507629538


In [20]:
# Score of every model per experiment via utils.train_and_test_nosplit.
# NOTE(review): this cell's execution count is lower than that of the cell
# before it, and the saved printout has no 'mean' entry although `models`
# defines one -- the stored output looks stale; re-run to refresh it.
r2_scores_full = {}

for man, frame in spacemen.items():
    X, y = utils.convert_to_3day_mean_format(frame)
    r2_scores_full[man] = {
        name: utils.train_and_test_nosplit(estimator, X, y, draw=False)
        for name, estimator in models.items()
    }

In [21]:
# One line per model under each experiment heading.
for man, scores_by_model in r2_scores_full.items():
    print(man)
    for model, score in scores_by_model.items():
        print(f'\t{model}, full dataset R²: {score}')

Artem'ev 1*4
	hybrid-1, full dataset R²: 0.7066049440681899
	hybrid-2, full dataset R²: 0.7756172933806553
	tree-1, full dataset R²: 0.5020226623471538
	tree-2, full dataset R²: 0.5670300062157089
	linear, full dataset R²: 0.5668924940785179
Prokop'ev 2*3
	hybrid-1, full dataset R²: 0.48692731324634986
	hybrid-2, full dataset R²: 0.7202097778488097
	tree-1, full dataset R²: 0.32805580417917346
	tree-2, full dataset R²: 0.48348565470423965
	linear, full dataset R²: 0.2858219153350764
Prokop'ev 1*PhC
	hybrid-1, full dataset R²: 0.5028454986624493
	hybrid-2, full dataset R²: 0.6638050495050916
	tree-1, full dataset R²: 0.32805580417917346
	tree-2, full dataset R²: 0.48348565470423965
	linear, full dataset R²: 0.2688195529271705
Prokop'ev 3*PhC
	hybrid-1, full dataset R²: 0.4960173829687534
	hybrid-2, full dataset R²: 0.7290386767968039
	tree-1, full dataset R²: 0.3280558041791737
	tree-2, full dataset R²: 0.48348565470423965
	linear, full dataset R²: 0.2670449673223484
