In [6]:
import re
import numpy as np
import pandas as pd
import time
from sklearn.model_selection import train_test_split, cross_val_predict, StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression, SGDClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, HistGradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
import joblib  # For model saving and loading
import pickle
from sklearn.metrics import balanced_accuracy_score, confusion_matrix, classification_report, f1_score
from imblearn.over_sampling import SMOTE
import pandas as pd
import numpy as np
import time
import pickle
import re
import joblib
from collections import Counter
import math

class ClassificationModelPipeline:
    def __init__(self, data_path, target='T0', ftrz = ['Y','M','D','W','H','S','SNODP','SWGDN','LWGAB','T2M','SWLAND','GHTSKIN','HFLUX','SPEED','TLML','TSH','EVPSOIL','LWLAND','TS','QV2M','SLP'], seed=42, test_size=0.2, n_splits=2, export=1, use_smote=False):
        self.data_path = data_path
        self.prdHrz = int(re.search(r'(\d+)Days', self.data_path).group(1))
        self.loc = str(re.search(r'_(\w+)_', self.data_path).group(1))
        self.target = target
        self.ftrz = ftrz
        self.seed = seed
        self.test_size = test_size
        self.n_splits = n_splits
        self.export = export
        self.use_smote = use_smote
        self.models = {
            "Random Forest Classifier": RandomForestClassifier(n_jobs=-1, random_state=seed),
#             "Extra Trees Classifier": ExtraTreesClassifier(n_jobs=-1, random_state=seed),
            "SGD Classifier": SGDClassifier(random_state=seed),
            "KNeighbors Classifier": KNeighborsClassifier(),
            "Support Vector Classifier": SVC(),
            "MLP Classifier": MLPClassifier(random_state=seed, max_iter=1000),
#             "HistGradientBoosting Classifier": HistGradientBoostingClassifier(random_state=seed)
        }
        self.results = pd.DataFrame(columns=["Model", "Train Time (s)", "Train wAccuracy", "Train f1w", "Train FAR", "Test Time (s)", "Test wAccuracy", "Test f1w", "Test FAR"])

    def load_and_prepare_data(self):
        print('1. Loading data', end=' ')
        data = pd.read_csv(self.data_path, header=0, usecols= [self.target] + self.ftrz )
        if ~all(isinstance(x, int) for x in data[self.target]):
            data[f"{self.target}"] = pd.cut(data[f'{self.target}'],bins=[-math.inf,0,+math.inf],labels=[0,1])

        print(f"(#samples={len(data)}, #features={data.loc[:, data.columns != self.target].shape[1]}, target='{self.target}')")
    
        # Splitting data into features and target
        X = data.loc[:, data.columns != self.target].values
        y = data[self.target].values

        # Splitting into train and test sets
        print('2. Splitting data', end=' ')
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=self.test_size, random_state=self.seed, stratify=y)
        print(f'(#training samples={len(X_train)}|{Counter(y_train)}, #testing samples={len(X_test)}|{Counter(y_test)})')

        # Export data and scaler if specified
        if self.export == 1:
            print('2-1. Exporting data...')
            train_data = pd.DataFrame(X_train, columns=data.columns[data.columns != self.target])
            train_data[self.target] = y_train
            test_data = pd.DataFrame(X_test, columns=data.columns[data.columns != self.target])
            test_data[self.target] = y_test

            if self.use_smote == 0:
                train_data.to_csv(f'FTstates_{self.loc}_{self.target}_Trainset_{self.prdHrz}Days.csv', index=False)
                test_data.to_csv(f'FTstates_{self.loc}_{self.target}_Testset_{self.prdHrz}Days.csv', index=False)
            else:
                train_data.to_csv(f'FTstates_{self.loc}_{self.target}_Trainset_{self.prdHrz}Days_SMOTE.csv', index=False)
                test_data.to_csv(f'FTstates_{self.loc}_{self.target}_Testset_{self.prdHrz}Days_SMOTE.csv', index=False)               

        # Standardizing features
        print('3. Standardizing X...')
        scaler = StandardScaler()
        X_train = scaler.fit_transform(X_train)
        X_test = scaler.transform(X_test)
        self.X_train, self.X_test, self.y_train, self.y_test = X_train, X_test, y_train, y_test
        self.scaler = scaler
        self.data = data

        # Handling imbalance with SMOTE if specified
        if self.use_smote:
            print('4. Applying SMOTE for class balancing...',end=' ')
            smote = SMOTE(random_state=self.seed)
            self.X_train, self.y_train = smote.fit_resample(X_train, y_train)
            print(f'before {Counter(y_train)}| after {Counter(self.y_train)}')

        # Export data and scaler if specified
        if self.export == 1:
            print('5. Exporting scaler...')
            if self.use_smote == 0:
                with open(f'FTstates_{self.loc}_{self.target}_StdScaler_{self.prdHrz}Days.pkl', 'wb') as f:
                    pickle.dump(self.scaler, f)
            else:
                with open(f'FTstates_{self.loc}_{self.target}_StdScaler_{self.prdHrz}Days_SMOTE.pkl', 'wb') as f:
                    pickle.dump(self.scaler, f)                    

    def A_initial_benchmark(self):
        print(f'6. Initial benchmark | Model learning:')
        for name, model in self.models.items():
            print(f'\t-{name}', end=' ')
            if self.n_splits == 1:
                start_train = time.time()
                model.fit(self.X_train, self.y_train)
                end_train = time.time()
                y_train_pred = model.predict(self.X_train)
            else:
                kf = StratifiedKFold(n_splits=self.n_splits, shuffle=True, random_state=self.seed)
                y_train_pred = cross_val_predict(model, self.X_train, self.y_train, cv=kf)
                start_train = time.time()
                model.fit(self.X_train, self.y_train)
                end_train = time.time()

            start_test = time.time()
            y_test_pred = model.predict(self.X_test)
            end_test = time.time()

            train_accuracy = balanced_accuracy_score(self.y_train, y_train_pred)
            test_accuracy = balanced_accuracy_score(self.y_test, y_test_pred)
            
            # Compute False Alarm Rates
            tn, fp, fn, tp = confusion_matrix(self.y_train, y_train_pred).ravel()
            train_FAR = fp / (fp + tn)
            train_f1 = f1_score(self.y_train, y_train_pred, average='weighted')

            tn, fp, fn, tp = confusion_matrix(self.y_test, y_test_pred).ravel()
            test_FAR = fp / (fp + tn)
            test_f1 = f1_score(self.y_test, y_test_pred, average='weighted')
            
            train_time = end_train - start_train
            test_time = end_test - start_test

            self.results = pd.concat([self.results, pd.DataFrame({
                "Model": [name],
                "Train Time (s)": [train_time],
                "Train wAccuracy": [train_accuracy],
                "Train f1w": [train_f1],
                "Train FAR": [train_FAR],
                "Test Time (s)": [test_time],
                "Test wAccuracy": [test_accuracy],
                "Test f1w": [test_f1],
                "Test FAR": [test_FAR]
            })], ignore_index=True)
            print(f'[Test Accuracy={test_accuracy}, FAR={test_FAR}]')

    def X_SelectFeatures_ML(self):
        self.results_B = self.results
        best_model_name = 'Extra Trees Classifier'#self.results.iloc[0]['Model']
        best_model = ExtraTreesClassifier(n_jobs=-1, random_state=self.seed)#self.models[best_model_name]
#         self.X_train = self.X_train[:, [self.data.columns.get_loc(c) for c in ['GHTSKIN','H','M','S','SNODP','W','Y'] if c in self.data]]
#         self.X_test = self.X_test[:, [self.data.columns.get_loc(c) for c in ['GHTSKIN','H','M','S','SNODP','W','Y'] if c in self.data]]
        print(f'5. {best_model_name} baseline | Model learning:',end=' ')
        if self.n_splits == 1:
            start_train = time.time()
            best_model.fit(self.X_train, self.y_train)
            end_train = time.time()
            y_train_pred = best_model.predict(self.X_train)
        else:
            kf = StratifiedKFold(n_splits=self.n_splits, shuffle=True, random_state=self.seed)
            y_train_pred = cross_val_predict(best_model, self.X_train, self.y_train, cv=kf)
            start_train = time.time()
            best_model.fit(self.X_train, self.y_train)
            end_train = time.time()

        start_test = time.time()
        y_test_pred = best_model.predict(self.X_test)
        end_test = time.time()
        
        # Export predictions
        print(f'7. Exporting {best_model_name} and test data...')
        with open(f'FTstates_{self.loc}_{self.target}_ET_{self.prdHrz}Days.pkl', 'wb') as f:
            pickle.dump(best_model, f)
        X_test_unstandardized = self.scaler.inverse_transform(self.X_test)
        tmp1 = pd.DataFrame(X_test_unstandardized, columns=self.ftrz)
        tmp = pd.DataFrame({
            'Targets': self.y_test,
            'Predicted': y_test_pred
        })
        tmp = pd.concat([tmp1, tmp], axis=1)
        tmp.to_csv(f'FTstates_{self.loc}_{self.target}_testresults_{self.prdHrz}Days.csv', index=False)


        train_accuracy = balanced_accuracy_score(self.y_train, y_train_pred)
        test_accuracy = balanced_accuracy_score(self.y_test, y_test_pred)
        
        # Compute False Alarm Rates
        tn, fp, fn, tp = confusion_matrix(self.y_train, y_train_pred).ravel()
        train_FAR = fp / (fp + tn)
        train_f1 = f1_score(self.y_train, y_train_pred, average='weighted')

        tn, fp, fn, tp = confusion_matrix(self.y_test, y_test_pred).ravel()
        test_FAR = fp / (fp + tn)
        test_f1 = f1_score(self.y_test, y_test_pred, average='weighted')
        
        train_time = end_train - start_train
        test_time = end_test - start_test

        self.results = pd.concat([self.results, pd.DataFrame({
            "Model": [best_model_name],
            "Train Time (s)": [train_time],
            "Train wAccuracy": [train_accuracy],
            "Train f1w": [train_f1],
            "Train FAR": [train_FAR],
            "Test Time (s)": [test_time],
            "Test wAccuracy": [test_accuracy],
            "Test f1w": [test_f1],
            "Test FAR": [test_FAR]
        })], ignore_index=True)
        print(f'[Test Accuracy={test_accuracy}, FAR={test_FAR}]')
        
        
    def display_results(self):
        if not self.results.empty:
            self.results = self.results.sort_values(by="Test wAccuracy", ascending=False)
            display(self.results)

    def display_results_unsorted(self):
        if not self.results.empty:
#             self.results = self.results.sort_values(by="Test wAccuracy", ascending=False)
            display(self.results)
            
    def save_best_model(self):
        best_model_name = self.results.iloc[0]['Model']
        best_model = self.models[best_model_name]
        path = f'classification_{self.prdHrz}Days_bestmodel.pkl'
        joblib.dump(best_model, path)
        print(f"Best model '{best_model_name}' saved to '{path}'")

    def load_model(self):
        with open(f'classification_StdScaler_{self.prdHrz}Days.pkl', 'rb') as f:
            loaded_scaler = pickle.load(f)
        return joblib.load(f'classification_{self.prdHrz}Days_bestmodel.pkl')


In [3]:

if __name__ == "__main__":
    for hrz in list([0,7,30,90]):
        data_path=f'/home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T0_{hrz}Days.csv' 
        print(f'T0|Horizon={hrz}:')
        pipeline = ClassificationModelPipeline(data_path=data_path, target='T0',  ftrz=['Y', 'M', 'D', 'W', 'SWGDN', 'T2M', 'GHTSKIN', 'HFLUX', 'SPEED'], seed=42, test_size=0.2, n_splits=10, export=0, use_smote=False)
        pipeline.load_and_prepare_data()
        pipeline.X_SelectFeatures_ML()
        pipeline.display_results()

#     for hrz in list([0,7,30,90]):
#         print(f'T8|Horizon={hrz}:')
#         data_path=f'/home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T8_{hrz}Days.csv' 
#         pipeline = ClassificationModelPipeline(data_path=data_path, target='T8', ftrz=['Y', 'S', 'SNODP', 'SWLAND', 'GHTSKIN', 'SPEED', 'TLML', 'EVPSOIL', 'SLP'], seed=42, test_size=0.2, n_splits=10, export=0, use_smote=False)
#         pipeline.load_and_prepare_data()
#         pipeline.X_SelectFeatures_ML()
#         pipeline.display_results()

#     for hrz in list([0,7,30,90]):
#         print(f'T16|Horizon={hrz}:')
#         data_path=f'/home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T16_{hrz}Days.csv' 
#         pipeline = ClassificationModelPipeline(data_path=data_path, target='T16', ftrz=['Y', 'M', 'D', 'S', 'SNODP', 'GHTSKIN', 'HFLUX', 'TLML', 'SLP'], seed=42, test_size=0.2, n_splits=10, export=0, use_smote=False)
#         pipeline.load_and_prepare_data()
#         pipeline.X_SelectFeatures_ML()
#         pipeline.display_results()


T8|Horizon=0:
1. Loading data (#samples=6080, #features=9, target='T8')
2. Splitting data (#training samples=4864|Counter({0: 3232, 1: 1632}), #testing samples=1216|Counter({0: 808, 1: 408}))
3. Standardizing X...
5. Extra Trees Classifier baseline | Model learning: 7. Exporting Extra Trees Classifier and test data...
[Test Accuracy=0.9815691127936323, FAR=0.009900990099009901]


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,Extra Trees Classifier,0.149009,0.978648,0.981687,0.012067,0.029918,0.981569,0.984361,0.009901


T8|Horizon=7:
1. Loading data (#samples=6071, #features=9, target='T8')
2. Splitting data (#training samples=4856|Counter({0: 3226, 1: 1630}), #testing samples=1215|Counter({0: 807, 1: 408}))
3. Standardizing X...
5. Extra Trees Classifier baseline | Model learning: 7. Exporting Extra Trees Classifier and test data...
[Test Accuracy=0.942775530286464, FAR=0.04337050805452292]


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,Extra Trees Classifier,0.155088,0.946112,0.950845,0.039678,0.030286,0.942776,0.947419,0.043371


T8|Horizon=30:
1. Loading data (#samples=6048, #features=9, target='T8')
2. Splitting data (#training samples=4838|Counter({0: 3206, 1: 1632}), #testing samples=1210|Counter({0: 802, 1: 408}))
3. Standardizing X...
5. Extra Trees Classifier baseline | Model learning: 7. Exporting Extra Trees Classifier and test data...
[Test Accuracy=0.9180235685296563, FAR=0.05610972568578554]


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,Extra Trees Classifier,0.156592,0.917533,0.925645,0.057704,0.030243,0.918024,0.926468,0.05611


T8|Horizon=90:
1. Loading data (#samples=5988, #features=9, target='T8')
2. Splitting data (#training samples=4790|Counter({0: 3158, 1: 1632}), #testing samples=1198|Counter({0: 790, 1: 408}))
3. Standardizing X...
5. Extra Trees Classifier baseline | Model learning: 7. Exporting Extra Trees Classifier and test data...
[Test Accuracy=0.9527860511293125, FAR=0.04050632911392405]


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,Extra Trees Classifier,0.168638,0.951272,0.954574,0.038632,0.030963,0.952786,0.955054,0.040506


In [14]:

if __name__ == "__main__":
    for hrz in list([0,7,30,90]):
        data_path=f'/home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T0_{hrz}Days.csv' 
        print(f'T0|Horizon={hrz}:')
        pipeline = ClassificationModelPipeline(data_path=data_path, target='T0',  ftrz=['Y','M','H','S','SNODP','GHTSKIN','TSH'], seed=42, test_size=0.2, n_splits=10, export=0, use_smote=False)
        pipeline.load_and_prepare_data()
        pipeline.X_SelectFeatures_ML()
        pipeline.display_results()

    for hrz in list([0,7,30,90]):
        print(f'T8|Horizon={hrz}:')
        data_path=f'/home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T_07_{hrz}Days.csv' 
        pipeline = ClassificationModelPipeline(data_path=data_path, target='T_07', ftrz=['Y','M','D','SNODP','EVPSOIL'], seed=42, test_size=0.2, n_splits=10, export=0, use_smote=False)
        pipeline.load_and_prepare_data()
        pipeline.X_SelectFeatures_ML()
        pipeline.display_results()

    for hrz in list([0,7,30,90]):
        print(f'T16|Horizon={hrz}:')
        data_path=f'/home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T_12_{hrz}Days.csv' 
        pipeline = ClassificationModelPipeline(data_path=data_path, target='T_12', ftrz=['Y','M','D','H'], seed=42, test_size=0.2, n_splits=10, export=0, use_smote=False)
        pipeline.load_and_prepare_data()
        pipeline.X_SelectFeatures_ML()
        pipeline.display_results()


T0|Horizon=0:
1. Loading data (#samples=4987, #features=7, target='T0')
2. Splitting data (#training samples=3989|Counter({0: 2820, 1: 1169}), #testing samples=998|Counter({0: 706, 1: 292}))
3. Standardizing X...
5. Extra Trees Classifier baseline | Model learning: 7. Exporting Extra Trees Classifier and test data...
[Test Accuracy=0.9685329271605417, FAR=0.018413597733711047]


  self.results = pd.concat([self.results, pd.DataFrame({


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,Extra Trees Classifier,0.147125,0.977155,0.980461,0.014894,0.03499,0.968533,0.973948,0.018414


T0|Horizon=7:
1. Loading data (#samples=4980, #features=7, target='T0')
2. Splitting data (#training samples=3984|Counter({0: 2815, 1: 1169}), #testing samples=996|Counter({0: 704, 1: 292}))
3. Standardizing X...
5. Extra Trees Classifier baseline | Model learning: 7. Exporting Extra Trees Classifier and test data...
[Test Accuracy=0.9696353518057286, FAR=0.01278409090909091]


  self.results = pd.concat([self.results, pd.DataFrame({


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,Extra Trees Classifier,0.143512,0.965706,0.972362,0.018117,0.034292,0.969635,0.976849,0.012784


T0|Horizon=30:
1. Loading data (#samples=4957, #features=7, target='T0')
2. Splitting data (#training samples=3965|Counter({0: 2796, 1: 1169}), #testing samples=992|Counter({0: 700, 1: 292}))
3. Standardizing X...
5. Extra Trees Classifier baseline | Model learning: 7. Exporting Extra Trees Classifier and test data...
[Test Accuracy=0.9695890410958904, FAR=0.03]


  self.results = pd.concat([self.results, pd.DataFrame({


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,Extra Trees Classifier,0.147247,0.962955,0.966806,0.027897,0.034833,0.969589,0.969932,0.03


T0|Horizon=90:
1. Loading data (#samples=4897, #features=7, target='T0')
2. Splitting data (#training samples=3917|Counter({0: 2748, 1: 1169}), #testing samples=980|Counter({0: 688, 1: 292}))
3. Standardizing X...
5. Extra Trees Classifier baseline | Model learning: 7. Exporting Extra Trees Classifier and test data...
[Test Accuracy=0.9761269512583626, FAR=0.020348837209302327]


  self.results = pd.concat([self.results, pd.DataFrame({


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,Extra Trees Classifier,0.144834,0.964394,0.970378,0.020742,0.034524,0.976127,0.977616,0.020349


T8|Horizon=0:
1. Loading data (#samples=4987, #features=5, target='T_07')
2. Splitting data (#training samples=3989|Counter({0: 2462, 1: 1527}), #testing samples=998|Counter({0: 616, 1: 382}))
3. Standardizing X...
5. Extra Trees Classifier baseline | Model learning: 7. Exporting Extra Trees Classifier and test data...
[Test Accuracy=0.9882708914122527, FAR=0.012987012987012988]


  self.results = pd.concat([self.results, pd.DataFrame({


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,Extra Trees Classifier,0.148429,0.989605,0.989479,0.010967,0.034487,0.988271,0.987988,0.012987


T8|Horizon=7:
1. Loading data (#samples=4980, #features=5, target='T_07')
2. Splitting data (#training samples=3984|Counter({0: 2457, 1: 1527}), #testing samples=996|Counter({0: 614, 1: 382}))
3. Standardizing X...
5. Extra Trees Classifier baseline | Model learning: 7. Exporting Extra Trees Classifier and test data...
[Test Accuracy=0.9805412964510463, FAR=0.004885993485342019]


  self.results = pd.concat([self.results, pd.DataFrame({


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,Extra Trees Classifier,0.148822,0.988736,0.988712,0.011396,0.035608,0.980541,0.983894,0.004886


T8|Horizon=30:
1. Loading data (#samples=4957, #features=5, target='T_07')
2. Splitting data (#training samples=3965|Counter({0: 2438, 1: 1527}), #testing samples=992|Counter({0: 610, 1: 382}))
3. Standardizing X...
5. Extra Trees Classifier baseline | Model learning: 7. Exporting Extra Trees Classifier and test data...
[Test Accuracy=0.9918032786885246, FAR=0.01639344262295082]


  self.results = pd.concat([self.results, pd.DataFrame({


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,Extra Trees Classifier,0.139045,0.989717,0.989915,0.009434,0.034716,0.991803,0.989943,0.016393


T8|Horizon=90:
1. Loading data (#samples=4897, #features=5, target='T_07')
2. Splitting data (#training samples=3917|Counter({0: 2390, 1: 1527}), #testing samples=980|Counter({0: 598, 1: 382}))
3. Standardizing X...
5. Extra Trees Classifier baseline | Model learning: 7. Exporting Extra Trees Classifier and test data...
[Test Accuracy=0.984984853525714, FAR=0.011705685618729096]


  self.results = pd.concat([self.results, pd.DataFrame({


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,Extra Trees Classifier,0.143634,0.989322,0.989284,0.010879,0.035183,0.984985,0.985714,0.011706


T16|Horizon=0:
1. Loading data (#samples=4987, #features=4, target='T_12')
2. Splitting data (#training samples=3989|Counter({0: 2984, 1: 1005}), #testing samples=998|Counter({0: 746, 1: 252}))
3. Standardizing X...
5. Extra Trees Classifier baseline | Model learning: 7. Exporting Extra Trees Classifier and test data...
[Test Accuracy=0.989382526916039, FAR=0.005361930294906166]


  self.results = pd.concat([self.results, pd.DataFrame({


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,Extra Trees Classifier,0.150079,0.983708,0.987963,0.007708,0.035145,0.989383,0.991984,0.005362


T16|Horizon=7:
1. Loading data (#samples=4980, #features=4, target='T_12')
2. Splitting data (#training samples=3984|Counter({0: 2978, 1: 1006}), #testing samples=996|Counter({0: 745, 1: 251}))
3. Standardizing X...
5. Extra Trees Classifier baseline | Model learning: 7. Exporting Extra Trees Classifier and test data...
[Test Accuracy=0.9940025134361881, FAR=0.004026845637583893]


  self.results = pd.concat([self.results, pd.DataFrame({


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,Extra Trees Classifier,0.147112,0.985197,0.988212,0.008731,0.035077,0.994003,0.994983,0.004027


T16|Horizon=30:
1. Loading data (#samples=4957, #features=4, target='T_12')
2. Splitting data (#training samples=3965|Counter({0: 2960, 1: 1005}), #testing samples=992|Counter({0: 740, 1: 252}))
3. Standardizing X...
5. Extra Trees Classifier baseline | Model learning: 7. Exporting Extra Trees Classifier and test data...
[Test Accuracy=0.9867438867438867, FAR=0.002702702702702703]


  self.results = pd.concat([self.results, pd.DataFrame({


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,Extra Trees Classifier,0.143762,0.980824,0.984633,0.011486,0.035514,0.986744,0.991914,0.002703


T16|Horizon=90:
1. Loading data (#samples=4897, #features=4, target='T_12')
2. Splitting data (#training samples=3917|Counter({0: 2912, 1: 1005}), #testing samples=980|Counter({0: 728, 1: 252}))
3. Standardizing X...
5. Extra Trees Classifier baseline | Model learning: 7. Exporting Extra Trees Classifier and test data...
[Test Accuracy=0.9747405372405373, FAR=0.006868131868131868]


  self.results = pd.concat([self.results, pd.DataFrame({


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,Extra Trees Classifier,0.137276,0.985775,0.989024,0.007555,0.035298,0.974741,0.983609,0.006868


## Benchmark with other conventional ML models

In [7]:

if __name__ == "__main__":
    for hrz in list([0,7,30,90]):
        data_path=f'/home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T0_{hrz}Days.csv' 
        print(f'T0|Horizon={hrz}:')
        pipeline = ClassificationModelPipeline(data_path=data_path, target='T0',  ftrz=['Y', 'M', 'D', 'W', 'SWGDN', 'T2M', 'GHTSKIN', 'HFLUX', 'SPEED'], seed=42, test_size=0.2, n_splits=10, export=0, use_smote=False)
        pipeline.load_and_prepare_data()
        pipeline.A_initial_benchmark()
        pipeline.display_results_unsorted()

    for hrz in list([0,7,30,90]):
        print(f'T8|Horizon={hrz}:')
        data_path=f'/home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T8_{hrz}Days.csv' 
        pipeline = ClassificationModelPipeline(data_path=data_path, target='T8', ftrz=['Y', 'S', 'SNODP', 'SWLAND', 'GHTSKIN', 'SPEED', 'TLML', 'EVPSOIL', 'SLP'], seed=42, test_size=0.2, n_splits=10, export=0, use_smote=False)
        pipeline.load_and_prepare_data()
        pipeline.A_initial_benchmark()
        pipeline.display_results_unsorted()

    for hrz in list([0,7,30,90]):
        print(f'T16|Horizon={hrz}:')
        data_path=f'/home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T16_{hrz}Days.csv' 
        pipeline = ClassificationModelPipeline(data_path=data_path, target='T16', ftrz=['Y', 'M', 'D', 'S', 'SNODP', 'GHTSKIN', 'HFLUX', 'TLML', 'SLP'], seed=42, test_size=0.2, n_splits=10, export=0, use_smote=False)
        pipeline.load_and_prepare_data()
        pipeline.A_initial_benchmark()
        pipeline.display_results_unsorted()


T0|Horizon=0:
1. Loading data (#samples=6080, #features=9, target='T0')
2. Splitting data (#training samples=4864|Counter({0: 3190, 1: 1674}), #testing samples=1216|Counter({0: 798, 1: 418}))
3. Standardizing X...
6. Initial benchmark | Model learning:
	-Random Forest Classifier 

  self.results = pd.concat([self.results, pd.DataFrame({


[Test Accuracy=0.9785828206880839, FAR=0.021303258145363407]
	-SGD Classifier [Test Accuracy=0.9757917521075415, FAR=0.012531328320802004]
	-KNeighbors Classifier [Test Accuracy=0.9570517202096149, FAR=0.021303258145363407]
	-Support Vector Classifier [Test Accuracy=0.9733994076099339, FAR=0.012531328320802004]
	-MLP Classifier [Test Accuracy=0.9714057871952608, FAR=0.021303258145363407]


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,Random Forest Classifier,0.180812,0.975466,0.9786,0.01442,0.032499,0.978583,0.978666,0.021303
1,SGD Classifier,0.00552,0.972062,0.974714,0.019436,0.000491,0.975792,0.979411,0.012531
2,KNeighbors Classifier,0.003927,0.956188,0.963083,0.021317,0.044324,0.957052,0.963709,0.021303
3,Support Vector Classifier,0.080273,0.970374,0.974871,0.015047,0.023136,0.973399,0.977751,0.012531
4,MLP Classifier,4.622818,0.975593,0.978402,0.015361,0.001375,0.971406,0.973699,0.021303


T0|Horizon=7:
1. Loading data (#samples=6071, #features=9, target='T0')
2. Splitting data (#training samples=4856|Counter({0: 3184, 1: 1672}), #testing samples=1215|Counter({0: 797, 1: 418}))
3. Standardizing X...
6. Initial benchmark | Model learning:
	-Random Forest Classifier 

  self.results = pd.concat([self.results, pd.DataFrame({


[Test Accuracy=0.9426257556746891, FAR=0.02383939774153074]
	-SGD Classifier [Test Accuracy=0.9126629165591063, FAR=0.026348808030112924]
	-KNeighbors Classifier [Test Accuracy=0.9394304599184742, FAR=0.03262233375156838]
	-Support Vector Classifier [Test Accuracy=0.9352145905999172, FAR=0.033877038895859475]
	-MLP Classifier [Test Accuracy=0.9452942553715188, FAR=0.037641154328732745]


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,Random Forest Classifier,0.190144,0.938393,0.94575,0.037688,0.032192,0.942626,0.952817,0.023839
1,SGD Classifier,0.008932,0.925203,0.932455,0.051508,0.000503,0.912663,0.930782,0.026349
2,KNeighbors Classifier,0.004092,0.920636,0.930042,0.048681,0.044569,0.93943,0.94798,0.032622
3,Support Vector Classifier,0.136731,0.933331,0.940632,0.043028,0.039964,0.935215,0.944643,0.033877
4,MLP Classifier,9.374018,0.938414,0.944205,0.043028,0.001307,0.945294,0.950617,0.037641


T0|Horizon=30:
1. Loading data (#samples=6048, #features=9, target='T0')
2. Splitting data (#training samples=4838|Counter({0: 3165, 1: 1673}), #testing samples=1210|Counter({0: 791, 1: 419}))
3. Standardizing X...
6. Initial benchmark | Model learning:
	-Random Forest Classifier 

  self.results = pd.concat([self.results, pd.DataFrame({


[Test Accuracy=0.9425020140060163, FAR=0.029077117572692796]
	-SGD Classifier [Test Accuracy=0.9230423408935247, FAR=0.08470290771175727]
	-KNeighbors Classifier [Test Accuracy=0.9144175675634902, FAR=0.051833122629582805]
	-Support Vector Classifier [Test Accuracy=0.9397608537575167, FAR=0.041719342604298354]
	-MLP Classifier [Test Accuracy=0.9375869341548265, FAR=0.03413400758533502]


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,Random Forest Classifier,0.187913,0.936917,0.944112,0.039494,0.034436,0.942502,0.951054,0.029077
1,SGD Classifier,0.007298,0.916282,0.921499,0.067615,0.000489,0.923042,0.921406,0.084703
2,KNeighbors Classifier,0.003937,0.914447,0.922875,0.058136,0.044165,0.914418,0.924598,0.051833
3,Support Vector Classifier,0.148372,0.93211,0.93726,0.051501,0.043255,0.939761,0.945455,0.041719
4,MLP Classifier,10.080127,0.937464,0.943949,0.04139,0.001347,0.937587,0.946109,0.034134


T0|Horizon=90:
1. Loading data (#samples=5988, #features=9, target='T0')
2. Splitting data (#training samples=4790|Counter({0: 3117, 1: 1673}), #testing samples=1198|Counter({0: 779, 1: 419}))
3. Standardizing X...
6. Initial benchmark | Model learning:
	-Random Forest Classifier 

  self.results = pd.concat([self.results, pd.DataFrame({


[Test Accuracy=0.9533886231966201, FAR=0.03594351732991014]
	-SGD Classifier [Test Accuracy=0.7756210918471451, FAR=0.10269576379974327]
	-KNeighbors Classifier [Test Accuracy=0.8963897169432691, FAR=0.044929396662387676]
	-Support Vector Classifier [Test Accuracy=0.9283289573254984, FAR=0.03594351732991014]
	-MLP Classifier [Test Accuracy=0.9394395237759688, FAR=0.04236200256739409]


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,Random Forest Classifier,0.244702,0.939712,0.94667,0.036894,0.055568,0.953389,0.956642,0.035944
1,SGD Classifier,0.009,0.786618,0.803065,0.161373,0.000478,0.775621,0.808051,0.102696
2,KNeighbors Classifier,0.004199,0.886081,0.903274,0.053898,0.047378,0.89639,0.913153,0.044929
3,Support Vector Classifier,0.167354,0.922597,0.936201,0.030478,0.049458,0.928329,0.938763,0.035944
4,MLP Classifier,5.548453,0.932036,0.940736,0.038499,0.001293,0.93944,0.944908,0.042362


T8|Horizon=0:
1. Loading data (#samples=6080, #features=9, target='T8')
2. Splitting data (#training samples=4864|Counter({0: 3232, 1: 1632}), #testing samples=1216|Counter({0: 808, 1: 408}))
3. Standardizing X...
6. Initial benchmark | Model learning:
	-Random Forest Classifier 

  self.results = pd.concat([self.results, pd.DataFrame({


[Test Accuracy=0.9815569792273344, FAR=0.012376237623762377]
	-SGD Classifier [Test Accuracy=0.9760726072607261, FAR=0.006188118811881188]
	-KNeighbors Classifier [Test Accuracy=0.9692778101339545, FAR=0.017326732673267328]
	-Support Vector Classifier [Test Accuracy=0.9834134148708988, FAR=0.008663366336633664]
	-MLP Classifier [Test Accuracy=0.9821636575422248, FAR=0.013613861386138614]


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,Random Forest Classifier,0.187101,0.97681,0.980443,0.012067,0.033022,0.981557,0.983553,0.012376
1,SGD Classifier,0.004909,0.971413,0.974507,0.019183,0.000521,0.976073,0.98184,0.006188
2,KNeighbors Classifier,0.003949,0.967288,0.9724,0.017017,0.041009,0.969278,0.973652,0.017327
3,Support Vector Classifier,0.062573,0.974347,0.97798,0.014542,0.018445,0.983413,0.986007,0.008663
4,MLP Classifier,5.703276,0.975569,0.978609,0.015161,0.001314,0.982164,0.983563,0.013614


T8|Horizon=7:
1. Loading data (#samples=6071, #features=9, target='T8')
2. Splitting data (#training samples=4856|Counter({0: 3226, 1: 1630}), #testing samples=1215|Counter({0: 807, 1: 408}))
3. Standardizing X...
6. Initial benchmark | Model learning:
	-Random Forest Classifier 

  self.results = pd.concat([self.results, pd.DataFrame({


[Test Accuracy=0.9360285006195787, FAR=0.04460966542750929]
	-SGD Classifier [Test Accuracy=0.913956009913259, FAR=0.04708798017348203]
	-KNeighbors Classifier [Test Accuracy=0.9286618922662002, FAR=0.04708798017348203]
	-Support Vector Classifier [Test Accuracy=0.9268031562067206, FAR=0.05080545229244114]
	-MLP Classifier [Test Accuracy=0.924958087324149, FAR=0.05204460966542751]


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,Random Forest Classifier,0.182338,0.939977,0.946652,0.039678,0.034554,0.936029,0.942422,0.04461
1,SGD Classifier,0.007365,0.927513,0.93572,0.047427,0.000523,0.913956,0.926449,0.047088
2,KNeighbors Classifier,0.004188,0.937026,0.942434,0.046807,0.0408,0.928662,0.936606,0.047088
3,Support Vector Classifier,0.121295,0.938095,0.942879,0.047737,0.034087,0.926803,0.934196,0.050805
4,MLP Classifier,8.899515,0.934272,0.940929,0.045567,0.002683,0.924958,0.932551,0.052045


T8|Horizon=30:
1. Loading data (#samples=6048, #features=9, target='T8')
2. Splitting data (#training samples=4838|Counter({0: 3206, 1: 1632}), #testing samples=1210|Counter({0: 802, 1: 408}))
3. Standardizing X...
6. Initial benchmark | Model learning:
	-Random Forest Classifier 

  self.results = pd.concat([self.results, pd.DataFrame({


[Test Accuracy=0.9087575179697814, FAR=0.06483790523690773]
	-SGD Classifier [Test Accuracy=0.896352867830424, FAR=0.08229426433915212]
	-KNeighbors Classifier [Test Accuracy=0.8890854970417095, FAR=0.07231920199501247]
	-Support Vector Classifier [Test Accuracy=0.9074464573859469, FAR=0.07481296758104738]
	-MLP Classifier [Test Accuracy=0.9001363014033543, FAR=0.06982543640897755]


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,Random Forest Classifier,0.251673,0.914452,0.922954,0.059576,0.064369,0.908758,0.917454,0.064838
1,SGD Classifier,0.007529,0.886724,0.898535,0.077043,0.000461,0.896353,0.903722,0.082294
2,KNeighbors Classifier,0.004091,0.905356,0.914704,0.066126,0.040348,0.889085,0.901563,0.072319
3,Support Vector Classifier,0.158596,0.916129,0.923836,0.060512,0.046893,0.907446,0.913597,0.074813
4,MLP Classifier,6.994016,0.915934,0.922308,0.06519,0.001271,0.900136,0.909998,0.069825


T8|Horizon=90:
1. Loading data (#samples=5988, #features=9, target='T8')
2. Splitting data (#training samples=4790|Counter({0: 3158, 1: 1632}), #testing samples=1198|Counter({0: 790, 1: 408}))
3. Standardizing X...
6. Initial benchmark | Model learning:
	-Random Forest Classifier 

  self.results = pd.concat([self.results, pd.DataFrame({


[Test Accuracy=0.9570551005212211, FAR=0.04177215189873418]
	-SGD Classifier [Test Accuracy=0.840270538595185, FAR=0.06455696202531645]
	-KNeighbors Classifier [Test Accuracy=0.8989048150905932, FAR=0.0379746835443038]
	-Support Vector Classifier [Test Accuracy=0.929663067758749, FAR=0.030379746835443037]
	-MLP Classifier [Test Accuracy=0.9382414991312981, FAR=0.030379746835443037]


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,Random Forest Classifier,0.182771,0.949878,0.953324,0.039582,0.032569,0.957055,0.957609,0.041772
1,SGD Classifier,0.011697,0.855748,0.879939,0.063015,0.000525,0.840271,0.868224,0.064557
2,KNeighbors Classifier,0.003937,0.881913,0.900921,0.055415,0.040662,0.898905,0.918035,0.037975
3,Support Vector Classifier,0.178117,0.918562,0.933068,0.034199,0.049334,0.929663,0.942021,0.03038
4,MLP Classifier,4.060909,0.931644,0.940566,0.039899,0.001277,0.938241,0.948023,0.03038


T16|Horizon=0:
1. Loading data (#samples=6080, #features=9, target='T16')
2. Splitting data (#training samples=4864|Counter({0: 3267, 1: 1597}), #testing samples=1216|Counter({0: 817, 1: 399}))
3. Standardizing X...
6. Initial benchmark | Model learning:
	-Random Forest Classifier 

  self.results = pd.concat([self.results, pd.DataFrame({


[Test Accuracy=0.9795418779506906, FAR=0.01835985312117503]
	-SGD Classifier [Test Accuracy=0.9644751413417264, FAR=0.01591187270501836]
	-KNeighbors Classifier [Test Accuracy=0.9763070466864836, FAR=0.009791921664626682]
	-Support Vector Classifier [Test Accuracy=0.980765868158769, FAR=0.01591187270501836]
	-MLP Classifier [Test Accuracy=0.985078976511045, FAR=0.009791921664626682]


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,Random Forest Classifier,0.181292,0.980429,0.982529,0.013468,0.032454,0.979542,0.9803,0.01836
1,SGD Classifier,0.006407,0.97279,0.976547,0.016223,0.000445,0.964475,0.971132,0.015912
2,KNeighbors Classifier,0.003993,0.974704,0.976792,0.019284,0.044792,0.976307,0.981042,0.009792
3,Support Vector Classifier,0.064282,0.976881,0.978642,0.018059,0.019216,0.980766,0.981931,0.015912
4,MLP Classifier,6.272275,0.979789,0.982518,0.012244,0.00125,0.985079,0.986842,0.009792


T16|Horizon=7:
1. Loading data (#samples=6071, #features=9, target='T16')
2. Splitting data (#training samples=4856|Counter({0: 3261, 1: 1595}), #testing samples=1215|Counter({0: 816, 1: 399}))
3. Standardizing X...
6. Initial benchmark | Model learning:
	-Random Forest Classifier 

  self.results = pd.concat([self.results, pd.DataFrame({


[Test Accuracy=0.9577528379772962, FAR=0.031862745098039214]
	-SGD Classifier [Test Accuracy=0.9227480465870559, FAR=0.0392156862745098]
	-KNeighbors Classifier [Test Accuracy=0.949704223794781, FAR=0.04044117647058824]
	-Support Vector Classifier [Test Accuracy=0.9423236399823087, FAR=0.05269607843137255]
	-MLP Classifier [Test Accuracy=0.9565549904172195, FAR=0.03676470588235294]


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,Random Forest Classifier,0.184925,0.953619,0.959415,0.029439,0.032469,0.957753,0.961378,0.031863
1,SGD Classifier,0.006448,0.934995,0.94159,0.045998,0.000469,0.922748,0.935502,0.039216
2,KNeighbors Classifier,0.00417,0.949932,0.954753,0.036185,0.044322,0.949704,0.953218,0.040441
3,Support Vector Classifier,0.113427,0.94401,0.949801,0.039252,0.03269,0.942324,0.944338,0.052696
4,MLP Classifier,10.292111,0.953486,0.958624,0.031585,0.001244,0.956555,0.958976,0.036765


T16|Horizon=30:
1. Loading data (#samples=6048, #features=9, target='T16')
2. Splitting data (#training samples=4838|Counter({0: 3241, 1: 1597}), #testing samples=1210|Counter({0: 811, 1: 399}))
3. Standardizing X...
6. Initial benchmark | Model learning:
	-Random Forest Classifier 

  self.results = pd.concat([self.results, pd.DataFrame({


[Test Accuracy=0.9594037498184425, FAR=0.016029593094944512]
	-SGD Classifier [Test Accuracy=0.9196851561703272, FAR=0.06288532675709001]
	-KNeighbors Classifier [Test Accuracy=0.9288928239217031, FAR=0.03945745992601726]
	-Support Vector Classifier [Test Accuracy=0.9463563347332573, FAR=0.029593094944512947]
	-MLP Classifier [Test Accuracy=0.9475692931465531, FAR=0.02466091245376079]


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,Random Forest Classifier,0.187917,0.961073,0.964691,0.028386,0.033492,0.959404,0.96763,0.01603
1,SGD Classifier,0.011317,0.91254,0.918732,0.070349,0.000786,0.919685,0.925894,0.062885
2,KNeighbors Classifier,0.003881,0.932265,0.938712,0.049059,0.044326,0.928893,0.939492,0.039457
3,Support Vector Classifier,0.138391,0.943445,0.950174,0.036717,0.041138,0.946356,0.954442,0.029593
4,MLP Classifier,5.67627,0.955129,0.960511,0.029003,0.001416,0.947569,0.956855,0.024661


T16|Horizon=90:
1. Loading data (#samples=5988, #features=9, target='T16')
2. Splitting data (#training samples=4790|Counter({0: 3193, 1: 1597}), #testing samples=1198|Counter({0: 799, 1: 399}))
3. Standardizing X...
6. Initial benchmark | Model learning:
	-Random Forest Classifier 

  self.results = pd.concat([self.results, pd.DataFrame({


[Test Accuracy=0.9705615728934351, FAR=0.016270337922403004]
	-SGD Classifier [Test Accuracy=0.8696961427348096, FAR=0.03504380475594493]
	-KNeighbors Classifier [Test Accuracy=0.9217019394543932, FAR=0.03629536921151439]
	-Support Vector Classifier [Test Accuracy=0.9492598831245824, FAR=0.01877346683354193]
	-MLP Classifier [Test Accuracy=0.9605396469898149, FAR=0.02127659574468085]


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,Random Forest Classifier,0.265031,0.96086,0.965339,0.025681,0.065218,0.970562,0.974926,0.01627
1,SGD Classifier,0.010823,0.864112,0.88991,0.053241,0.00047,0.869696,0.899193,0.035044
2,KNeighbors Classifier,0.003855,0.926574,0.93679,0.04228,0.044296,0.921702,0.93532,0.036295
3,Support Vector Classifier,0.158548,0.932055,0.94317,0.033824,0.044548,0.94926,0.959694,0.018773
4,MLP Classifier,6.250063,0.94724,0.953822,0.032884,0.001386,0.96054,0.966547,0.021277


In [8]:

if __name__ == "__main__":
    for hrz in list([0,7,30,90]):
        data_path=f'/home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T0_{hrz}Days.csv' 
        print(f'T0|Horizon={hrz}:')
        pipeline = ClassificationModelPipeline(data_path=data_path, target='T0',  ftrz=['Y','M','H','S','SNODP','GHTSKIN','TSH'], seed=42, test_size=0.2, n_splits=10, export=0, use_smote=False)
        pipeline.load_and_prepare_data()
        pipeline.A_initial_benchmark()
        pipeline.display_results_unsorted()

    for hrz in list([0,7,30,90]):
        print(f'T07|Horizon={hrz}:')
        data_path=f'/home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T_07_{hrz}Days.csv' 
        pipeline = ClassificationModelPipeline(data_path=data_path, target='T_07', ftrz=['Y','M','D','SNODP','EVPSOIL'], seed=42, test_size=0.2, n_splits=10, export=0, use_smote=False)
        pipeline.load_and_prepare_data()
        pipeline.A_initial_benchmark()
        pipeline.display_results_unsorted()

    for hrz in list([0,7,30,90]):
        print(f'T12|Horizon={hrz}:')
        data_path=f'/home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T_12_{hrz}Days.csv' 
        pipeline = ClassificationModelPipeline(data_path=data_path, target='T_12', ftrz=['Y','M','D','H'], seed=42, test_size=0.2, n_splits=10, export=0, use_smote=False)
        pipeline.load_and_prepare_data()
        pipeline.A_initial_benchmark()
        pipeline.display_results_unsorted()

T0|Horizon=0:
1. Loading data (#samples=4987, #features=7, target='T0')
2. Splitting data (#training samples=3989|Counter({0: 2820, 1: 1169}), #testing samples=998|Counter({0: 706, 1: 292}))
3. Standardizing X...
6. Initial benchmark | Model learning:
	-Random Forest Classifier 

  self.results = pd.concat([self.results, pd.DataFrame({


[Test Accuracy=0.9626877255617214, FAR=0.019830028328611898]
	-SGD Classifier [Test Accuracy=0.952889130350421, FAR=0.032577903682719546]
	-KNeighbors Classifier [Test Accuracy=0.9608589778415926, FAR=0.026912181303116147]
	-Support Vector Classifier [Test Accuracy=0.9594425472466918, FAR=0.029745042492917848]
	-MLP Classifier [Test Accuracy=0.9602671814971477, FAR=0.021246458923512748]


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,Random Forest Classifier,0.25832,0.973483,0.97843,0.014539,0.032806,0.962688,0.969909,0.01983
1,SGD Classifier,0.004352,0.950743,0.959378,0.028369,0.000134,0.952889,0.959019,0.032578
2,KNeighbors Classifier,0.003138,0.960789,0.966211,0.026241,0.028337,0.960859,0.965999,0.026912
3,Support Vector Classifier,0.051003,0.96929,0.973948,0.019504,0.015386,0.959443,0.964034,0.029745
4,MLP Classifier,3.326302,0.967183,0.973407,0.01773,0.000921,0.960267,0.967903,0.021246


T0|Horizon=7:
1. Loading data (#samples=4980, #features=7, target='T0')
2. Splitting data (#training samples=3984|Counter({0: 2815, 1: 1169}), #testing samples=996|Counter({0: 704, 1: 292}))
3. Standardizing X...
6. Initial benchmark | Model learning:
	-Random Forest Classifier 

  self.results = pd.concat([self.results, pd.DataFrame({


[Test Accuracy=0.9713476805728518, FAR=0.01278409090909091]
	-SGD Classifier [Test Accuracy=0.9597310865504358, FAR=0.04971590909090909]
	-KNeighbors Classifier [Test Accuracy=0.9591084215442092, FAR=0.026988636363636364]
	-Support Vector Classifier [Test Accuracy=0.9648291562889165, FAR=0.032670454545454544]
	-MLP Classifier [Test Accuracy=0.9629514321295143, FAR=0.022727272727272728]


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,Random Forest Classifier,0.176968,0.966206,0.972376,0.018828,0.032406,0.971348,0.977867,0.012784
1,SGD Classifier,0.005602,0.938038,0.949491,0.034103,0.000131,0.959731,0.956349,0.049716
2,KNeighbors Classifier,0.002779,0.953646,0.959683,0.031972,0.027993,0.959108,0.964912,0.026989
3,Support Vector Classifier,0.052932,0.962215,0.961428,0.040497,0.016103,0.964829,0.96606,0.03267
4,MLP Classifier,3.250875,0.960726,0.967131,0.023801,0.000874,0.962951,0.968891,0.022727


T0|Horizon=30:
1. Loading data (#samples=4957, #features=7, target='T0')
2. Splitting data (#training samples=3965|Counter({0: 2796, 1: 1169}), #testing samples=992|Counter({0: 700, 1: 292}))
3. Standardizing X...
6. Initial benchmark | Model learning:
	-Random Forest Classifier 

  self.results = pd.concat([self.results, pd.DataFrame({


[Test Accuracy=0.968160469667319, FAR=0.032857142857142856]
	-SGD Classifier [Test Accuracy=0.9199119373776907, FAR=0.06428571428571428]
	-KNeighbors Classifier [Test Accuracy=0.9456066536203522, FAR=0.047142857142857146]
	-Support Vector Classifier [Test Accuracy=0.9446086105675147, FAR=0.045714285714285714]
	-MLP Classifier [Test Accuracy=0.9620156555772994, FAR=0.04857142857142857]


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,Random Forest Classifier,0.172971,0.959424,0.964274,0.02897,0.032274,0.96816,0.967957,0.032857
1,SGD Classifier,0.004982,0.916911,0.932516,0.044707,0.000133,0.919912,0.927,0.064286
2,KNeighbors Classifier,0.002817,0.945723,0.952712,0.037554,0.028328,0.945607,0.948954,0.047143
3,Support Vector Classifier,0.074617,0.938171,0.949262,0.034692,0.022849,0.944609,0.948908,0.045714
4,MLP Classifier,3.478539,0.961261,0.961453,0.038984,0.001226,0.962016,0.958163,0.048571


T0|Horizon=90:
1. Loading data (#samples=4897, #features=7, target='T0')
2. Splitting data (#training samples=3917|Counter({0: 2748, 1: 1169}), #testing samples=980|Counter({0: 688, 1: 292}))
3. Standardizing X...
6. Initial benchmark | Model learning:
	-Random Forest Classifier 

  self.results = pd.concat([self.results, pd.DataFrame({


[Test Accuracy=0.9700541573749601, FAR=0.029069767441860465]
	-SGD Classifier [Test Accuracy=0.9134975310608474, FAR=0.0188953488372093]
	-KNeighbors Classifier [Test Accuracy=0.9705220611659764, FAR=0.024709302325581394]
	-Support Vector Classifier [Test Accuracy=0.9595213443771902, FAR=0.02616279069767442]
	-MLP Classifier [Test Accuracy=0.9691183497929277, FAR=0.0377906976744186]


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,Random Forest Classifier,0.17694,0.962565,0.968846,0.021834,0.032406,0.970054,0.970562,0.02907
1,SGD Classifier,0.00902,0.888177,0.925077,0.01492,0.000132,0.913498,0.939773,0.018895
2,KNeighbors Classifier,0.002965,0.9443,0.957154,0.02329,0.027182,0.970522,0.972542,0.024709
3,Support Vector Classifier,0.084405,0.933834,0.952517,0.018559,0.023004,0.959521,0.96534,0.026163
4,MLP Classifier,4.19911,0.962175,0.96467,0.032023,0.000969,0.969118,0.966621,0.037791


T8|Horizon=0:
1. Loading data (#samples=4987, #features=5, target='T_07')
2. Splitting data (#training samples=3989|Counter({0: 2462, 1: 1527}), #testing samples=998|Counter({0: 616, 1: 382}))
3. Standardizing X...
6. Initial benchmark | Model learning:
	-Random Forest Classifier 

  self.results = pd.concat([self.results, pd.DataFrame({


[Test Accuracy=0.9806002243829468, FAR=0.017857142857142856]
	-SGD Classifier [Test Accuracy=0.8519540014958863, FAR=0.08928571428571429]
	-KNeighbors Classifier [Test Accuracy=0.9623776093016931, FAR=0.04383116883116883]
	-Support Vector Classifier [Test Accuracy=0.9540779900727545, FAR=0.05519480519480519]
	-MLP Classifier [Test Accuracy=0.9610687087781329, FAR=0.04383116883116883]


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,Random Forest Classifier,0.238616,0.982629,0.982718,0.017059,0.032247,0.9806,0.980976,0.017857
1,SGD Classifier,0.00591,0.870883,0.875508,0.110885,0.000132,0.851954,0.864837,0.089286
2,KNeighbors Classifier,0.00242,0.965835,0.965727,0.034931,0.025846,0.962378,0.961057,0.043831
3,Support Vector Classifier,0.092966,0.947777,0.947526,0.054021,0.027243,0.954078,0.95212,0.055195
4,MLP Classifier,4.324397,0.960642,0.96145,0.036149,0.000993,0.961069,0.96005,0.043831


T8|Horizon=7:
1. Loading data (#samples=4980, #features=5, target='T_07')
2. Splitting data (#training samples=3984|Counter({0: 2457, 1: 1527}), #testing samples=996|Counter({0: 614, 1: 382}))
3. Standardizing X...
6. Initial benchmark | Model learning:
	-Random Forest Classifier 

  self.results = pd.concat([self.results, pd.DataFrame({


[Test Accuracy=0.9815304330030528, FAR=0.008143322475570033]
	-SGD Classifier [Test Accuracy=0.8849361324760816, FAR=0.05211726384364821]
	-KNeighbors Classifier [Test Accuracy=0.9761498712417074, FAR=0.016286644951140065]
	-Support Vector Classifier [Test Accuracy=0.9600081859576718, FAR=0.04071661237785016]
	-MLP Classifier [Test Accuracy=0.9755103432985999, FAR=0.02280130293159609]


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,Random Forest Classifier,0.17337,0.985833,0.986201,0.012617,0.031807,0.98153,0.983911,0.008143
1,SGD Classifier,0.007678,0.887479,0.887729,0.114367,0.000132,0.884936,0.898546,0.052117
2,KNeighbors Classifier,0.002301,0.96326,0.963908,0.034188,0.023941,0.97615,0.977901,0.016287
3,Support Vector Classifier,0.089758,0.950306,0.949004,0.056166,0.026661,0.960008,0.959934,0.040717
4,MLP Classifier,11.991625,0.968967,0.970147,0.026048,0.001381,0.97551,0.975927,0.022801


T8|Horizon=30:
1. Loading data (#samples=4957, #features=5, target='T_07')
2. Splitting data (#training samples=3965|Counter({0: 2438, 1: 1527}), #testing samples=992|Counter({0: 610, 1: 382}))
3. Standardizing X...
6. Initial benchmark | Model learning:
	-Random Forest Classifier 

  self.results = pd.concat([self.results, pd.DataFrame({


[Test Accuracy=0.9870569049866964, FAR=0.018032786885245903]
	-SGD Classifier [Test Accuracy=0.9039095356621749, FAR=0.05081967213114754]
	-KNeighbors Classifier [Test Accuracy=0.9649515063084714, FAR=0.036065573770491806]
	-Support Vector Classifier [Test Accuracy=0.9343446914427946, FAR=0.047540983606557376]
	-MLP Classifier [Test Accuracy=0.9495622693331045, FAR=0.04590163934426229]


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,Random Forest Classifier,0.175215,0.984719,0.985126,0.013536,0.032232,0.987057,0.985914,0.018033
1,SGD Classifier,0.005708,0.905986,0.918455,0.036095,0.000127,0.90391,0.913777,0.05082
2,KNeighbors Classifier,0.002341,0.967397,0.969959,0.021329,0.02186,0.964952,0.964792,0.036066
3,Support Vector Classifier,0.073832,0.944059,0.95013,0.028712,0.021661,0.934345,0.938462,0.047541
4,MLP Classifier,3.90046,0.961942,0.96345,0.031583,0.000938,0.949562,0.950687,0.045902


T8|Horizon=90:
1. Loading data (#samples=4897, #features=5, target='T_07')
2. Splitting data (#training samples=3917|Counter({0: 2390, 1: 1527}), #testing samples=980|Counter({0: 598, 1: 382}))
3. Standardizing X...
6. Initial benchmark | Model learning:
	-Random Forest Classifier 

  self.results = pd.concat([self.results, pd.DataFrame({


[Test Accuracy=0.9817498117634699, FAR=0.023411371237458192]
	-SGD Classifier [Test Accuracy=0.6725253462676636, FAR=0.06856187290969899]
	-KNeighbors Classifier [Test Accuracy=0.9614990631949429, FAR=0.03511705685618729]
	-Support Vector Classifier [Test Accuracy=0.9485939168957608, FAR=0.06354515050167224]
	-MLP Classifier [Test Accuracy=0.9620812831602725, FAR=0.04180602006688963]


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,Random Forest Classifier,0.180947,0.983982,0.98393,0.016318,0.032828,0.98175,0.980652,0.023411
1,SGD Classifier,0.008177,0.704956,0.728837,0.159833,0.000129,0.672525,0.704948,0.068562
2,KNeighbors Classifier,0.002541,0.956823,0.958165,0.037238,0.019147,0.961499,0.962288,0.035117
3,Support Vector Classifier,0.105218,0.953094,0.954352,0.041423,0.030514,0.948594,0.946178,0.063545
4,MLP Classifier,6.894106,0.962135,0.963756,0.030544,0.000966,0.962081,0.961328,0.041806


T16|Horizon=0:
1. Loading data (#samples=4987, #features=4, target='T_12')
2. Splitting data (#training samples=3989|Counter({0: 2984, 1: 1005}), #testing samples=998|Counter({0: 746, 1: 252}))
3. Standardizing X...
6. Initial benchmark | Model learning:
	-Random Forest Classifier 

  self.results = pd.concat([self.results, pd.DataFrame({


[Test Accuracy=0.9840737903740584, FAR=0.00804289544235925]
	-SGD Classifier [Test Accuracy=0.5, FAR=0.0]
	-KNeighbors Classifier [Test Accuracy=0.9655464062300523, FAR=0.00938337801608579]
	-Support Vector Classifier [Test Accuracy=0.9520883867398613, FAR=0.04423592493297587]
	-MLP Classifier [Test Accuracy=0.9542586918592281, FAR=0.0160857908847185]


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,Random Forest Classifier,0.172228,0.981535,0.985226,0.011059,0.032134,0.984074,0.987976,0.008043
1,SGD Classifier,0.011789,0.5,0.640242,0.0,0.000127,0.5,0.639485,0.0
2,KNeighbors Classifier,0.002036,0.962604,0.973127,0.016086,0.021833,0.965546,0.977837,0.009383
3,Support Vector Classifier,0.118077,0.945152,0.945537,0.055965,0.036313,0.952088,0.954474,0.044236
4,MLP Classifier,3.949891,0.954948,0.966631,0.021448,0.000919,0.954259,0.968792,0.016086


T16|Horizon=7:
1. Loading data (#samples=4980, #features=4, target='T_12')
2. Splitting data (#training samples=3984|Counter({0: 2978, 1: 1006}), #testing samples=996|Counter({0: 745, 1: 251}))
3. Standardizing X...
6. Initial benchmark | Model learning:
	-Random Forest Classifier 

  self.results = pd.concat([self.results, pd.DataFrame({


[Test Accuracy=0.985363245006551, FAR=0.005369127516778523]
	-SGD Classifier [Test Accuracy=0.5, FAR=0.0]
	-KNeighbors Classifier [Test Accuracy=0.9800368993823365, FAR=0.008053691275167786]
	-Support Vector Classifier [Test Accuracy=0.9499211208855852, FAR=0.052348993288590606]
	-MLP Classifier [Test Accuracy=0.9547046712478944, FAR=0.026845637583892617]


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,Random Forest Classifier,0.178276,0.98237,0.985958,0.01041,0.032672,0.985363,0.989946,0.005369
1,SGD Classifier,0.01031,0.500121,0.641079,0.006716,0.000129,0.5,0.640154,0.0
2,KNeighbors Classifier,0.002193,0.964934,0.974125,0.016454,0.021582,0.980037,0.985925,0.008054
3,Support Vector Classifier,0.11367,0.950898,0.94688,0.059436,0.036391,0.949921,0.949629,0.052349
4,MLP Classifier,4.100596,0.953617,0.964626,0.024177,0.000914,0.954705,0.96395,0.026846


T16|Horizon=30:
1. Loading data (#samples=4957, #features=4, target='T_12')
2. Splitting data (#training samples=3965|Counter({0: 2960, 1: 1005}), #testing samples=992|Counter({0: 740, 1: 252}))
3. Standardizing X...
6. Initial benchmark | Model learning:
	-Random Forest Classifier 

  self.results = pd.concat([self.results, pd.DataFrame({


[Test Accuracy=0.9834513084513085, FAR=0.0013513513513513514]
	-SGD Classifier [Test Accuracy=0.5, FAR=0.0]
	-KNeighbors Classifier [Test Accuracy=0.9707850707850707, FAR=0.010810810810810811]
	-Support Vector Classifier [Test Accuracy=0.9273380523380523, FAR=0.05405405405405406]
	-MLP Classifier [Test Accuracy=0.9522093522093522, FAR=0.016216216216216217]


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,Random Forest Classifier,0.177149,0.979153,0.983127,0.012838,0.032959,0.983451,0.990885,0.001351
1,SGD Classifier,0.009037,0.500639,0.639564,0.002703,0.00013,0.5,0.637432,0.0
2,KNeighbors Classifier,0.002043,0.955537,0.967417,0.02027,0.021452,0.970785,0.979785,0.010811
3,Support Vector Classifier,0.116053,0.940963,0.944087,0.054392,0.041859,0.927338,0.937159,0.054054
4,MLP Classifier,4.934266,0.951501,0.962464,0.026351,0.0009,0.952209,0.967569,0.016216


T16|Horizon=90:
1. Loading data (#samples=4897, #features=4, target='T_12')
2. Splitting data (#training samples=3917|Counter({0: 2912, 1: 1005}), #testing samples=980|Counter({0: 728, 1: 252}))
3. Standardizing X...
6. Initial benchmark | Model learning:
	-Random Forest Classifier 

  self.results = pd.concat([self.results, pd.DataFrame({


[Test Accuracy=0.9766483516483517, FAR=0.01098901098901099]
	-SGD Classifier [Test Accuracy=0.4952686202686203, FAR=0.04120879120879121]
	-KNeighbors Classifier [Test Accuracy=0.9528388278388278, FAR=0.01098901098901099]
	-Support Vector Classifier [Test Accuracy=0.9342948717948718, FAR=0.04807692307692308]
	-MLP Classifier [Test Accuracy=0.9313186813186813, FAR=0.03021978021978022]


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,Random Forest Classifier,0.18017,0.983424,0.986986,0.009272,0.03291,0.976648,0.982642,0.010989
1,SGD Classifier,0.009876,0.497834,0.63377,0.010302,0.000129,0.495269,0.635162,0.041209
2,KNeighbors Classifier,0.002021,0.965976,0.976932,0.011332,0.021484,0.952839,0.970147,0.010989
3,Support Vector Classifier,0.116465,0.94774,0.949634,0.049794,0.035187,0.934295,0.943352,0.048077
4,MLP Classifier,4.754792,0.956329,0.966561,0.022665,0.000985,0.931319,0.949835,0.03022


## Different training approaches

In [102]:
import re
import numpy as np
import pandas as pd
import time
from sklearn.model_selection import train_test_split, cross_val_predict, StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression, SGDClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, HistGradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
import joblib  # For model saving and loading
import pickle
from sklearn.metrics import balanced_accuracy_score, confusion_matrix, classification_report, f1_score
from imblearn.over_sampling import SMOTE
import pandas as pd
import numpy as np
import time
import pickle
import re
import joblib
from collections import Counter
import math

class FlexibleDatasetClassificationPipeline:
    def __init__(self, data_path1, data_path2=None, ftrz1=None, ftrz2=None, combine_datasets=False, use_second_for_testing=False, 
                 seed=42, test_size=0.2, n_splits=2, export=1, use_smote=False, depths=['T0', 'T_07', 'T_12'], test_depth=None):
        # Default features if not provided
        if ftrz1 is None:
            ftrz1 = ['Y', 'M', 'D', 'W', 'H', 'S', 'SNODP', 'SWGDN', 'LWGAB', 'T2M', 'SWLAND', 
                     'GHTSKIN', 'HFLUX', 'SPEED', 'TLML', 'TSH', 'EVPSOIL', 'LWLAND', 'TS', 'QV2M', 'SLP']           
            
        self.data_path1 = data_path1
        self.data_path2 = data_path2
        self.combine_datasets = combine_datasets
        self.use_second_for_testing = use_second_for_testing
        self.seed = seed
        self.test_size = test_size
        self.n_splits = n_splits
        self.export = export
        self.use_smote = use_smote
        self.depths = depths # all considered depths 
        self.test_depth = test_depth  # Depth to be used as test data in agnostic depth learning

        # Use ftrz2 only when combine_datasets is True
        if self.combine_datasets & self.use_second_for_testing==False :
            if ftrz2 is None:
                ftrz2 = ['Y', 'M', 'D', 'W', 'H', 'S', 'SNODP', 'SWGDN', 'LWGAB', 'T2M', 'SWLAND', 
                     'GHTSKIN', 'HFLUX', 'SPEED', 'TLML', 'TSH', 'EVPSOIL', 'LWLAND', 'TS', 'QV2M', 'SLP']
            self.ftrz = list(set(ftrz1).union(set(ftrz2)))  # Union of features from both datasets
        elif self.combine_datasets & self.use_second_for_testing:
            if ftrz2 is None:
                ftrz2 = ['Y', 'M', 'D', 'W', 'H', 'S', 'SNODP', 'SWGDN', 'LWGAB', 'T2M', 'SWLAND', 
                     'GHTSKIN', 'HFLUX', 'SPEED', 'TLML', 'TSH', 'EVPSOIL', 'LWLAND', 'TS', 'QV2M', 'SLP']
            self.ftrz = ftrz2  # only the second dataset's features are used!            
        else:
            self.ftrz = ftrz1  # Use only ftrz1 for separate dataset processing

        self.models = {
            "ET": ExtraTreesClassifier(n_jobs=-1, random_state=seed),
            "SGD Classifier": SGDClassifier(random_state=seed),
            "KNeighbors Classifier": KNeighborsClassifier(),
            "Support Vector Classifier": SVC(),
            "MLP Classifier": MLPClassifier(random_state=seed, max_iter=1000)
        }
        self.results = pd.DataFrame(columns=["Model", "Train Time (s)", "Train wAccuracy", "Train f1w", 
                                             "Train FAR", "Test Time (s)", "Test wAccuracy", "Test f1w", "Test FAR"])

    def extract_target_from_path(self, file_path):
        # Use re.search to find the first match in the file path
        match = re.search(r'T(_?\d+)', file_path)

        # If a match is found, return the matched group (i.e., the target)
        if match:
            return match.group(0)
        else:
            raise ValueError(f"no hrz was detected from the provided paths.")

    def load_data(self, data_path):
        print(f'Loading data from {data_path}, ', end='')
        # Extract the target from both file paths
        target = self.extract_target_from_path(data_path)
        self.target = target
        print(f'target is {self.target}, ', end='')        
        data = pd.read_csv(data_path, header=0, usecols=[self.target] + self.ftrz)
        if ~all(isinstance(x, int) for x in data[self.target]):
            data[self.target] = pd.cut(data[self.target], bins=[-math.inf, 0, +math.inf], labels=[0, 1])

        print(f"(#samples={len(data)}, #features={data.loc[:, data.columns != self.target].shape[1]}, target='{self.target}')")
        
        X = data.loc[:, data.columns != self.target].values
        y = data[self.target].values
        return X, y, data

    def process_data(self, X, y, scaler=None, standardize=True):
        if standardize:
            if scaler is None:
#                 print('Standardizing X, ', end='')
                scaler = StandardScaler()
                X = scaler.fit_transform(X)
            else:
                X = scaler.transform(X)
            return X, y, scaler
        return X, y, None

    def split_and_process_data(self, X, y, prevscaler=None):
        # Split into train and test sets
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=self.test_size,
                                                            random_state=self.seed, stratify=y)
        # Process training data
        X_train, y_train, scaler = self.process_data(X_train, y_train, standardize=True)
        if prevscaler == None:
            # Standardize testing data using the same scaler
            X_test, y_test, _ = self.process_data(X_test, y_test, scaler=scaler, standardize=True)
        else:
            # Standardize testing data using previous scaler
            X_test, y_test, _ = self.process_data(X_test, y_test, scaler=prevscaler, standardize=True)
            
        return X_train, y_train, X_test, y_test, scaler

    def process_combined_data(self):
        print("Combining datasets...")
        # Load and concatenate both datasets
        X1, y1, _ = self.load_data(self.data_path1)
        X2, y2, _ = self.load_data(self.data_path2)
        
        # Combine the datasets
        X_combined = np.vstack((X1, X2))
        y_combined = np.hstack((y1, y2))
        
        return self.split_and_process_data(X_combined, y_combined)

    def process_separate_data(self):
        print("Loading separate datasets, ",end='')
        # Load training data
        X1, y1, _ = self.load_data(self.data_path1)
#         print(f'X1 size is {X1.shape} | y1 size is {y1.shape}')
        # Load testing data
        X2, y2, _ = self.load_data(self.data_path2)
#         print(f'X2 size is {X2.shape} | y2 size is {y2.shape}')

        print("Standardizing separate datasets")
        # Standardize training data
        X_train, y_train, _, _, scaler = self.split_and_process_data(X1, y1)
        _, _, X_test, y_test, _ = self.split_and_process_data(X2, y2, scaler)
#         print(f"X_train {X_train.shape}, y_train {y_train.shape}, X_test {X_test.shape}, y_test {y_test.shape}")

        return X_train, y_train, X_test, y_test, scaler

    def process_agnostic_depth_data(self):
        print("Performing agnostic learning for depths (same location learning but all depths, test on a specific depth though) ...")

        # Define the depths to use for training and testing
        depths = ['T0', 'T_07', 'T_12']
        
        # Initialize lists for training data across all depths
        all_X_train, all_y_train, all_depth_train = [], [], []

        for depth in self.depths:
            # Load and split data for each depth| self.data_path1 should always be 'T0'
            depth_data_path = self.data_path1.replace('T0', depth)
            X, y, _ = self.load_data(depth_data_path)
            X_train, X_test, y_train, y_test = train_test_split(
                X, y, test_size=self.test_size, random_state=self.seed, stratify=y
            )

            # Identify depth in the training data
            depth_train_column = np.full(X_train.shape[0], int(re.findall(r'\d+', depth)[0]))
            all_X_train.append(X_train)
            all_y_train.append(y_train)
            all_depth_train.append(depth_train_column)

            # Use test data only if it matches the specified test depth
            if depth == self.test_depth:
                test_depth_column = np.full(X_test.shape[0], int(re.findall(r'\d+', depth)[0]))
                X_test_final, y_test_final, depth_test_final = X_test, y_test, test_depth_column

        # Concatenate train data from all depths
        X_train_combined = np.vstack(all_X_train)
        y_train_combined = np.hstack(all_y_train)
        depth_train_combined = np.hstack(all_depth_train).reshape(-1, 1)

        # Include depth as a feature in the training and test sets
        X_train_with_depth = np.hstack([X_train_combined, depth_train_combined])
        depth_test_final = depth_test_final.reshape(-1, 1)
        X_test_with_depth = np.hstack([X_test_final, depth_test_final])
        
        # Standardize the training and test data using the same scaler
        X_train_with_depth, y_train_combined, scaler = self.process_data(X_train_with_depth, y_train_combined, standardize=True)
        print(X_train_with_depth.shape, '|', X_train_combined.shape, '|', depth_train_combined.shape)
        X_test_with_depth, y_test_final, _ = self.process_data(X_test_with_depth, y_test_final, scaler=scaler, standardize=True)
        print(X_test_with_depth.shape, '|', X_test_final.shape, '|', depth_test_final.shape)

        return X_train_with_depth, y_train_combined, X_test_with_depth, y_test_final, scaler    
    
    def train_and_test_model(self, model_name="ET"):
        if model_name not in self.models:
            raise ValueError(f"Model '{model_name}' not found in available models.")

        if self.combine_datasets & pd.isna(self.test_depth):
            X_train, y_train, X_test, y_test, scaler = self.process_combined_data()
        elif self.combine_datasets==False:
            if pd.isna(self.test_depth):
                X_train, y_train, X_test, y_test, scaler = self.process_separate_data()
            else:
                X_train, y_train, X_test, y_test, scaler = self.process_agnostic_depth_data()
        
        model = self.models[model_name]

        # Train the model
        print(f'Training {model} model...', end=' ')
        kf = StratifiedKFold(n_splits=10, shuffle=True, random_state=self.seed)
        y_train_pred = cross_val_predict(model, X_train, y_train, cv=kf)
        start_train = time.time()
        model.fit(X_train, y_train)
        end_train = time.time()

        # Test the model
        start_test = time.time()
        y_test_pred = model.predict(X_test)
        end_test = time.time()

        train_accuracy = balanced_accuracy_score(y_train, model.predict(X_train))
        test_accuracy = balanced_accuracy_score(y_test, y_test_pred)

        # Compute False Alarm Rates
        tn, fp, fn, tp = confusion_matrix(y_train, model.predict(X_train)).ravel()
        train_FAR = fp / (fp + tn)
        train_f1 = f1_score(y_train, model.predict(X_train), average='weighted')

        tn, fp, fn, tp = confusion_matrix(y_test, y_test_pred).ravel()
        test_FAR = fp / (fp + tn)
        test_f1 = f1_score(y_test, y_test_pred, average='weighted')

        train_time = end_train - start_train
        test_time = end_test - start_test

        # Save results
        self.results = pd.concat([self.results, pd.DataFrame({
            "Model": [model_name],
            "Train Time (s)": [train_time],
            "Train wAccuracy": [train_accuracy],
            "Train f1w": [train_f1],
            "Train FAR": [train_FAR],
            "Test Time (s)": [test_time],
            "Test wAccuracy": [test_accuracy],
            "Test f1w": [test_f1],
            "Test FAR": [test_FAR]
        })], ignore_index=True)
        print(f'[Test Accuracy={test_accuracy}, FAR={test_FAR}]')

    def display_results(self):
        if not self.results.empty:
            self.results = self.results.sort_values(by="Test wAccuracy", ascending=False)
            display(self.results)


In [103]:
#Toolik
for hrz in list([0,7,30,90]):
    print(f'Ground|Horizon={hrz}:')
    # Paths to your datasets
    data_path1 = f'/home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T0_{hrz}Days.csv'  # Dataset 1 (Deadhorse)

    # Instantiate the pipeline without combining the datasets
    pipeline = FlexibleDatasetClassificationPipeline(
        data_path1=data_path1, 
        ftrz1=['Y', 'M', 'D', 'W', 'SWGDN', 'T2M', 'GHTSKIN', 'HFLUX', 'SPEED'], #toolik features
        seed=42,
        test_size=0.2,
        depths=['T0', 'T8', 'T16'], 
        test_depth='T0'
    )

    # Train and test the model using a Random Forest classifier
    pipeline.train_and_test_model(model_name="ET")

    # Display the results
    pipeline.display_results()

print('*****************************************************************************************************************')
for hrz in list([0,7,30,90]):
    print(f'Ground|Horizon={hrz}:')
    # Paths to your datasets
    data_path1 = f'/home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T0_{hrz}Days.csv'  # Dataset 1 (Deadhorse)

    # Instantiate the pipeline without combining the datasets
    pipeline = FlexibleDatasetClassificationPipeline(
        data_path1=data_path1, 
        ftrz1=['Y', 'S', 'SNODP', 'SWLAND', 'GHTSKIN', 'SPEED', 'TLML', 'EVPSOIL', 'SLP'], #toolik features
        seed=42,
        test_size=0.2,
        depths=['T0', 'T8', 'T16'], 
        test_depth='T8'
    )

    # Train and test the model using a Random Forest classifier
    pipeline.train_and_test_model(model_name="ET")

    # Display the results
    pipeline.display_results()
        
print('*****************************************************************************************************************')
for hrz in list([0,7,30,90]):
    print(f'Ground|Horizon={hrz}:')
    # Paths to your datasets
    data_path1 = f'/home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T0_{hrz}Days.csv'  # Dataset 1 (Deadhorse)

    # Instantiate the pipeline without combining the datasets
    pipeline = FlexibleDatasetClassificationPipeline(
        data_path1=data_path1, 
        ftrz1=['Y', 'M', 'D', 'S', 'SNODP', 'GHTSKIN', 'HFLUX', 'TLML', 'SLP'], #toolik features
        seed=42,
        test_size=0.2,
        depths=['T0', 'T8', 'T16'], 
        test_depth='T16'
    )

    # Train and test the model using a Random Forest classifier
    pipeline.train_and_test_model(model_name="ET")

    # Display the results
    pipeline.display_results()
                

Ground|Horizon=0:
Performing agnostic learning for depths (same location learning but all depths, test on a specific depth though) ...
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T0_0Days.csv, target is T0, (#samples=6080, #features=21, target='T0')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T8_0Days.csv, target is T8, (#samples=6080, #features=21, target='T8')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T16_0Days.csv, target is T16, (#samples=6080, #features=21, target='T16')
(14592, 22) | (14592, 21) | (14592, 1)
(1216, 22) | (1216, 21) | (1216, 1)
Training ExtraTreesClassifier(n_jobs=-1, random_state=42) model... [Test Accuracy=0.9879813169286853, FAR=0.002506265664160401]


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,ET,0.176925,1.0,1.0,0.0,0.029836,0.987981,0.990936,0.002506


Ground|Horizon=7:
Performing agnostic learning for depths (same location learning but all depths, test on a specific depth though) ...
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T0_7Days.csv, target is T0, (#samples=6071, #features=21, target='T0')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T8_7Days.csv, target is T8, (#samples=6071, #features=21, target='T8')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T16_7Days.csv, target is T16, (#samples=6071, #features=21, target='T16')
(14568, 22) | (14568, 21) | (14568, 1)
(1215, 22) | (1215, 21) | (1215, 1)
Training ExtraTreesClassifier(n_jobs=-1, random_state=42) model... [Test Accuracy=0.9838224081934047, FAR=0.0012547051442910915]


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,ET,0.181589,1.0,1.0,0.0,0.030141,0.983822,0.988437,0.001255


Ground|Horizon=30:
Performing agnostic learning for depths (same location learning but all depths, test on a specific depth though) ...
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T0_30Days.csv, target is T0, (#samples=6048, #features=21, target='T0')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T8_30Days.csv, target is T8, (#samples=6048, #features=21, target='T8')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T16_30Days.csv, target is T16, (#samples=6048, #features=21, target='T16')
(14514, 22) | (14514, 21) | (14514, 1)
(1210, 22) | (1210, 21) | (1210, 1)
Training ExtraTreesClassifier(n_jobs=-1, random_state=42) model... [Test Accuracy=0.9837838571760467, FAR=0.0037926675094816687]


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,ET,0.187222,1.0,1.0,0.0,0.031927,0.983784,0.987571,0.003793


Ground|Horizon=90:
Performing agnostic learning for depths (same location learning but all depths, test on a specific depth though) ...
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T0_90Days.csv, target is T0, (#samples=5988, #features=21, target='T0')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T8_90Days.csv, target is T8, (#samples=5988, #features=21, target='T8')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T16_90Days.csv, target is T16, (#samples=5988, #features=21, target='T16')
(14370, 22) | (14370, 21) | (14370, 1)
(1198, 22) | (1198, 21) | (1198, 1)
Training ExtraTreesClassifier(n_jobs=-1, random_state=42) model... [Test Accuracy=0.9819194794133597, FAR=0.005134788189987163]


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,ET,0.194008,1.0,1.0,0.0,0.031379,0.981919,0.985773,0.005135


*****************************************************************************************************************
Ground|Horizon=0:
Performing agnostic learning for depths (same location learning but all depths, test on a specific depth though) ...
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T0_0Days.csv, target is T0, (#samples=6080, #features=21, target='T0')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T8_0Days.csv, target is T8, (#samples=6080, #features=21, target='T8')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T16_0Days.csv, target is T16, (#samples=6080, #features=21, target='T16')
(14592, 22) | (14592, 21) | (14592, 1)
(1216, 22) | (1216, 21) | (1216, 1)
Training ExtraTreesClassifier(n_jobs=-1, random_state=42) model... [Test Accuracy=0.9877208309066201, FAR=0.0049504950495049506]


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,ET,0.167052,1.0,1.0,0.0,0.029591,0.987721,0.990119,0.00495


Ground|Horizon=7:
Performing agnostic learning for depths (same location learning but all depths, test on a specific depth though) ...
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T0_7Days.csv, target is T0, (#samples=6071, #features=21, target='T0')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T8_7Days.csv, target is T8, (#samples=6071, #features=21, target='T8')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T16_7Days.csv, target is T16, (#samples=6071, #features=21, target='T16')
(14568, 22) | (14568, 21) | (14568, 1)
(1215, 22) | (1215, 21) | (1215, 1)
Training ExtraTreesClassifier(n_jobs=-1, random_state=42) model... [Test Accuracy=0.9870981849989067, FAR=0.006195786864931847]


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,ET,0.17435,1.0,1.0,0.0,0.030578,0.987098,0.989291,0.006196


Ground|Horizon=30:
Performing agnostic learning for depths (same location learning but all depths, test on a specific depth though) ...
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T0_30Days.csv, target is T0, (#samples=6048, #features=21, target='T0')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T8_30Days.csv, target is T8, (#samples=6048, #features=21, target='T8')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T16_30Days.csv, target is T16, (#samples=6048, #features=21, target='T16')
(14514, 22) | (14514, 21) | (14514, 1)
(1210, 22) | (1210, 21) | (1210, 1)
Training ExtraTreesClassifier(n_jobs=-1, random_state=42) model... [Test Accuracy=0.9876809202483986, FAR=0.007481296758104738]


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,ET,0.183347,1.0,1.0,0.0,0.030046,0.987681,0.989253,0.007481


Ground|Horizon=90:
Performing agnostic learning for depths (same location learning but all depths, test on a specific depth though) ...
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T0_90Days.csv, target is T0, (#samples=5988, #features=21, target='T0')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T8_90Days.csv, target is T8, (#samples=5988, #features=21, target='T8')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T16_90Days.csv, target is T16, (#samples=5988, #features=21, target='T16')
(14370, 22) | (14370, 21) | (14370, 1)
(1198, 22) | (1198, 21) | (1198, 1)
Training ExtraTreesClassifier(n_jobs=-1, random_state=42) model... [Test Accuracy=0.9832340531149169, FAR=0.013924050632911392]


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,ET,0.176065,1.0,1.0,0.0,0.031374,0.983234,0.984154,0.013924


*****************************************************************************************************************
Ground|Horizon=0:
Performing agnostic learning for depths (same location learning but all depths, test on a specific depth though) ...
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T0_0Days.csv, target is T0, (#samples=6080, #features=21, target='T0')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T8_0Days.csv, target is T8, (#samples=6080, #features=21, target='T8')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T16_0Days.csv, target is T16, (#samples=6080, #features=21, target='T16')
(14592, 22) | (14592, 21) | (14592, 1)
(1216, 22) | (1216, 21) | (1216, 1)
Training ExtraTreesClassifier(n_jobs=-1, random_state=42) model... [Test Accuracy=0.9876143847992073, FAR=0.012239902080783354]


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,ET,0.1717,1.0,1.0,0.0,0.030013,0.987614,0.987684,0.01224


Ground|Horizon=7:
Performing agnostic learning for depths (same location learning but all depths, test on a specific depth though) ...
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T0_7Days.csv, target is T0, (#samples=6071, #features=21, target='T0')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T8_7Days.csv, target is T8, (#samples=6071, #features=21, target='T8')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T16_7Days.csv, target is T16, (#samples=6071, #features=21, target='T16')
(14568, 22) | (14568, 21) | (14568, 1)
(1215, 22) | (1215, 21) | (1215, 1)
Training ExtraTreesClassifier(n_jobs=-1, random_state=42) model... [Test Accuracy=0.9839580569069732, FAR=0.022058823529411766]


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,ET,0.170334,1.0,1.0,0.0,0.031995,0.983958,0.981971,0.022059


Ground|Horizon=30:
Performing agnostic learning for depths (same location learning but all depths, test on a specific depth though) ...
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T0_30Days.csv, target is T0, (#samples=6048, #features=21, target='T0')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T8_30Days.csv, target is T8, (#samples=6048, #features=21, target='T8')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T16_30Days.csv, target is T16, (#samples=6048, #features=21, target='T16')
(14514, 22) | (14514, 21) | (14514, 1)
(1210, 22) | (1210, 21) | (1210, 1)
Training ExtraTreesClassifier(n_jobs=-1, random_state=42) model... [Test Accuracy=0.9844864936694386, FAR=0.018495684340320593]


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,ET,0.180829,1.0,1.0,0.0,0.029298,0.984486,0.983522,0.018496


Ground|Horizon=90:
Performing agnostic learning for depths (same location learning but all depths, test on a specific depth though) ...
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T0_90Days.csv, target is T0, (#samples=5988, #features=21, target='T0')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T8_90Days.csv, target is T8, (#samples=5988, #features=21, target='T8')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T16_90Days.csv, target is T16, (#samples=5988, #features=21, target='T16')
(14370, 22) | (14370, 21) | (14370, 1)
(1198, 22) | (1198, 21) | (1198, 1)
Training ExtraTreesClassifier(n_jobs=-1, random_state=42) model... [Test Accuracy=0.9818397683821569, FAR=0.016270337922403004]


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,ET,0.193003,1.0,1.0,0.0,0.031499,0.98184,0.982498,0.01627


In [99]:
#Deadhorse
for hrz in list([0,7,30,90]):
    print(f'Ground|Horizon={hrz}:')
    # Paths to your datasets
    data_path1 = f'/home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T0_{hrz}Days.csv'  # Dataset 1 (Deadhorse)

    # Instantiate the pipeline without combining the datasets
    pipeline = FlexibleDatasetClassificationPipeline(
        data_path1=data_path1, 
        ftrz1=['Y', 'M', 'H', 'S', 'SNODP', 'GHTSKIN', 'TSH'], #deadhorse features
        seed=42,
        test_size=0.2,
        depths=['T0', 'T_07', 'T_12'], 
        test_depth='T0'
    )

    # Train and test the model using a Random Forest classifier
    pipeline.train_and_test_model(model_name="ET")

    # Display the results
    pipeline.display_results()

print('*****************************************************************************************************************')
for hrz in list([0,7,30,90]):
    print(f'Ground|Horizon={hrz}:')
    # Paths to your datasets
    data_path1 = f'/home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T0_{hrz}Days.csv'  # Dataset 1 (Deadhorse)

    # Instantiate the pipeline without combining the datasets
    pipeline = FlexibleDatasetClassificationPipeline(
        data_path1=data_path1, 
        ftrz1=['Y','M','D','SNODP','EVPSOIL'], #deadhorse features
        seed=42,
        test_size=0.2,
        depths=['T0', 'T_07', 'T_12'], 
        test_depth='T_07'
    )

    # Train and test the model using a Random Forest classifier
    pipeline.train_and_test_model(model_name="ET")

    # Display the results
    pipeline.display_results()
        
print('*****************************************************************************************************************')
for hrz in list([0,7,30,90]):
    print(f'Ground|Horizon={hrz}:')
    # Paths to your datasets
    data_path1 = f'/home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T0_{hrz}Days.csv'  # Dataset 1 (Deadhorse)

    # Instantiate the pipeline without combining the datasets
    pipeline = FlexibleDatasetClassificationPipeline(
        data_path1=data_path1, 
        ftrz1=['Y', 'M', 'D', 'H'], #deadhorse features
        seed=42,
        test_size=0.2,
        depths=['T0', 'T_07', 'T_12'], 
        test_depth='T_12'
    )

    # Train and test the model using a Random Forest classifier
    pipeline.train_and_test_model(model_name="ET")

    # Display the results
    pipeline.display_results()
                

Ground|Horizon=0:
Performing agnostic learning for depths (same location learning but all depths, test on a specific depth though) ...
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T0_0Days.csv, target is T0, (#samples=4987, #features=21, target='T0')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T_07_0Days.csv, target is T_07, (#samples=4987, #features=21, target='T_07')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T_12_0Days.csv, target is T_12, (#samples=4987, #features=21, target='T_12')
(11967, 22) | (11967, 21) | (11967, 1)
(998, 22) | (998, 21) | (998, 1)
Training ExtraTreesClassifier(n_jobs=-1, random_state=42) model... [Test Accuracy=0.9729616981644611, FAR=0.019830028328611898]


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,ET,0.184298,1.0,1.0,0.0,0.031379,0.972962,0.976,0.01983


Ground|Horizon=7:
Performing agnostic learning for depths (same location learning but all depths, test on a specific depth though) ...
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T0_7Days.csv, target is T0, (#samples=4980, #features=21, target='T0')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T_07_7Days.csv, target is T_07, (#samples=4980, #features=21, target='T_07')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T_12_7Days.csv, target is T_12, (#samples=4980, #features=21, target='T_12')
(11952, 22) | (11952, 21) | (11952, 1)
(996, 22) | (996, 21) | (996, 1)
Training ExtraTreesClassifier(n_jobs=-1, random_state=42) model... [Test Accuracy=0.9716784713574097, FAR=0.032670454545454544]


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,ET,0.183399,1.0,1.0,0.0,0.035026,0.971678,0.970108,0.03267


Ground|Horizon=30:
Performing agnostic learning for depths (same location learning but all depths, test on a specific depth though) ...
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T0_30Days.csv, target is T0, (#samples=4957, #features=21, target='T0')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T_07_30Days.csv, target is T_07, (#samples=4957, #features=21, target='T_07')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T_12_30Days.csv, target is T_12, (#samples=4957, #features=21, target='T_12')
(11895, 22) | (11895, 21) | (11895, 1)
(992, 22) | (992, 21) | (992, 1)
Training ExtraTreesClassifier(n_jobs=-1, random_state=42) model... [Test Accuracy=0.9618786692759296, FAR=0.03857142857142857]


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,ET,0.181834,1.0,1.0,0.0,0.031234,0.961879,0.961983,0.038571


Ground|Horizon=90:
Performing agnostic learning for depths (same location learning but all depths, test on a specific depth though) ...
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T0_90Days.csv, target is T0, (#samples=4897, #features=21, target='T0')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T_07_90Days.csv, target is T_07, (#samples=4897, #features=21, target='T_07')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T_12_90Days.csv, target is T_12, (#samples=4897, #features=21, target='T_12')
(11751, 22) | (11751, 21) | (11751, 1)
(980, 22) | (980, 21) | (980, 1)
Training ExtraTreesClassifier(n_jobs=-1, random_state=42) model... [Test Accuracy=0.9720253265371137, FAR=0.03197674418604651]


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,ET,0.177467,1.0,1.0,0.0,0.02773,0.972025,0.970616,0.031977


*****************************************************************************************************************
Ground|Horizon=0:
Performing agnostic learning for depths (same location learning but all depths, test on a specific depth though) ...
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T0_0Days.csv, target is T0, (#samples=4987, #features=21, target='T0')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T_07_0Days.csv, target is T_07, (#samples=4987, #features=21, target='T_07')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T_12_0Days.csv, target is T_12, (#samples=4987, #features=21, target='T_12')
(11967, 22) | (11967, 21) | (11967, 1)
(998, 22) | (998, 21) | (998, 1)
Training ExtraTreesClassifier(n_jobs=-1, random_state=42) model... [Test Accuracy=0.9360976065818998, FAR=0.017857142857142856]


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,ET,0.176958,1.0,1.0,0.0,0.029509,0.936098,0.946423,0.017857


Ground|Horizon=7:
Performing agnostic learning for depths (same location learning but all depths, test on a specific depth though) ...
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T0_7Days.csv, target is T0, (#samples=4980, #features=21, target='T0')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T_07_7Days.csv, target is T_07, (#samples=4980, #features=21, target='T_07')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T_12_7Days.csv, target is T_12, (#samples=4980, #features=21, target='T_12')
(11952, 22) | (11952, 21) | (11952, 1)
(996, 22) | (996, 21) | (996, 1)
Training ExtraTreesClassifier(n_jobs=-1, random_state=42) model... [Test Accuracy=0.92213534116684, FAR=0.006514657980456026]


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,ET,0.190374,1.0,1.0,0.0,0.028759,0.922135,0.937739,0.006515


Ground|Horizon=30:
Performing agnostic learning for depths (same location learning but all depths, test on a specific depth though) ...
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T0_30Days.csv, target is T0, (#samples=4957, #features=21, target='T0')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T_07_30Days.csv, target is T_07, (#samples=4957, #features=21, target='T_07')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T_12_30Days.csv, target is T_12, (#samples=4957, #features=21, target='T_12')
(11895, 22) | (11895, 21) | (11895, 1)
(992, 22) | (992, 21) | (992, 1)
Training ExtraTreesClassifier(n_jobs=-1, random_state=42) model... [Test Accuracy=0.9317526392584328, FAR=0.021311475409836064]


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,ET,0.196632,1.0,1.0,0.0,0.031731,0.931753,0.942037,0.021311


Ground|Horizon=90:
Performing agnostic learning for depths (same location learning but all depths, test on a specific depth though) ...
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T0_90Days.csv, target is T0, (#samples=4897, #features=21, target='T0')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T_07_90Days.csv, target is T_07, (#samples=4897, #features=21, target='T_07')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T_12_90Days.csv, target is T_12, (#samples=4897, #features=21, target='T_12')
(11751, 22) | (11751, 21) | (11751, 1)
(980, 22) | (980, 21) | (980, 1)
Training ExtraTreesClassifier(n_jobs=-1, random_state=42) model... [Test Accuracy=0.9226304085170464, FAR=0.013377926421404682]


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,ET,0.195863,1.0,1.0,0.0,0.030653,0.92263,0.93588,0.013378


*****************************************************************************************************************
Ground|Horizon=0:
Performing agnostic learning for depths (same location learning but all depths, test on a specific depth though) ...
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T0_0Days.csv, target is T0, (#samples=4987, #features=21, target='T0')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T_07_0Days.csv, target is T_07, (#samples=4987, #features=21, target='T_07')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T_12_0Days.csv, target is T_12, (#samples=4987, #features=21, target='T_12')
(11967, 22) | (11967, 21) | (11967, 1)
(998, 22) | (998, 21) | (998, 1)
Training ExtraTreesClassifier(n_jobs=-1, random_state=42) model... [Test Accuracy=0.9541523043533767, FAR=0.032171581769437]


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,ET,0.313911,1.0,1.0,0.0,0.029166,0.954152,0.961146,0.032172


Ground|Horizon=7:
Performing agnostic learning for depths (same location learning but all depths, test on a specific depth though) ...
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T0_7Days.csv, target is T0, (#samples=4980, #features=21, target='T0')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T_07_7Days.csv, target is T_07, (#samples=4980, #features=21, target='T_07')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T_12_7Days.csv, target is T_12, (#samples=4980, #features=21, target='T_12')
(11952, 22) | (11952, 21) | (11952, 1)
(996, 22) | (996, 21) | (996, 1)
Training ExtraTreesClassifier(n_jobs=-1, random_state=42) model... [Test Accuracy=0.9552688574560817, FAR=0.04563758389261745]


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,ET,0.200527,1.0,1.0,0.0,0.033532,0.955269,0.955455,0.045638


Ground|Horizon=30:
Performing agnostic learning for depths (same location learning but all depths, test on a specific depth though) ...
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T0_30Days.csv, target is T0, (#samples=4957, #features=21, target='T0')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T_07_30Days.csv, target is T_07, (#samples=4957, #features=21, target='T_07')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T_12_30Days.csv, target is T_12, (#samples=4957, #features=21, target='T_12')
(11895, 22) | (11895, 21) | (11895, 1)
(992, 22) | (992, 21) | (992, 1)
Training ExtraTreesClassifier(n_jobs=-1, random_state=42) model... [Test Accuracy=0.9484448734448735, FAR=0.05945945945945946]


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,ET,0.201661,1.0,1.0,0.0,0.030458,0.948445,0.945628,0.059459


Ground|Horizon=90:
Performing agnostic learning for depths (same location learning but all depths, test on a specific depth though) ...
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T0_90Days.csv, target is T0, (#samples=4897, #features=21, target='T0')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T_07_90Days.csv, target is T_07, (#samples=4897, #features=21, target='T_07')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T_12_90Days.csv, target is T_12, (#samples=4897, #features=21, target='T_12')
(11751, 22) | (11751, 21) | (11751, 1)
(980, 22) | (980, 21) | (980, 1)
Training ExtraTreesClassifier(n_jobs=-1, random_state=42) model... [Test Accuracy=0.9469627594627594, FAR=0.042582417582417584]


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,ET,0.179706,1.0,1.0,0.0,0.029155,0.946963,0.952484,0.042582


### AGNOSTIC (test from Toolik)

In [104]:
for hrz in list([0,7,30,90]):
    print(f'Ground|Horizon={hrz}:')
    # Paths to your datasets
    data_path1 = f'/home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T0_{hrz}Days.csv'  # Dataset 1 (Deadhorse)
    data_path2 =  f'/home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T0_{hrz}Days.csv'  # Dataset 2 (Toolik lake)

    # Instantiate the pipeline without combining the datasets
    pipeline = FlexibleDatasetClassificationPipeline(
        data_path1=data_path1, 
        data_path2=data_path2, 
        combine_datasets=True, 
        use_second_for_testing=True,
        ftrz1=['Y', 'M', 'H', 'S', 'SNODP', 'GHTSKIN', 'TSH'], #deadhorse features
        ftrz2=['Y', 'M', 'D', 'W', 'SWGDN', 'T2M', 'GHTSKIN', 'HFLUX', 'SPEED'], #toolik features
        seed=42,
        test_size=0.2,
        use_smote=False
    )

    # Train and test the model using a Random Forest classifier
    pipeline.train_and_test_model(model_name="ET")

    # Display the results
    pipeline.display_results()
    
# print(f'*****************************************************************************************************************')    
# for hrz in list([0,7,30,90]):
#     print(f'Mid|Horizon={hrz}:')
#     # Paths to your datasets
#     data_path1 = f'/home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T_07_{hrz}Days.csv'  # Dataset 1 (Deadhorse)
#     data_path2 =  f'/home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T8_{hrz}Days.csv'  # Dataset 2 (Toolik lake)

#     # Instantiate the pipeline without combining the datasets
#     pipeline = FlexibleDatasetClassificationPipeline(
#         data_path1=data_path1, 
#         data_path2=data_path2, 
#         combine_datasets=True, 
#         use_second_for_testing=True,
#         ftrz1=['Y','M','D','SNODP','EVPSOIL'], #deadhorse features
#         ftrz2=['Y', 'S', 'SNODP', 'SWLAND', 'GHTSKIN', 'SPEED', 'TLML', 'EVPSOIL', 'SLP'], #toolik features
#         seed=42,
#         test_size=0.2,
#         use_smote=False
#     )

#     # Train and test the model using a Random Forest classifier
#     pipeline.train_and_test_model(model_name="ET")

#     # Display the results
#     pipeline.display_results()
    
# print(f'*****************************************************************************************************************')    
# for hrz in list([0,7,30,90]):
#     print(f'Last|Horizon={hrz}:')
#     # Paths to your datasets
#     data_path1 = f'/home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T_12_{hrz}Days.csv'  # Dataset 1 (Deadhorse)
#     data_path2 =  f'/home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T16_{hrz}Days.csv'  # Dataset 2 (Toolik lake)

#     # Instantiate the pipeline without combining the datasets
#     pipeline = FlexibleDatasetClassificationPipeline(
#         data_path1=data_path1, 
#         data_path2=data_path2, 
#         combine_datasets=True, 
#         use_second_for_testing=True,
#         ftrz1=['Y','M','D','H'], #deadhorse features
#         ftrz2=['Y', 'M', 'D', 'S', 'SNODP', 'GHTSKIN', 'HFLUX', 'TLML', 'SLP'], #toolik features
#         seed=42,
#         test_size=0.2,
#         use_smote=False
#     )

#     # Train and test the model using a Random Forest classifier
#     pipeline.train_and_test_model(model_name="ET")

#     # Display the results
#     pipeline.display_results()
    
    
    

Ground|Horizon=0:
Combining datasets...
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T0_0Days.csv, target is T0, (#samples=4987, #features=9, target='T0')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T0_0Days.csv, target is T0, (#samples=6080, #features=9, target='T0')
Training ExtraTreesClassifier(n_jobs=-1, random_state=42) model... [Test Accuracy=0.972363290297043, FAR=0.017298735861610112]


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,ET,0.163103,1.0,1.0,0.0,0.031291,0.972363,0.976057,0.017299


Ground|Horizon=7:
Combining datasets...
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T0_7Days.csv, target is T0, (#samples=4980, #features=9, target='T0')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T0_7Days.csv, target is T0, (#samples=6071, #features=9, target='T0')
Training ExtraTreesClassifier(n_jobs=-1, random_state=42) model... [Test Accuracy=0.9515782905293184, FAR=0.030646235842771485]


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,ET,0.165927,1.0,1.0,0.0,0.035892,0.951578,0.95793,0.030646


Ground|Horizon=30:
Combining datasets...
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T0_30Days.csv, target is T0, (#samples=4957, #features=9, target='T0')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T0_30Days.csv, target is T0, (#samples=6048, #features=9, target='T0')
Training ExtraTreesClassifier(n_jobs=-1, random_state=42) model... [Test Accuracy=0.9499622424225262, FAR=0.03959731543624161]


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,ET,0.163366,1.0,1.0,0.0,0.031746,0.949962,0.95379,0.039597


Ground|Horizon=90:
Combining datasets...
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T0_90Days.csv, target is T0, (#samples=4897, #features=9, target='T0')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T0_90Days.csv, target is T0, (#samples=5988, #features=9, target='T0')
Training ExtraTreesClassifier(n_jobs=-1, random_state=42) model... [Test Accuracy=0.9544130147381913, FAR=0.030695770804911322]


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,ET,0.168264,1.0,1.0,0.0,0.03118,0.954413,0.959592,0.030696


### AGNOSTIC (test from Deadhorse)

In [66]:
for hrz in list([0,7,30,90]):
    print(f'Ground|Horizon={hrz}:')
    # Paths to your datasets
    data_path2 = f'/home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T0_{hrz}Days.csv'  # Dataset 1 (Deadhorse)
    data_path1 =  f'/home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T0_{hrz}Days.csv'  # Dataset 2 (Toolik lake)

    # Instantiate the pipeline without combining the datasets
    pipeline = FlexibleDatasetClassificationPipeline(
        data_path1=data_path1, 
        data_path2=data_path2, 
        combine_datasets=True, 
        use_second_for_testing=True,
        ftrz2=['Y', 'M', 'D', 'W', 'H', 'S', 'SWGDN', 'LWGAB', 'T2M', 'GHTSKIN'], #deadhorse features
        ftrz1=['Y', 'M', 'D', 'W', 'SWGDN', 'T2M', 'GHTSKIN', 'HFLUX', 'SPEED'], #toolik features
        seed=42,
        test_size=0.2,
        use_smote=False
    )

    # Train and test the model using a Random Forest classifier
    pipeline.train_and_test_model(model_name="ET")

    # Display the results
    pipeline.display_results()
    
print(f'*****************************************************************************************************************')    
for hrz in list([0,7,30,90]):
    print(f'Mid|Horizon={hrz}:')
    # Paths to your datasets
    data_path2 = f'/home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T_07_{hrz}Days.csv'  # Dataset 1 (Deadhorse)
    data_path1 =  f'/home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T8_{hrz}Days.csv'  # Dataset 2 (Toolik lake)

    # Instantiate the pipeline without combining the datasets
    pipeline = FlexibleDatasetClassificationPipeline(
        data_path1=data_path1, 
        data_path2=data_path2, 
        combine_datasets=True, 
        use_second_for_testing=True,
        ftrz2=['Y','M','D','SNODP','EVPSOIL'], #deadhorse features
        ftrz1=['Y', 'S', 'SNODP', 'SWLAND', 'GHTSKIN', 'SPEED', 'TLML', 'EVPSOIL', 'SLP'], #toolik features
        seed=42,
        test_size=0.2,
        use_smote=False
    )

    # Train and test the model using a Random Forest classifier
    pipeline.train_and_test_model(model_name="ET")

    # Display the results
    pipeline.display_results()
    
print(f'*****************************************************************************************************************')    
for hrz in list([0,7,30,90]):
    print(f'Last|Horizon={hrz}:')
    # Paths to your datasets
    data_path2 = f'/home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T_12_{hrz}Days.csv'  # Dataset 1 (Deadhorse)
    data_path1 =  f'/home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T16_{hrz}Days.csv'  # Dataset 2 (Toolik lake)

    # Instantiate the pipeline without combining the datasets
    pipeline = FlexibleDatasetClassificationPipeline(
        data_path1=data_path1, 
        data_path2=data_path2, 
        combine_datasets=True, 
        use_second_for_testing=True,
        ftrz2=['Y','M','D','H'], #deadhorse features
        ftrz1=['Y', 'M', 'D', 'S', 'SNODP', 'GHTSKIN', 'HFLUX', 'TLML', 'SLP'], #toolik features
        seed=42,
        test_size=0.2,
        use_smote=False
    )

    # Train and test the model using a Random Forest classifier
    pipeline.train_and_test_model(model_name="ET")

    # Display the results
    pipeline.display_results()
    
    
    

Ground|Horizon=0:
Combining datasets...
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T0_0Days.csv, target is T0, (#samples=6080, #features=10, target='T0')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T0_0Days.csv, target is T0, (#samples=4987, #features=10, target='T0')
Training ET model... [Test Accuracy=0.9798420037562007, FAR=0.020625415834996674]


  self.results = pd.concat([self.results, pd.DataFrame({


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,ET,0.130798,1.0,1.0,0.0,0.033214,0.979842,0.979737,0.020625


Ground|Horizon=7:
Combining datasets...
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T0_7Days.csv, target is T0, (#samples=6071, #features=10, target='T0')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T0_7Days.csv, target is T0, (#samples=4980, #features=10, target='T0')
Training ET model... [Test Accuracy=0.950684051008248, FAR=0.023984010659560292]


  self.results = pd.concat([self.results, pd.DataFrame({


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,ET,0.135589,1.0,1.0,0.0,0.032684,0.950684,0.959616,0.023984


Ground|Horizon=30:
Combining datasets...
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T0_30Days.csv, target is T0, (#samples=6048, #features=10, target='T0')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T0_30Days.csv, target is T0, (#samples=4957, #features=10, target='T0')
Training ET model... [Test Accuracy=0.9487629673680137, FAR=0.03355704697986577]


  self.results = pd.concat([self.results, pd.DataFrame({


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,ET,0.130054,1.0,1.0,0.0,0.032868,0.948763,0.955029,0.033557


Ground|Horizon=90:
Combining datasets...
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T0_90Days.csv, target is T0, (#samples=5988, #features=10, target='T0')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T0_90Days.csv, target is T0, (#samples=4897, #features=10, target='T0')
Training ET model... [Test Accuracy=0.9519833526171275, FAR=0.03274215552523874]


  self.results = pd.concat([self.results, pd.DataFrame({


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,ET,0.129612,1.0,1.0,0.0,0.033048,0.951983,0.957304,0.032742


*****************************************************************************************************************
Mid|Horizon=0:
Combining datasets...
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T8_0Days.csv, target is T8, (#samples=6080, #features=5, target='T8')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T_07_0Days.csv, target is T_07, (#samples=4987, #features=5, target='T_07')
Training ET model... [Test Accuracy=0.9774365310766604, FAR=0.014747191011235955]


  self.results = pd.concat([self.results, pd.DataFrame({


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,ET,0.137358,1.0,1.0,0.0,0.03269,0.977437,0.979666,0.014747


Mid|Horizon=7:
Combining datasets...
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T8_7Days.csv, target is T8, (#samples=6071, #features=5, target='T8')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T_07_7Days.csv, target is T_07, (#samples=4980, #features=5, target='T_07')
Training ET model... [Test Accuracy=0.9762237326183201, FAR=0.019704433497536946]


  self.results = pd.concat([self.results, pd.DataFrame({


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,ET,0.132954,1.0,1.0,0.0,0.033244,0.976224,0.977405,0.019704


Mid|Horizon=30:
Combining datasets...
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T8_30Days.csv, target is T8, (#samples=6048, #features=5, target='T8')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T_07_30Days.csv, target is T_07, (#samples=4957, #features=5, target='T_07')
Training ET model... [Test Accuracy=0.9700275412895065, FAR=0.021970233876683204]


  self.results = pd.concat([self.results, pd.DataFrame({


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,ET,0.133775,1.0,1.0,0.0,0.033043,0.970028,0.972289,0.02197


Mid|Horizon=90:
Combining datasets...
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T8_90Days.csv, target is T8, (#samples=5988, #features=5, target='T8')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T_07_90Days.csv, target is T_07, (#samples=4897, #features=5, target='T_07')
Training ET model... [Test Accuracy=0.9731863689047484, FAR=0.024513338139870222]


  self.results = pd.concat([self.results, pd.DataFrame({


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,ET,0.12966,1.0,1.0,0.0,0.033213,0.973186,0.973856,0.024513


*****************************************************************************************************************
Last|Horizon=0:
Combining datasets...
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T16_0Days.csv, target is T16, (#samples=6080, #features=4, target='T16')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T_12_0Days.csv, target is T_12, (#samples=4987, #features=4, target='T_12')
Training ET model... [Test Accuracy=0.9763202042627466, FAR=0.008957133717210493]


  self.results = pd.concat([self.results, pd.DataFrame({


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,ET,0.134178,1.0,1.0,0.0,0.032181,0.97632,0.982341,0.008957


Last|Horizon=7:
Combining datasets...
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T16_7Days.csv, target is T16, (#samples=6071, #features=4, target='T16')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T_12_7Days.csv, target is T_12, (#samples=4980, #features=4, target='T_12')
Training ET model... [Test Accuracy=0.9816695412211107, FAR=0.008968609865470852]


  self.results = pd.concat([self.results, pd.DataFrame({


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,ET,0.20995,1.0,1.0,0.0,0.032563,0.98167,0.985514,0.008969


Last|Horizon=30:
Combining datasets...
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T16_30Days.csv, target is T16, (#samples=6048, #features=4, target='T16')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T_12_30Days.csv, target is T_12, (#samples=4957, #features=4, target='T_12')
Training ET model... [Test Accuracy=0.9841628264208909, FAR=0.0070967741935483875]


  self.results = pd.concat([self.results, pd.DataFrame({


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,ET,0.131636,1.0,1.0,0.0,0.034101,0.984163,0.987719,0.007097


Last|Horizon=90:
Combining datasets...
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T16_90Days.csv, target is T16, (#samples=5988, #features=4, target='T16')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T_12_90Days.csv, target is T_12, (#samples=4897, #features=4, target='T_12')
Training ET model... [Test Accuracy=0.9775328006313505, FAR=0.011140235910878113]


  self.results = pd.concat([self.results, pd.DataFrame({


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,ET,0.129381,1.0,1.0,0.0,0.033485,0.977533,0.982066,0.01114


### Agnostic/Train on both - unique features from both!

In [59]:
for hrz in list([0,7,30,90]):
    print(f'Ground|Horizon={hrz}:')
    # Paths to your datasets
    data_path1 = f'/home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T0_{hrz}Days.csv'  # Dataset 1 (Deadhorse)
    data_path2 =  f'/home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T0_{hrz}Days.csv'  # Dataset 2 (Toolik lake)

    # Instantiate the pipeline without combining the datasets
    pipeline = FlexibleDatasetClassificationPipeline(
        data_path1=data_path1, 
        data_path2=data_path2, 
        combine_datasets=True, 
        ftrz1=['Y', 'M', 'D', 'W', 'H', 'S', 'SWGDN', 'LWGAB', 'T2M', 'GHTSKIN'], #deadhorse features
        ftrz2=['Y', 'M', 'D', 'W', 'SWGDN', 'T2M', 'GHTSKIN', 'HFLUX', 'SPEED'], #toolik features
        seed=42,
        test_size=0.2,
        use_smote=False
    )

    # Train and test the model using a Random Forest classifier
    pipeline.train_and_test_model(model_name="ET")

    # Display the results
    pipeline.display_results()
    
print(f'*****************************************************************************************************************')    
for hrz in list([0,7,30,90]):
    print(f'Mid|Horizon={hrz}:')
    # Paths to your datasets
    data_path1 = f'/home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T_07_{hrz}Days.csv'  # Dataset 1 (Deadhorse)
    data_path2 =  f'/home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T8_{hrz}Days.csv'  # Dataset 2 (Toolik lake)

    # Instantiate the pipeline without combining the datasets
    pipeline = FlexibleDatasetClassificationPipeline(
        data_path1=data_path1, 
        data_path2=data_path2, 
        combine_datasets=True, 
        ftrz1=['Y','M','D','SNODP','EVPSOIL'], #deadhorse features
        ftrz2=['Y', 'S', 'SNODP', 'SWLAND', 'GHTSKIN', 'SPEED', 'TLML', 'EVPSOIL', 'SLP'], #toolik features
        seed=42,
        test_size=0.2,
        use_smote=False
    )

    # Train and test the model using a Random Forest classifier
    pipeline.train_and_test_model(model_name="ET")

    # Display the results
    pipeline.display_results()
    
print(f'*****************************************************************************************************************')    
for hrz in list([0,7,30,90]):
    print(f'Last|Horizon={hrz}:')
    # Paths to your datasets
    data_path1 = f'/home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T_12_{hrz}Days.csv'  # Dataset 1 (Deadhorse)
    data_path2 =  f'/home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T16_{hrz}Days.csv'  # Dataset 2 (Toolik lake)

    # Instantiate the pipeline without combining the datasets
    pipeline = FlexibleDatasetClassificationPipeline(
        data_path1=data_path1, 
        data_path2=data_path2, 
        combine_datasets=True, 
        ftrz1=['Y','M','D','H'], #deadhorse features
        ftrz2=['Y', 'M', 'D', 'S', 'SNODP', 'GHTSKIN', 'HFLUX', 'TLML', 'SLP'], #toolik features
        seed=42,
        test_size=0.2,
        use_smote=False
    )

    # Train and test the model using a Random Forest classifier
    pipeline.train_and_test_model(model_name="ET")

    # Display the results
    pipeline.display_results()
    
    
    

Ground|Horizon=0:
Combining datasets...
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T0_0Days.csv, target is T0, (#samples=4987, #features=12, target='T0')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T0_0Days.csv, target is T0, (#samples=6080, #features=12, target='T0')
Training ET model... [Test Accuracy=0.9733991931748318, FAR=0.0166333998669328]


  self.results = pd.concat([self.results, pd.DataFrame({


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,ET,0.131337,1.0,1.0,0.0,0.033234,0.973399,0.97696,0.016633


Ground|Horizon=7:
Combining datasets...
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T0_7Days.csv, target is T0, (#samples=4980, #features=12, target='T0')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T0_7Days.csv, target is T0, (#samples=6071, #features=12, target='T0')
Training ET model... [Test Accuracy=0.9491325032138198, FAR=0.0313124583610926]


  self.results = pd.concat([self.results, pd.DataFrame({


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,ET,0.130646,1.0,1.0,0.0,0.032382,0.949133,0.956104,0.031312


Ground|Horizon=30:
Combining datasets...
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T0_30Days.csv, target is T0, (#samples=4957, #features=12, target='T0')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T0_30Days.csv, target is T0, (#samples=6048, #features=12, target='T0')
Training ET model... [Test Accuracy=0.9476132491339355, FAR=0.04429530201342282]


  self.results = pd.concat([self.results, pd.DataFrame({


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,ET,0.130738,1.0,1.0,0.0,0.032686,0.947613,0.950679,0.044295


Ground|Horizon=90:
Combining datasets...
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T0_90Days.csv, target is T0, (#samples=4897, #features=12, target='T0')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T0_90Days.csv, target is T0, (#samples=5988, #features=12, target='T0')
Training ET model... [Test Accuracy=0.9557772712184096, FAR=0.02796725784447476]


  self.results = pd.concat([self.results, pd.DataFrame({


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,ET,0.130424,1.0,1.0,0.0,0.032898,0.955777,0.961401,0.027967


*****************************************************************************************************************
Mid|Horizon=0:
Combining datasets...
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T_07_0Days.csv, target is T_07, (#samples=4987, #features=11, target='T_07')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T8_0Days.csv, target is T8, (#samples=6080, #features=11, target='T8')
Training ET model... [Test Accuracy=0.9779307708718532, FAR=0.0175561797752809]


  self.results = pd.concat([self.results, pd.DataFrame({


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,ET,0.131536,1.0,1.0,0.0,0.03291,0.977931,0.979235,0.017556


Mid|Horizon=7:
Combining datasets...
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T_07_7Days.csv, target is T_07, (#samples=4980, #features=11, target='T_07')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T8_7Days.csv, target is T8, (#samples=6071, #features=11, target='T8')
Training ET model... [Test Accuracy=0.971297624243936, FAR=0.029556650246305417]


  self.results = pd.concat([self.results, pd.DataFrame({


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,ET,0.202824,1.0,1.0,0.0,0.032998,0.971298,0.971132,0.029557


Mid|Horizon=30:
Combining datasets...
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T_07_30Days.csv, target is T_07, (#samples=4957, #features=11, target='T_07')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T8_30Days.csv, target is T8, (#samples=6048, #features=11, target='T8')
Training ET model... [Test Accuracy=0.9662054023988733, FAR=0.028348688873139617]


  self.results = pd.concat([self.results, pd.DataFrame({


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,ET,0.130152,1.0,1.0,0.0,0.032641,0.966205,0.967782,0.028349


Mid|Horizon=90:
Combining datasets...
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T_07_90Days.csv, target is T_07, (#samples=4897, #features=11, target='T_07')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T8_90Days.csv, target is T8, (#samples=5988, #features=11, target='T8')
Training ET model... [Test Accuracy=0.964694313380121, FAR=0.02883922134102379]


  self.results = pd.concat([self.results, pd.DataFrame({


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,ET,0.128535,1.0,1.0,0.0,0.032786,0.964694,0.966499,0.028839


*****************************************************************************************************************
Last|Horizon=0:
Combining datasets...
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T_12_0Days.csv, target is T_12, (#samples=4987, #features=10, target='T_12')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T16_0Days.csv, target is T16, (#samples=6080, #features=10, target='T16')
Training ET model... [Test Accuracy=0.9824645975039139, FAR=0.008957133717210493]


  self.results = pd.concat([self.results, pd.DataFrame({


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,ET,0.131339,1.0,1.0,0.0,0.032587,0.982465,0.985989,0.008957


Last|Horizon=7:
Combining datasets...
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T_12_7Days.csv, target is T_12, (#samples=4980, #features=10, target='T_12')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T16_7Days.csv, target is T16, (#samples=6071, #features=10, target='T16')
Training ET model... [Test Accuracy=0.9698787759325875, FAR=0.021780909673286355]


  self.results = pd.concat([self.results, pd.DataFrame({


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,ET,0.131969,1.0,1.0,0.0,0.033055,0.969879,0.973368,0.021781


Last|Horizon=30:
Combining datasets...
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T_12_30Days.csv, target is T_12, (#samples=4957, #features=10, target='T_12')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T16_30Days.csv, target is T16, (#samples=6048, #features=10, target='T16')
Training ET model... [Test Accuracy=0.9657450076804915, FAR=0.01935483870967742]


  self.results = pd.concat([self.results, pd.DataFrame({


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,ET,0.132651,1.0,1.0,0.0,0.032883,0.965745,0.971818,0.019355


Last|Horizon=90:
Combining datasets...
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T_12_90Days.csv, target is T_12, (#samples=4897, #features=10, target='T_12')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T16_90Days.csv, target is T16, (#samples=5988, #features=10, target='T16')
Training ET model... [Test Accuracy=0.9544102932679435, FAR=0.023591087811271297]


  self.results = pd.concat([self.results, pd.DataFrame({


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,ET,0.128901,1.0,1.0,0.0,0.033383,0.95441,0.963187,0.023591


### Agnostic/Train on both - features defined by deadhorse

In [53]:
for hrz in list([0,7,30,90]):
    print(f'Ground|Horizon={hrz}:')
    # Paths to your datasets
    data_path1 = f'/home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T0_{hrz}Days.csv'  # Dataset 1 (Deadhorse)
    data_path2 =  f'/home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T0_{hrz}Days.csv'  # Dataset 2 (Toolik lake)

    # Instantiate the pipeline without combining the datasets
    pipeline = FlexibleDatasetClassificationPipeline(
        data_path1=data_path1, 
        data_path2=data_path2, 
        combine_datasets=True, 
        ftrz=['Y', 'M', 'D', 'W', 'H', 'S', 'SWGDN', 'LWGAB', 'T2M', 'GHTSKIN'], #deadhorse features
        seed=42,
        test_size=0.2,
        use_smote=False
    )

    # Train and test the model using a Random Forest classifier
    pipeline.train_and_test_model(model_name="ET")

    # Display the results
    pipeline.display_results()
    
print(f'*****************************************************************************************************************')    
for hrz in list([0,7,30,90]):
    print(f'Mid|Horizon={hrz}:')
    # Paths to your datasets
    data_path1 = f'/home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T_07_{hrz}Days.csv'  # Dataset 1 (Deadhorse)
    data_path2 =  f'/home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T8_{hrz}Days.csv'  # Dataset 2 (Toolik lake)

    # Instantiate the pipeline without combining the datasets
    pipeline = FlexibleDatasetClassificationPipeline(
        data_path1=data_path1, 
        data_path2=data_path2, 
        combine_datasets=True, 
        ftrz=['Y','M','D','SNODP','EVPSOIL'], #deadhorse features
        seed=42,
        test_size=0.2,
        use_smote=False
    )

    # Train and test the model using a Random Forest classifier
    pipeline.train_and_test_model(model_name="ET")

    # Display the results
    pipeline.display_results()
    
print(f'*****************************************************************************************************************')    
for hrz in list([0,7,30,90]):
    print(f'Last|Horizon={hrz}:')
    # Paths to your datasets
    data_path1 = f'/home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T_12_{hrz}Days.csv'  # Dataset 1 (Deadhorse)
    data_path2 =  f'/home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T16_{hrz}Days.csv'  # Dataset 2 (Toolik lake)

    # Instantiate the pipeline without combining the datasets
    pipeline = FlexibleDatasetClassificationPipeline(
        data_path1=data_path1, 
        data_path2=data_path2, 
        combine_datasets=True, 
        ftrz=['Y','M','D','H'], #deadhorse features
        seed=42,
        test_size=0.2,
        use_smote=False
    )

    # Train and test the model using a Random Forest classifier
    pipeline.train_and_test_model(model_name="ET")

    # Display the results
    pipeline.display_results()
    
    
    

Ground|Horizon=0:
Combining datasets...
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T0_0Days.csv, target is T0, (#samples=4987, #features=10, target='T0')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T0_0Days.csv, target is T0, (#samples=6080, #features=10, target='T0')
Training ET model... [Test Accuracy=0.9768774686913093, FAR=0.015302727877578177]


  self.results = pd.concat([self.results, pd.DataFrame({


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,ET,0.13365,1.0,1.0,0.0,0.033284,0.976877,0.979679,0.015303


Ground|Horizon=7:
Combining datasets...
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T0_7Days.csv, target is T0, (#samples=4980, #features=10, target='T0')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T0_7Days.csv, target is T0, (#samples=6071, #features=10, target='T0')
Training ET model... [Test Accuracy=0.9528727327321691, FAR=0.02664890073284477]


  self.results = pd.concat([self.results, pd.DataFrame({


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,ET,0.130539,1.0,1.0,0.0,0.033,0.952873,0.960139,0.026649


Ground|Horizon=30:
Combining datasets...
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T0_30Days.csv, target is T0, (#samples=4957, #features=10, target='T0')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T0_30Days.csv, target is T0, (#samples=6048, #features=10, target='T0')
Training ET model... [Test Accuracy=0.9499622424225262, FAR=0.03959731543624161]


  self.results = pd.concat([self.results, pd.DataFrame({


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,ET,0.138115,1.0,1.0,0.0,0.033861,0.949962,0.95379,0.039597


Ground|Horizon=90:
Combining datasets...
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T0_90Days.csv, target is T0, (#samples=4897, #features=10, target='T0')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T0_90Days.csv, target is T0, (#samples=5988, #features=10, target='T0')
Training ET model... [Test Accuracy=0.9585902107402099, FAR=0.02796725784447476]


  self.results = pd.concat([self.results, pd.DataFrame({


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,ET,0.135361,1.0,1.0,0.0,0.107558,0.95859,0.963265,0.027967


*****************************************************************************************************************
Mid|Horizon=0:
Combining datasets...
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T_07_0Days.csv, target is T_07, (#samples=4987, #features=5, target='T_07')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T8_0Days.csv, target is T8, (#samples=6080, #features=5, target='T8')
Training ET model... [Test Accuracy=0.9708300739581852, FAR=0.020365168539325844]


  self.results = pd.concat([self.results, pd.DataFrame({


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,ET,0.134436,1.0,1.0,0.0,0.032795,0.97083,0.973348,0.020365


Mid|Horizon=7:
Combining datasets...
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T_07_7Days.csv, target is T_07, (#samples=4980, #features=5, target='T_07')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T8_7Days.csv, target is T8, (#samples=6071, #features=5, target='T8')
Training ET model... [Test Accuracy=0.9753783660998228, FAR=0.023926812104152005]


  self.results = pd.concat([self.results, pd.DataFrame({


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,ET,0.128879,1.0,1.0,0.0,0.032699,0.975378,0.975623,0.023927


Mid|Horizon=30:
Combining datasets...
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T_07_30Days.csv, target is T_07, (#samples=4957, #features=5, target='T_07')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T8_30Days.csv, target is T8, (#samples=6048, #features=5, target='T8')
Training ET model... [Test Accuracy=0.9753693852102379, FAR=0.022678951098511695]


  self.results = pd.concat([self.results, pd.DataFrame({


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,ET,0.128186,1.0,1.0,0.0,0.032794,0.975369,0.975956,0.022679


Mid|Horizon=90:
Combining datasets...
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T_07_90Days.csv, target is T_07, (#samples=4897, #features=5, target='T_07')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T8_90Days.csv, target is T8, (#samples=5988, #features=5, target='T8')
Training ET model... [Test Accuracy=0.9782414463417082, FAR=0.01946647440519106]


  self.results = pd.concat([self.results, pd.DataFrame({


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,ET,0.133074,1.0,1.0,0.0,0.033195,0.978241,0.978893,0.019466


*****************************************************************************************************************
Last|Horizon=0:
Combining datasets...
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T_12_0Days.csv, target is T_12, (#samples=4987, #features=4, target='T_12')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T16_0Days.csv, target is T16, (#samples=6080, #features=4, target='T16')
Training ET model... [Test Accuracy=0.9801604500384762, FAR=0.008957133717210493]


  self.results = pd.concat([self.results, pd.DataFrame({


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,ET,0.13108,1.0,1.0,0.0,0.03296,0.98016,0.984622,0.008957


Last|Horizon=7:
Combining datasets...
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T_12_7Days.csv, target is T_12, (#samples=4980, #features=4, target='T_12')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T16_7Days.csv, target is T16, (#samples=6071, #features=4, target='T16')
Training ET model... [Test Accuracy=0.9836569260336077, FAR=0.009609224855861628]


  self.results = pd.concat([self.results, pd.DataFrame({


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,ET,0.13171,1.0,1.0,0.0,0.034195,0.983657,0.986431,0.009609


Last|Horizon=30:
Combining datasets...
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T_12_30Days.csv, target is T_12, (#samples=4957, #features=4, target='T_12')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T16_30Days.csv, target is T16, (#samples=6048, #features=4, target='T16')
Training ET model... [Test Accuracy=0.983394777265745, FAR=0.0070967741935483875]


  self.results = pd.concat([self.results, pd.DataFrame({


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,ET,0.132395,1.0,1.0,0.0,0.033793,0.983395,0.987261,0.007097


Last|Horizon=90:
Combining datasets...
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T_12_90Days.csv, target is T_12, (#samples=4897, #features=4, target='T_12')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T16_90Days.csv, target is T16, (#samples=5988, #features=4, target='T16')
Training ET model... [Test Accuracy=0.9752392226496991, FAR=0.015727391874180863]


  self.results = pd.concat([self.results, pd.DataFrame({


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,ET,0.132502,1.0,1.0,0.0,0.033803,0.975239,0.978879,0.015727


### Agnostic/Train on both - features defined by toolik lake

In [54]:
for hrz in list([0,7,30,90]):
    print(f'Ground|Horizon={hrz}:')
    # Paths to your datasets
    data_path1 = f'/home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T0_{hrz}Days.csv'  # Dataset 1 (Deadhorse)
    data_path2 =  f'/home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T0_{hrz}Days.csv'  # Dataset 2 (Toolik lake)

    # Instantiate the pipeline without combining the datasets
    pipeline = FlexibleDatasetClassificationPipeline(
        data_path1=data_path1, 
        data_path2=data_path2, 
        combine_datasets=True, 
        ftrz=['Y', 'M', 'D', 'W', 'SWGDN', 'T2M', 'GHTSKIN', 'HFLUX', 'SPEED'], #toolik features
        seed=42,
        test_size=0.2,
        use_smote=False
    )

    # Train and test the model using a Random Forest classifier
    pipeline.train_and_test_model(model_name="ET")

    # Display the results
    pipeline.display_results()
    
print(f'*****************************************************************************************************************')    
for hrz in list([0,7,30,90]):
    print(f'Mid|Horizon={hrz}:')
    # Paths to your datasets
    data_path1 = f'/home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T_07_{hrz}Days.csv'  # Dataset 1 (Deadhorse)
    data_path2 =  f'/home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T8_{hrz}Days.csv'  # Dataset 2 (Toolik lake)

    # Instantiate the pipeline without combining the datasets
    pipeline = FlexibleDatasetClassificationPipeline(
        data_path1=data_path1, 
        data_path2=data_path2, 
        combine_datasets=True, 
        ftrz=['Y', 'S', 'SNODP', 'SWLAND', 'GHTSKIN', 'SPEED', 'TLML', 'EVPSOIL', 'SLP'], #toolik features
        seed=42,
        test_size=0.2,
        use_smote=False
    )

    # Train and test the model using a Random Forest classifier
    pipeline.train_and_test_model(model_name="ET")

    # Display the results
    pipeline.display_results()
    
print(f'*****************************************************************************************************************')    
for hrz in list([0,7,30,90]):
    print(f'Last|Horizon={hrz}:')
    # Paths to your datasets
    data_path1 = f'/home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T_12_{hrz}Days.csv'  # Dataset 1 (Deadhorse)
    data_path2 =  f'/home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T16_{hrz}Days.csv'  # Dataset 2 (Toolik lake)

    # Instantiate the pipeline without combining the datasets
    pipeline = FlexibleDatasetClassificationPipeline(
        data_path1=data_path1, 
        data_path2=data_path2, 
        combine_datasets=True, 
        ftrz=['Y', 'M', 'D', 'S', 'SNODP', 'GHTSKIN', 'HFLUX', 'TLML', 'SLP'], #toolik features
        seed=42,
        test_size=0.2,
        use_smote=False
    )

    # Train and test the model using a Random Forest classifier
    pipeline.train_and_test_model(model_name="ET")

    # Display the results
    pipeline.display_results()
    
    
    

Ground|Horizon=0:
Combining datasets...
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T0_0Days.csv, target is T0, (#samples=4987, #features=9, target='T0')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T0_0Days.csv, target is T0, (#samples=6080, #features=9, target='T0')
Training ET model... [Test Accuracy=0.972363290297043, FAR=0.017298735861610112]


  self.results = pd.concat([self.results, pd.DataFrame({


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,ET,0.133715,1.0,1.0,0.0,0.033225,0.972363,0.976057,0.017299


Ground|Horizon=7:
Combining datasets...
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T0_7Days.csv, target is T0, (#samples=4980, #features=9, target='T0')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T0_7Days.csv, target is T0, (#samples=6071, #features=9, target='T0')
Training ET model... [Test Accuracy=0.9515782905293184, FAR=0.030646235842771485]


  self.results = pd.concat([self.results, pd.DataFrame({


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,ET,0.130744,1.0,1.0,0.0,0.032751,0.951578,0.95793,0.030646


Ground|Horizon=30:
Combining datasets...
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T0_30Days.csv, target is T0, (#samples=4957, #features=9, target='T0')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T0_30Days.csv, target is T0, (#samples=6048, #features=9, target='T0')
Training ET model... [Test Accuracy=0.9499622424225262, FAR=0.03959731543624161]


  self.results = pd.concat([self.results, pd.DataFrame({


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,ET,0.130334,1.0,1.0,0.0,0.106079,0.949962,0.95379,0.039597


Ground|Horizon=90:
Combining datasets...
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T0_90Days.csv, target is T0, (#samples=4897, #features=9, target='T0')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T0_90Days.csv, target is T0, (#samples=5988, #features=9, target='T0')
Training ET model... [Test Accuracy=0.9544130147381913, FAR=0.030695770804911322]


  self.results = pd.concat([self.results, pd.DataFrame({


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,ET,0.133142,1.0,1.0,0.0,0.03339,0.954413,0.959592,0.030696


*****************************************************************************************************************
Mid|Horizon=0:
Combining datasets...
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T_07_0Days.csv, target is T_07, (#samples=4987, #features=9, target='T_07')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T8_0Days.csv, target is T8, (#samples=6080, #features=9, target='T8')
Training ET model... [Test Accuracy=0.9778614350732471, FAR=0.018960674157303372]


  self.results = pd.concat([self.results, pd.DataFrame({


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,ET,0.132385,1.0,1.0,0.0,0.033362,0.977861,0.978792,0.018961


Mid|Horizon=7:
Combining datasets...
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T_07_7Days.csv, target is T_07, (#samples=4980, #features=9, target='T_07')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T8_7Days.csv, target is T8, (#samples=6071, #features=9, target='T8')
Training ET model... [Test Accuracy=0.9577918028844012, FAR=0.04011259676284307]


  self.results = pd.concat([self.results, pd.DataFrame({


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,ET,0.134276,1.0,1.0,0.0,0.033359,0.957792,0.958513,0.040113


Mid|Horizon=30:
Combining datasets...
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T_07_30Days.csv, target is T_07, (#samples=4957, #features=9, target='T_07')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T8_30Days.csv, target is T8, (#samples=6048, #features=9, target='T8')
Training ET model... [Test Accuracy=0.9440552978855108, FAR=0.04606661941885188]


  self.results = pd.concat([self.results, pd.DataFrame({


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,ET,0.134096,1.0,1.0,0.0,0.032818,0.944055,0.946936,0.046067


Mid|Horizon=90:
Combining datasets...
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T_07_90Days.csv, target is T_07, (#samples=4897, #features=9, target='T_07')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T8_90Days.csv, target is T8, (#samples=5988, #features=9, target='T8')
Training ET model... [Test Accuracy=0.952308506657662, FAR=0.029560201874549386]


  self.results = pd.concat([self.results, pd.DataFrame({


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,ET,0.129399,1.0,1.0,0.0,0.032972,0.952309,0.957215,0.02956


*****************************************************************************************************************
Last|Horizon=0:
Combining datasets...
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T_12_0Days.csv, target is T_12, (#samples=4987, #features=9, target='T_12')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T16_0Days.csv, target is T16, (#samples=6080, #features=9, target='T16')
Training ET model... [Test Accuracy=0.9834242904021866, FAR=0.007037747920665387]


  self.results = pd.concat([self.results, pd.DataFrame({


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,ET,0.129814,1.0,1.0,0.0,0.033258,0.983424,0.987336,0.007038


Last|Horizon=7:
Combining datasets...
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T_12_7Days.csv, target is T_12, (#samples=4980, #features=9, target='T_12')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T16_7Days.csv, target is T16, (#samples=6071, #features=9, target='T16')
Training ET model... [Test Accuracy=0.96712216035086, FAR=0.02114029468289558]


  self.results = pd.concat([self.results, pd.DataFrame({


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,ET,0.128842,1.0,1.0,0.0,0.033425,0.967122,0.971983,0.02114


Last|Horizon=30:
Combining datasets...
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T_12_30Days.csv, target is T_12, (#samples=4957, #features=9, target='T_12')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T16_30Days.csv, target is T16, (#samples=6048, #features=9, target='T16')
Training ET model... [Test Accuracy=0.9665130568356375, FAR=0.01935483870967742]


  self.results = pd.concat([self.results, pd.DataFrame({


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,ET,0.126732,1.0,1.0,0.0,0.033391,0.966513,0.972279,0.019355


Last|Horizon=90:
Combining datasets...
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T_12_90Days.csv, target is T_12, (#samples=4897, #features=9, target='T_12')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T16_90Days.csv, target is T16, (#samples=5988, #features=9, target='T16')
Training ET model... [Test Accuracy=0.9584760213644499, FAR=0.02621231979030144]


  self.results = pd.concat([self.results, pd.DataFrame({


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,ET,0.128486,1.0,1.0,0.0,0.033205,0.958476,0.964653,0.026212


### Location specific: Train on Deadhorse

In [55]:
for hrz in list([0,7,30,90]):
    print(f'T0|Horizon={hrz}:')
    # Paths to your datasets
    data_path1 = f'/home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T0_{hrz}Days.csv'  # Dataset 1 (Deadhorse)
    data_path2 =  f'/home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T0_{hrz}Days.csv'  # Dataset 2 (Toolik lake)

    # Instantiate the pipeline without combining the datasets
    pipeline = FlexibleDatasetClassificationPipeline(
        data_path1=data_path1, 
        data_path2=data_path2, 
        combine_datasets=False, 
        ftrz=['Y', 'M', 'D', 'W', 'H', 'S', 'SWGDN', 'LWGAB', 'T2M', 'GHTSKIN'],
        seed=42,
        test_size=0.2,
        use_smote=False
    )

    # Train and test the model using a Random Forest classifier
    pipeline.train_and_test_model(model_name="ET")

    # Display the results
    pipeline.display_results()
    
print(f'*****************************************************************************************************************')    
for hrz in list([0,7,30,90]):
    print(f'T0|Horizon={hrz}:')
    # Paths to your datasets
    data_path1 = f'/home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T_07_{hrz}Days.csv'  # Dataset 1 (Deadhorse)
    data_path2 =  f'/home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T8_{hrz}Days.csv'  # Dataset 2 (Toolik lake)

    # Instantiate the pipeline without combining the datasets
    pipeline = FlexibleDatasetClassificationPipeline(
        data_path1=data_path1, 
        data_path2=data_path2, 
        combine_datasets=False, 
        ftrz=['Y','M','D','SNODP','EVPSOIL'],
        seed=42,
        test_size=0.2,
        use_smote=False
    )

    # Train and test the model using a Random Forest classifier
    pipeline.train_and_test_model(model_name="ET")

    # Display the results
    pipeline.display_results()

    
print(f'*****************************************************************************************************************')    
for hrz in list([0,7,30,90]):
    print(f'T0|Horizon={hrz}:')
    # Paths to your datasets
    data_path1 = f'/home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T_12_{hrz}Days.csv'  # Dataset 1 (Deadhorse)
    data_path2 =  f'/home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T16_{hrz}Days.csv'  # Dataset 2 (Toolik lake)

    # Instantiate the pipeline without combining the datasets
    pipeline = FlexibleDatasetClassificationPipeline(
        data_path1=data_path1, 
        data_path2=data_path2, 
        combine_datasets=False, 
        ftrz=['Y','M','D','H'],
        seed=42,
        test_size=0.2,
        use_smote=False
    )

    # Train and test the model using a Random Forest classifier
    pipeline.train_and_test_model(model_name="ET")

    # Display the results
    pipeline.display_results()
    

T0|Horizon=0:
Loading separate datasets, Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T0_0Days.csv, target is T0, (#samples=4987, #features=10, target='T0')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T0_0Days.csv, target is T0, (#samples=6080, #features=10, target='T0')
Standardizing separate datasets
Training ET model... [Test Accuracy=0.9197425381635909, FAR=0.03132832080200501]


  self.results = pd.concat([self.results, pd.DataFrame({


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,ET,0.13032,1.0,1.0,0.0,0.032459,0.919743,0.934445,0.031328


T0|Horizon=7:
Loading separate datasets, Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T0_7Days.csv, target is T0, (#samples=4980, #features=10, target='T0')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T0_7Days.csv, target is T0, (#samples=6071, #features=10, target='T0')
Standardizing separate datasets
Training ET model... [Test Accuracy=0.8919767909565176, FAR=0.04140526976160602]


  self.results = pd.concat([self.results, pd.DataFrame({


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,ET,0.124096,1.0,1.0,0.0,0.031928,0.891977,0.911633,0.041405


T0|Horizon=30:
Loading separate datasets, Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T0_30Days.csv, target is T0, (#samples=4957, #features=10, target='T0')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T0_30Days.csv, target is T0, (#samples=6048, #features=10, target='T0')
Standardizing separate datasets
Training ET model... [Test Accuracy=0.8948341877144124, FAR=0.04804045512010114]


  self.results = pd.concat([self.results, pd.DataFrame({


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,ET,0.126383,1.0,1.0,0.0,0.031755,0.894834,0.911586,0.04804


T0|Horizon=90:
Loading separate datasets, Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T0_90Days.csv, target is T0, (#samples=4897, #features=10, target='T0')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T0_90Days.csv, target is T0, (#samples=5988, #features=10, target='T0')
Standardizing separate datasets
Training ET model... [Test Accuracy=0.8857494309147338, FAR=0.059050064184852376]


  self.results = pd.concat([self.results, pd.DataFrame({


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,ET,0.124343,1.0,1.0,0.0,0.031969,0.885749,0.901607,0.05905


*****************************************************************************************************************
T0|Horizon=0:
Loading separate datasets, Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T_07_0Days.csv, target is T_07, (#samples=4987, #features=5, target='T_07')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T8_0Days.csv, target is T8, (#samples=6080, #features=5, target='T8')
Standardizing separate datasets
Training ET model... [Test Accuracy=0.8382474276839449, FAR=0.24752475247524752]


  self.results = pd.concat([self.results, pd.DataFrame({


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,ET,0.1243,1.0,1.0,0.0,0.032691,0.838247,0.815236,0.247525


T0|Horizon=7:
Loading separate datasets, Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T_07_7Days.csv, target is T_07, (#samples=4980, #features=5, target='T_07')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T8_7Days.csv, target is T8, (#samples=6071, #features=5, target='T8')
Standardizing separate datasets
Training ET model... [Test Accuracy=0.8166001530723813, FAR=0.25650557620817843]


  self.results = pd.concat([self.results, pd.DataFrame({


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,ET,0.123354,1.0,1.0,0.0,0.032381,0.8166,0.7982,0.256506


T0|Horizon=30:
Loading separate datasets, Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T_07_30Days.csv, target is T_07, (#samples=4957, #features=5, target='T_07')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T8_30Days.csv, target is T8, (#samples=6048, #features=5, target='T8')
Standardizing separate datasets
Training ET model... [Test Accuracy=0.7787363698596645, FAR=0.29301745635910226]


  self.results = pd.concat([self.results, pd.DataFrame({


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,ET,0.119898,1.0,1.0,0.0,0.031924,0.778736,0.761987,0.293017


T0|Horizon=90:
Loading separate datasets, Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T_07_90Days.csv, target is T_07, (#samples=4897, #features=5, target='T_07')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T8_90Days.csv, target is T8, (#samples=5988, #features=5, target='T8')
Standardizing separate datasets
Training ET model... [Test Accuracy=0.778518242740134, FAR=0.32531645569620254]


  self.results = pd.concat([self.results, pd.DataFrame({


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,ET,0.192184,1.0,1.0,0.0,0.03223,0.778518,0.75196,0.325316


*****************************************************************************************************************
T0|Horizon=0:
Loading separate datasets, Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T_12_0Days.csv, target is T_12, (#samples=4987, #features=4, target='T_12')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T16_0Days.csv, target is T16, (#samples=6080, #features=4, target='T16')
Standardizing separate datasets
Training ET model... [Test Accuracy=0.7670921489770939, FAR=0.2252141982864137]


  self.results = pd.concat([self.results, pd.DataFrame({


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,ET,0.12858,1.0,1.0,0.0,0.032586,0.767092,0.774619,0.225214


T0|Horizon=7:
Loading separate datasets, Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T_12_7Days.csv, target is T_12, (#samples=4980, #features=4, target='T_12')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T16_7Days.csv, target is T16, (#samples=6071, #features=4, target='T16')
Standardizing separate datasets
Training ET model... [Test Accuracy=0.7483782986878962, FAR=0.24509803921568626]


  self.results = pd.concat([self.results, pd.DataFrame({


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,ET,0.129202,1.0,1.0,0.0,0.032127,0.748378,0.756252,0.245098


T0|Horizon=30:
Loading separate datasets, Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T_12_30Days.csv, target is T_12, (#samples=4957, #features=4, target='T_12')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T16_30Days.csv, target is T16, (#samples=6048, #features=4, target='T16')
Standardizing separate datasets
Training ET model... [Test Accuracy=0.7662992252517855, FAR=0.2318125770653514]


  self.results = pd.concat([self.results, pd.DataFrame({


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,ET,0.117344,1.0,1.0,0.0,0.032502,0.766299,0.772053,0.231813


T0|Horizon=90:
Loading separate datasets, Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T_12_90Days.csv, target is T_12, (#samples=4897, #features=4, target='T_12')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T16_90Days.csv, target is T16, (#samples=5988, #features=4, target='T16')
Standardizing separate datasets
Training ET model... [Test Accuracy=0.754522413668715, FAR=0.21777221526908636]


  self.results = pd.concat([self.results, pd.DataFrame({


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,ET,0.120979,1.0,1.0,0.0,0.032482,0.754522,0.767661,0.217772


### Location specific: Train on Toolik

In [56]:
for hrz in list([0,7,30,90]):
    print(f'T0|Horizon={hrz}:')
    # Paths to your datasets
    data_path1 = f'/home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T0_{hrz}Days.csv'  # Dataset 1 (Toolik lake)
    data_path2 =  f'/home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T0_{hrz}Days.csv'  # Dataset 2 (Deadhorse)

    # Instantiate the pipeline without combining the datasets
    pipeline = FlexibleDatasetClassificationPipeline(
        data_path1=data_path1, 
        data_path2=data_path2, 
        combine_datasets=False, 
        ftrz=['Y', 'M', 'D', 'W', 'SWGDN', 'T2M', 'GHTSKIN', 'HFLUX', 'SPEED'],
        seed=42,
        test_size=0.2,
        use_smote=False
    )

    # Train and test the model using a Random Forest classifier
    pipeline.train_and_test_model(model_name="ET")

    # Display the results
    pipeline.display_results()
    
print(f'*****************************************************************************************************************')    
for hrz in list([0,7,30,90]):
    print(f'T0|Horizon={hrz}:')
    # Paths to your datasets
    data_path1 = f'/home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T8_{hrz}Days.csv'  # Dataset 1 (Toolik lake)
    data_path2 =  f'/home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T_07_{hrz}Days.csv'  # Dataset 2 (Deadhorse)

    # Instantiate the pipeline without combining the datasets
    pipeline = FlexibleDatasetClassificationPipeline(
        data_path1=data_path1, 
        data_path2=data_path2, 
        combine_datasets=False, 
        ftrz=['Y', 'S', 'SNODP', 'SWLAND', 'GHTSKIN', 'SPEED', 'TLML', 'EVPSOIL', 'SLP'],
        seed=42,
        test_size=0.2,
        use_smote=False
    )

    # Train and test the model using a Random Forest classifier
    pipeline.train_and_test_model(model_name="ET")

    # Display the results
    pipeline.display_results()
    
print(f'*****************************************************************************************************************')    
for hrz in list([0,7,30,90]):
    print(f'T0|Horizon={hrz}:')
    # Paths to your datasets
    data_path1 = f'/home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T16_{hrz}Days.csv'  # Dataset 1 (Toolik lake)
    data_path2 =  f'/home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T_12_{hrz}Days.csv'  # Dataset 2 (Deadhorse)

    # Instantiate the pipeline without combining the datasets
    pipeline = FlexibleDatasetClassificationPipeline(
        data_path1=data_path1, 
        data_path2=data_path2, 
        combine_datasets=False, 
        ftrz=['Y', 'M', 'D', 'S', 'SNODP', 'GHTSKIN', 'HFLUX', 'TLML', 'SLP'],
        seed=42,
        test_size=0.2,
        use_smote=False
    )

    # Train and test the model using a Random Forest classifier
    pipeline.train_and_test_model(model_name="ET")

    # Display the results
    pipeline.display_results()
    
    

T0|Horizon=0:
Loading separate datasets, Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T0_0Days.csv, target is T0, (#samples=6080, #features=9, target='T0')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T0_0Days.csv, target is T0, (#samples=4987, #features=9, target='T0')
Standardizing separate datasets
Training ET model... [Test Accuracy=0.9442741278280105, FAR=0.056657223796033995]


  self.results = pd.concat([self.results, pd.DataFrame({


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,ET,0.130559,1.0,1.0,0.0,0.033484,0.944274,0.94451,0.056657


T0|Horizon=7:
Loading separate datasets, Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T0_7Days.csv, target is T0, (#samples=6071, #features=9, target='T0')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T0_7Days.csv, target is T0, (#samples=4980, #features=9, target='T0')
Standardizing separate datasets
Training ET model... [Test Accuracy=0.939436099003736, FAR=0.03551136363636364]


  self.results = pd.concat([self.results, pd.DataFrame({


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,ET,0.19989,1.0,1.0,0.0,0.033256,0.939436,0.949799,0.035511


T0|Horizon=30:
Loading separate datasets, Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T0_30Days.csv, target is T0, (#samples=6048, #features=9, target='T0')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T0_30Days.csv, target is T0, (#samples=4957, #features=9, target='T0')
Standardizing separate datasets
Training ET model... [Test Accuracy=0.9334735812133073, FAR=0.054285714285714284]


  self.results = pd.concat([self.results, pd.DataFrame({


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,ET,0.131348,1.0,1.0,0.0,0.032697,0.933474,0.938945,0.054286


T0|Horizon=90:
Loading separate datasets, Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T0_90Days.csv, target is T0, (#samples=5988, #features=9, target='T0')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T0_90Days.csv, target is T0, (#samples=4897, #features=9, target='T0')
Standardizing separate datasets
Training ET model... [Test Accuracy=0.9638220770946162, FAR=0.055232558139534885]


  self.results = pd.concat([self.results, pd.DataFrame({


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,ET,0.133111,1.0,1.0,0.0,0.032482,0.963822,0.956757,0.055233


*****************************************************************************************************************
T0|Horizon=0:
Loading separate datasets, Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T8_0Days.csv, target is T8, (#samples=6080, #features=9, target='T8')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T_07_0Days.csv, target is T_07, (#samples=4987, #features=9, target='T_07')
Standardizing separate datasets
Training ET model... [Test Accuracy=0.8012723533011491, FAR=0.09902597402597403]


  self.results = pd.concat([self.results, pd.DataFrame({


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,ET,0.126717,1.0,1.0,0.0,0.032562,0.801272,0.82173,0.099026


T0|Horizon=7:
Loading separate datasets, Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T8_7Days.csv, target is T8, (#samples=6071, #features=9, target='T8')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T_07_7Days.csv, target is T_07, (#samples=4980, #features=9, target='T_07')
Standardizing separate datasets
Training ET model... [Test Accuracy=0.8229829288674386, FAR=0.07654723127035831]


  self.results = pd.concat([self.results, pd.DataFrame({


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,ET,0.126072,1.0,1.0,0.0,0.032048,0.822983,0.843479,0.076547


T0|Horizon=30:
Loading separate datasets, Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T8_30Days.csv, target is T8, (#samples=6048, #features=9, target='T8')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T_07_30Days.csv, target is T_07, (#samples=4957, #features=9, target='T_07')
Standardizing separate datasets
Training ET model... [Test Accuracy=0.8204188481675393, FAR=0.1]


  self.results = pd.concat([self.results, pd.DataFrame({


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,ET,0.123536,1.0,1.0,0.0,0.031994,0.820419,0.836925,0.1


T0|Horizon=90:
Loading separate datasets, Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T8_90Days.csv, target is T8, (#samples=5988, #features=9, target='T8')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T_07_90Days.csv, target is T_07, (#samples=4897, #features=9, target='T_07')
Standardizing separate datasets
Training ET model... [Test Accuracy=0.807013780665044, FAR=0.05351170568561873]


  self.results = pd.concat([self.results, pd.DataFrame({


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,ET,0.133099,1.0,1.0,0.0,0.033177,0.807014,0.832205,0.053512


*****************************************************************************************************************
T0|Horizon=0:
Loading separate datasets, Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T16_0Days.csv, target is T16, (#samples=6080, #features=9, target='T16')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T_12_0Days.csv, target is T_12, (#samples=4987, #features=9, target='T_12')
Standardizing separate datasets
Training ET model... [Test Accuracy=0.8624569130601303, FAR=0.09651474530831099]


  self.results = pd.concat([self.results, pd.DataFrame({


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,ET,0.132043,1.0,1.0,0.0,0.033043,0.862457,0.884666,0.096515


T0|Horizon=7:
Loading separate datasets, Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T16_7Days.csv, target is T16, (#samples=6071, #features=9, target='T16')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T_12_7Days.csv, target is T_12, (#samples=4980, #features=9, target='T_12')
Standardizing separate datasets
Training ET model... [Test Accuracy=0.869940372737239, FAR=0.1087248322147651]


  self.results = pd.concat([self.results, pd.DataFrame({


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,ET,0.131836,1.0,1.0,0.0,0.032486,0.86994,0.88346,0.108725


T0|Horizon=30:
Loading separate datasets, Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T16_30Days.csv, target is T16, (#samples=6048, #features=9, target='T16')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T_12_30Days.csv, target is T_12, (#samples=4957, #features=9, target='T_12')
Standardizing separate datasets
Training ET model... [Test Accuracy=0.865948090948091, FAR=0.10540540540540541]


  self.results = pd.concat([self.results, pd.DataFrame({


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,ET,0.130012,1.0,1.0,0.0,0.032287,0.865948,0.882605,0.105405


T0|Horizon=90:
Loading separate datasets, Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Toolik_T16_90Days.csv, target is T16, (#samples=5988, #features=9, target='T16')
Loading data from /home/mohamed.ahajjam/FreezeThaw/FTstatesDataset_Deadhorse_T_12_90Days.csv, target is T_12, (#samples=4897, #features=9, target='T_12')
Standardizing separate datasets
Training ET model... [Test Accuracy=0.8621031746031746, FAR=0.125]


  self.results = pd.concat([self.results, pd.DataFrame({


Unnamed: 0,Model,Train Time (s),Train wAccuracy,Train f1w,Train FAR,Test Time (s),Test wAccuracy,Test f1w,Test FAR
0,ET,0.129656,1.0,1.0,0.0,0.032821,0.862103,0.872144,0.125
