In [2]:
import pandas as pd
import os
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import r2_score
import featuretools as ft
from feature_selector import FeatureSelector
import time
from sklearn.feature_selection import VarianceThreshold
from sklearn.feature_selection import SelectPercentile, mutual_info_regression, SelectKBest
from sklearn.feature_selection import RFE
import warnings
warnings.filterwarnings('ignore')

This means that in case of installing LightGBM from PyPI via the ``pip install lightgbm`` command, you don't need to install the gcc compiler anymore.
Instead of that, you need to install the OpenMP library, which is required for running LightGBM on the system with the Apple Clang compiler.
You can install the OpenMP library by the following command: ``brew install libomp``.


In [9]:
# Wall-clock reference for the timing report printed at the end of this cell
start = time.time()

# Building metadata, one row per building keyed by `uid`.
# dayfirst=True is required: without it the datastart/dataend dates are parsed incorrectly.
meta_csv = '/Users/t.wang/Desktop/Dissertation/Python/input/meta_open.csv'
meta = pd.read_csv(
    meta_csv,
    index_col='uid',
    parse_dates=['dataend', 'datastart'],
    dayfirst=True,
)

# Energy readings indexed by timestamp; columns are later selected by building name.
# (A trailing .tz_localize('utc') was previously disabled here and remains disabled.)
temporal_csv = '/Users/t.wang/Desktop/Dissertation/Python/input/temp_open_utc_complete.csv'
temporal = pd.read_csv(
    temporal_csv,
    index_col='timestamp',
    parse_dates=True,
)

def _month_splits():
    """Return the five hand-picked (train_months, test_months) calendar-month folds.

    Each fold holds out three months; the remaining nine months are used for training.
    """
    all_months = np.arange(1, 13)
    held_out_months = [(10, 11, 12), (4, 8, 12), (5, 9, 12), (6, 10, 12), (3, 7, 8)]
    splits = []
    for held_out in held_out_months:
        test_months = np.array(held_out)
        # setdiff1d returns the sorted months that are not held out
        splits.append((np.setdiff1d(all_months, test_months), test_months))
    return splits


def _cv_rmse(y_true, y_pred):
    """Return the RMSE of the predictions as a percentage of the mean of y_true."""
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    return 100 * np.sqrt(np.mean((y_true - y_pred) ** 2)) / np.mean(y_true)


def _hourly_aligned(frame, n_periods):
    """Reindex `frame` onto `n_periods` hourly stamps starting at its first index.

    Rows whose timestamp is not on that hourly grid are dropped; hours without a
    row are filled forward, then backward.
    """
    hourly_index = pd.date_range(start=frame.index[0], periods=n_periods, freq='H')
    return frame.reindex(hourly_index).ffill().bfill()


def loopModels_and_Metrics(ml_Models_names, ml_Models, weatherPoints, cor_threshold,
                           buildingNames, agg_primitives, trans_primitives, varianceThreshold, Select_num,
                           Select_func, RFE_step,
                           input_dir='/Users/t.wang/Desktop/Dissertation/Python/input/',
                           output_dir='/Users/t.wang/Desktop/Dissertation/Python/WT-result/New_scope/Loop_with_FE_FS/'):
    """Run feature engineering, filter-based feature selection and month-wise CV per building.

    For every building the function assembles hourly features (schedule + weather),
    expands them with featuretools, removes collinear and low-variance features,
    keeps the `Select_num` best features according to `Select_func`, then fits
    `ml_Models` on five fixed month-based train/test folds and records the CV(RMSE)
    of each fold. Results are written to
    `<output_dir><ml_Models_names>_metrics_cross_validation_mean_FE_Filter.csv`
    after every building (newest building first), so partial results survive an
    interrupted run. An existing file with that name is overwritten.

    Relies on the notebook-level frames `meta` (building metadata indexed by uid)
    and `temporal` (energy readings, one column per building).

    Args:
        ml_Models_names: label printed to stdout and used in the output file name.
        ml_Models: estimator exposing `fit(X, y)` and `predict(X)`; refit for every fold.
        weatherPoints: weather columns to use as features.
        cor_threshold: correlation magnitude above which FeatureSelector drops a feature.
        buildingNames: iterable of building ids (index labels of `meta`).
        agg_primitives: featuretools aggregation primitives.
        trans_primitives: featuretools transform primitives.
        varianceThreshold: p used in the VarianceThreshold cut-off p * (1 - p).
        Select_num: number of features kept by SelectKBest.
        Select_func: scoring function handed to SelectKBest.
        RFE_step: unused; kept for interface compatibility with the disabled
            recursive-feature-elimination (wrapper) step.
        input_dir: directory holding the weather and schedule CSV files.
        output_dir: directory receiving the metrics CSV.

    NOTE(review): feature selection is fitted on the full data set, i.e. including
    the months later used for testing, so the reported scores may be optimistic.
    """
    print('\n\n' + ml_Models_names + '\n_____________')

    results_path = os.path.join(output_dir,
                                ml_Models_names + '_metrics_cross_validation_mean_FE_Filter' + '.csv')
    month_splits = _month_splits()
    result_columns = (['buildingName']
                      + ['CV_%d' % (fold + 1) for fold in range(len(month_splits))]
                      + ['CV_mean'])
    result_rows = []

    for single_building in buildingNames:
        print('Modelling:' + single_building)

        # Look the metadata row up once instead of transposing `meta` for every field
        building_meta = meta.loc[single_building]
        single_timezone = building_meta['timezone']

        # Energy data for this building, in local time, limited to its metered period
        # (may still contain missing values)
        single_building_energy = temporal[single_building].tz_convert(single_timezone).truncate(
            before=building_meta['datastart'], after=building_meta['dataend'])
        n_hours = len(single_building_energy)

        # Weather data for this building
        weather_data = pd.read_csv(os.path.join(input_dir, building_meta['newweatherfilename']),
                                   index_col='timestamp', parse_dates=True, na_values='-9999')
        weather_data = weather_data.tz_localize(single_timezone, ambiguous='infer')
        all_weather_point = weather_data[list(weatherPoints)]
        # Weather files can hold several records within the same hour. Drop duplicated
        # timestamps, then keep only the records on the hourly grid anchored at the
        # first timestamp so the weather has exactly one row per energy reading.
        all_weather_point = all_weather_point[~all_weather_point.index.duplicated()]
        all_weather_point = _hourly_aligned(all_weather_point, n_hours)

        # Schedule data for this building, aligned with the same trick
        schedule_data = pd.read_csv(os.path.join(input_dir, building_meta['annualschedule']),
                                    index_col=0, header=None, parse_dates=True)
        schedule_data = schedule_data.tz_localize(single_timezone, ambiguous='infer')
        schedule_data.columns = ['seasonal']
        schedule_data = _hourly_aligned(schedule_data, n_hours)

        # Assemble the feature table positionally; the time zone is removed from the timestamps
        features = pd.merge(pd.DataFrame(single_building_energy.index.tz_localize(None)),
                            schedule_data.reset_index(drop=True), right_index=True, left_index=True)
        # Map the schedule to fixed integer codes so every split sees the same feature dimensions
        features['seasonal_num'] = features.seasonal.map({'Break':0, 'Regular':1, 'Holiday':2, 'Summer':3})
        features = features.drop('seasonal', axis=1)
        # reset_index(drop=True) drops the time index so both tables line up row by row
        features = pd.concat([features, all_weather_point.reset_index(drop=True)], axis=1)
        labels = single_building_energy.values

        # --- Feature engineering (featuretools) ---
        es = ft.EntitySet(id = 'buildingFeatures')
        # Create an entity from the feature table; a unique index column is generated
        es = es.entity_from_dataframe(entity_id='featureData', dataframe=features,
                                      make_index=True, index='feature_id', time_index = 'timestamp')
        features_FE, feature_names = ft.dfs(entityset = es, target_entity = 'featureData', max_depth = 2,
                                            agg_primitives = agg_primitives,
                                            trans_primitives = trans_primitives, verbose = True, n_jobs=1)

        # One-hot encode categorical features
        features_enc, feature_names_enc = ft.encode_features(features_FE, feature_names)
        # Generated features can contain +/-inf (e.g. from divisions). Substitute numeric
        # sentinels; string sentinels would turn the affected columns into object dtype.
        features_enc = features_enc.replace([np.inf, -np.inf], [9999, -9999])
        features_enc = features_enc.replace([np.nan,''],0)

        # --- Feature selection (filter methods) ---
        # 1) remove collinear features (FeatureSelector)
        fs = FeatureSelector(data = features_enc, labels = labels)
        fs.identify_collinear(correlation_threshold = cor_threshold, one_hot=False)
        X_collinear = fs.remove(methods = ['collinear'], keep_one_hot=False)

        # 2) remove low-variance features (scikit-learn)
        sel = VarianceThreshold(threshold=(varianceThreshold * (1 - varianceThreshold)))
        X_collinear_Variance = pd.DataFrame(sel.fit_transform(X_collinear))

        # 3) keep the Select_num best features according to Select_func (scikit-learn)
        X_selected = np.asarray(
            SelectKBest(score_func=Select_func, k=Select_num).fit_transform(X_collinear_Variance, labels))
        print(X_selected.shape)

        # --- Month-based cross validation ---
        y_values = np.asarray(labels)
        sample_months = features['timestamp'].dt.month
        CV_list = []
        for months_for_train, months_for_test in month_splits:
            train_mask = sample_months.isin(months_for_train).values
            test_mask = sample_months.isin(months_for_test).values
            ml_Models.fit(X_selected[train_mask], y_values[train_mask])
            predictions = ml_Models.predict(X_selected[test_mask])
            CV_list.append(_cv_rmse(y_values[test_mask], predictions))
        CV_mean = np.mean(CV_list)

        # --- Persist results: newest building first, rewritten after every building ---
        result_rows.insert(0, [single_building] + CV_list + [CV_mean])
        pd.DataFrame(result_rows, columns=result_columns).to_csv(results_path, index=False)

    
def _format_hms(seconds):
    """Format a duration in seconds as HH:MM:SS; hours are not wrapped at 24."""
    hours, remainder = divmod(int(seconds), 3600)
    minutes, secs = divmod(remainder, 60)
    return '%02d:%02d:%02d' % (hours, minutes, secs)


# Models to evaluate, as [label used in the output file name, estimator] pairs
ml_Models_lists = [['RandomForestRegressor_FE', RandomForestRegressor(n_estimators=100, random_state=42, n_jobs=-1)]]
weatherPoints = ['TemperatureC', 'Humidity','Dew PointC','Sea Level PressurehPa', 
                 'Wind Direction','Conditions','WindDirDegrees']

# Only buildings that have a schedule file can be modelled
buildingNames = meta.dropna(subset=['annualschedule']).index
agg_primitives = []
trans_primitives = ['is_weekend','divide_by_feature',
                               'multiply_numeric','subtract_numeric']
cor_threshold = 0.98
varianceThreshold = 0.8
Select_num = 60
Select_func = mutual_info_regression
RFE_step = 10

for model_name, model in ml_Models_lists:
    loopModels_and_Metrics(ml_Models_names=model_name, ml_Models=model, weatherPoints=weatherPoints,
                           buildingNames=buildingNames, cor_threshold=cor_threshold,
                           agg_primitives=agg_primitives, trans_primitives=trans_primitives,
                           varianceThreshold=varianceThreshold,
                           Select_num=Select_num, Select_func=Select_func, RFE_step=RFE_step)

end = time.time()
elapsed = end - start
# `elapsed` covers the whole cell (data loading plus every model/building run), so the
# per-building figure is the average over all runs rather than the total.
n_runs = max(len(ml_Models_lists) * len(buildingNames), 1)
print('Total time after FE and FS:' + _format_hms(elapsed))
print('Time per building after FE and FS:' + _format_hms(elapsed / n_runs))



# all_weather_point
# schedule_data
# single_building_energy
# train_test_list
# X_train,y_train
# X_train.shape,y_train.shape
# X_test,y_test
# X_test.shape,y_test.shape
# buildingNames




RandomForestRegressor_FE
_____________
Modelling:Office_Abbey
Built 432 features
Elapsed: 00:03 | Remaining: 00:00 | Progress: 100%|██████████| Calculated: 10/10 chunks
223 features with a correlation magnitude greater than 0.98.

Data has not been one-hot encoded
Removed 223 features including one-hot features.
(8760, 60)
Modelling:Office_Abigail
Built 432 features
Elapsed: 00:07 | Remaining: 00:00 | Progress: 100%|██████████| Calculated: 10/10 chunks
223 features with a correlation magnitude greater than 0.98.

Data has not been one-hot encoded
Removed 223 features including one-hot features.
(8760, 60)
Modelling:Office_Al
Built 432 features
Elapsed: 00:07 | Remaining: 00:00 | Progress: 100%|██████████| Calculated: 10/10 chunks
223 features with a correlation magnitude greater than 0.98.

Data has not been one-hot encoded
Removed 223 features including one-hot features.
(8760, 60)
Modelling:Office_Alannah
Built 432 features
Elapsed: 00:10 | Remaining: 00:00 | Progress: 100%|███████

Elapsed: 00:05 | Remaining: 00:00 | Progress: 100%|██████████| Calculated: 10/10 chunks
233 features with a correlation magnitude greater than 0.98.

Data has not been one-hot encoded
Removed 233 features including one-hot features.
(8760, 60)
Modelling:Office_Cameron
Built 432 features
Elapsed: 00:05 | Remaining: 00:00 | Progress: 100%|██████████| Calculated: 10/10 chunks
233 features with a correlation magnitude greater than 0.98.

Data has not been one-hot encoded
Removed 233 features including one-hot features.
(8760, 60)
Modelling:Office_Carissa
Built 432 features
Elapsed: 00:07 | Remaining: 00:00 | Progress: 100%|██████████| Calculated: 10/10 chunks
233 features with a correlation magnitude greater than 0.98.

Data has not been one-hot encoded
Removed 233 features including one-hot features.
(8760, 60)
Modelling:Office_Carolina
Built 432 features
Elapsed: 00:05 | Remaining: 00:00 | Progress: 100%|██████████| Calculated: 10/10 chunks
233 features with a correlation magnitude great

(8784, 60)
Modelling:Office_Evelyn
Built 432 features
Elapsed: 00:05 | Remaining: 00:00 | Progress: 100%|██████████| Calculated: 11/11 chunks
268 features with a correlation magnitude greater than 0.98.

Data has not been one-hot encoded
Removed 268 features including one-hot features.
(8784, 60)
Modelling:Office_Gabriela
Built 432 features
Elapsed: 00:04 | Remaining: 00:00 | Progress: 100%|██████████| Calculated: 10/10 chunks
268 features with a correlation magnitude greater than 0.98.

Data has not been one-hot encoded
Removed 268 features including one-hot features.
(8760, 60)
Modelling:Office_Garman
Built 432 features
Elapsed: 00:04 | Remaining: 00:00 | Progress: 100%|██████████| Calculated: 10/10 chunks
268 features with a correlation magnitude greater than 0.98.

Data has not been one-hot encoded
Removed 268 features including one-hot features.
(8760, 60)
Modelling:Office_Garrett
Built 432 features
Elapsed: 00:05 | Remaining: 00:00 | Progress: 100%|██████████| Calculated: 10/10 c

(8760, 60)
Modelling:Office_Lesa
Built 432 features
Elapsed: 00:07 | Remaining: 00:00 | Progress: 100%|██████████| Calculated: 10/10 chunks
233 features with a correlation magnitude greater than 0.98.

Data has not been one-hot encoded
Removed 233 features including one-hot features.
(8760, 60)
Modelling:Office_Lillian
Built 432 features
Elapsed: 00:04 | Remaining: 00:00 | Progress: 100%|██████████| Calculated: 10/10 chunks
233 features with a correlation magnitude greater than 0.98.

Data has not been one-hot encoded
Removed 233 features including one-hot features.
(8760, 60)
Modelling:Office_Louise
Built 432 features
Elapsed: 00:07 | Remaining: 00:00 | Progress: 100%|██████████| Calculated: 10/10 chunks
233 features with a correlation magnitude greater than 0.98.

Data has not been one-hot encoded
Removed 233 features including one-hot features.
(8760, 60)
Modelling:Office_Luann
Built 432 features
Elapsed: 00:07 | Remaining: 00:00 | Progress: 100%|██████████| Calculated: 10/10 chunks

(8760, 60)
Modelling:Office_Melinda
Built 432 features
Elapsed: 00:07 | Remaining: 00:00 | Progress: 100%|██████████| Calculated: 10/10 chunks
234 features with a correlation magnitude greater than 0.98.

Data has not been one-hot encoded
Removed 234 features including one-hot features.
(8760, 60)
Modelling:Office_Mercedes
Built 432 features
Elapsed: 00:06 | Remaining: 00:00 | Progress: 100%|██████████| Calculated: 10/10 chunks
234 features with a correlation magnitude greater than 0.98.

Data has not been one-hot encoded
Removed 234 features including one-hot features.
(8760, 60)
Modelling:Office_Michael
Built 432 features
Elapsed: 00:05 | Remaining: 00:00 | Progress: 100%|██████████| Calculated: 10/10 chunks
234 features with a correlation magnitude greater than 0.98.

Data has not been one-hot encoded
Removed 234 features including one-hot features.
(8760, 60)
Modelling:Office_Micheal
Built 432 features
Elapsed: 00:07 | Remaining: 00:00 | Progress: 100%|██████████| Calculated: 10/10

(8760, 60)
Modelling:Office_Phebian
Built 432 features
Elapsed: 00:08 | Remaining: 00:00 | Progress: 100%|██████████| Calculated: 10/10 chunks
230 features with a correlation magnitude greater than 0.98.

Data has not been one-hot encoded
Removed 230 features including one-hot features.
(8760, 60)
Modelling:Office_Precious
Built 432 features
Elapsed: 00:06 | Remaining: 00:00 | Progress: 100%|██████████| Calculated: 10/10 chunks
230 features with a correlation magnitude greater than 0.98.

Data has not been one-hot encoded
Removed 230 features including one-hot features.
(8760, 60)
Modelling:Office_Scottie
Built 432 features
Elapsed: 00:06 | Remaining: 00:00 | Progress: 100%|██████████| Calculated: 10/10 chunks
233 features with a correlation magnitude greater than 0.98.

Data has not been one-hot encoded
Removed 233 features including one-hot features.
(8760, 60)
Modelling:Office_Shari
Built 432 features
Elapsed: 00:05 | Remaining: 00:00 | Progress: 100%|██████████| Calculated: 10/10 c

Elapsed: 00:05 | Remaining: 00:00 | Progress: 100%|██████████| Calculated: 10/10 chunks
204 features with a correlation magnitude greater than 0.98.

Data has not been one-hot encoded
Removed 204 features including one-hot features.
(8760, 60)
Modelling:PrimClass_Jaqueline
Built 432 features
Elapsed: 00:05 | Remaining: 00:00 | Progress: 100%|██████████| Calculated: 10/10 chunks
204 features with a correlation magnitude greater than 0.98.

Data has not been one-hot encoded
Removed 204 features including one-hot features.
(8760, 60)
Modelling:PrimClass_Jarrett
Built 432 features
Elapsed: 00:05 | Remaining: 00:00 | Progress: 100%|██████████| Calculated: 10/10 chunks
204 features with a correlation magnitude greater than 0.98.

Data has not been one-hot encoded
Removed 204 features including one-hot features.
(8760, 60)
Modelling:PrimClass_Jasmine
Built 432 features
Elapsed: 00:06 | Remaining: 00:00 | Progress: 100%|██████████| Calculated: 10/10 chunks
204 features with a correlation magni

Elapsed: 00:03 | Remaining: 00:00 | Progress: 100%|██████████| Calculated: 10/10 chunks
204 features with a correlation magnitude greater than 0.98.

Data has not been one-hot encoded
Removed 204 features including one-hot features.
(8760, 60)
Modelling:PrimClass_Jessie
Built 432 features
Elapsed: 00:03 | Remaining: 00:00 | Progress: 100%|██████████| Calculated: 10/10 chunks
204 features with a correlation magnitude greater than 0.98.

Data has not been one-hot encoded
Removed 204 features including one-hot features.
(8760, 60)
Modelling:PrimClass_Jill
Built 432 features
Elapsed: 00:03 | Remaining: 00:00 | Progress: 100%|██████████| Calculated: 10/10 chunks
204 features with a correlation magnitude greater than 0.98.

Data has not been one-hot encoded
Removed 204 features including one-hot features.
(8760, 60)
Modelling:PrimClass_Jim
Built 432 features
Elapsed: 00:03 | Remaining: 00:00 | Progress: 100%|██████████| Calculated: 10/10 chunks
204 features with a correlation magnitude great

Elapsed: 00:03 | Remaining: 00:00 | Progress: 100%|██████████| Calculated: 10/10 chunks
204 features with a correlation magnitude greater than 0.98.

Data has not been one-hot encoded
Removed 204 features including one-hot features.
(8760, 60)
Modelling:PrimClass_Julius
Built 432 features
Elapsed: 00:03 | Remaining: 00:00 | Progress: 100%|██████████| Calculated: 10/10 chunks
204 features with a correlation magnitude greater than 0.98.

Data has not been one-hot encoded
Removed 204 features including one-hot features.
(8760, 60)
Modelling:PrimClass_Justice
Built 432 features
Elapsed: 00:03 | Remaining: 00:00 | Progress: 100%|██████████| Calculated: 10/10 chunks
204 features with a correlation magnitude greater than 0.98.

Data has not been one-hot encoded
Removed 204 features including one-hot features.
(8760, 60)
Modelling:PrimClass_Justin
Built 432 features
Elapsed: 00:05 | Remaining: 00:00 | Progress: 100%|██████████| Calculated: 10/10 chunks
204 features with a correlation magnitude

Elapsed: 00:03 | Remaining: 00:00 | Progress: 100%|██████████| Calculated: 10/10 chunks
223 features with a correlation magnitude greater than 0.98.

Data has not been one-hot encoded
Removed 223 features including one-hot features.
(8760, 60)
Modelling:UnivClass_Annmarie
Built 432 features
Elapsed: 00:06 | Remaining: 00:00 | Progress: 100%|██████████| Calculated: 10/10 chunks
223 features with a correlation magnitude greater than 0.98.

Data has not been one-hot encoded
Removed 223 features including one-hot features.
(8760, 60)
Modelling:UnivClass_Antoinette
Built 432 features
Elapsed: 00:05 | Remaining: 00:00 | Progress: 100%|██████████| Calculated: 10/10 chunks
223 features with a correlation magnitude greater than 0.98.

Data has not been one-hot encoded
Removed 223 features including one-hot features.
(8760, 60)
Modelling:UnivClass_Anya
Built 432 features
Elapsed: 00:03 | Remaining: 00:00 | Progress: 100%|██████████| Calculated: 10/10 chunks
223 features with a correlation magnit

Elapsed: 00:03 | Remaining: 00:00 | Progress: 100%|██████████| Calculated: 10/10 chunks
233 features with a correlation magnitude greater than 0.98.

Data has not been one-hot encoded
Removed 233 features including one-hot features.
(8760, 60)
Modelling:UnivClass_Colette
Built 432 features
Elapsed: 00:03 | Remaining: 00:00 | Progress: 100%|██████████| Calculated: 10/10 chunks
233 features with a correlation magnitude greater than 0.98.

Data has not been one-hot encoded
Removed 233 features including one-hot features.
(8760, 60)
Modelling:UnivClass_Conner
Built 432 features
Elapsed: 00:03 | Remaining: 00:00 | Progress: 100%|██████████| Calculated: 10/10 chunks
233 features with a correlation magnitude greater than 0.98.

Data has not been one-hot encoded
Removed 233 features including one-hot features.
(8760, 60)
Modelling:UnivClass_Conor
Built 432 features
Elapsed: 00:03 | Remaining: 00:00 | Progress: 100%|██████████| Calculated: 10/10 chunks
233 features with a correlation magnitude 

Elapsed: 00:03 | Remaining: 00:00 | Progress: 100%|██████████| Calculated: 10/10 chunks
233 features with a correlation magnitude greater than 0.98.

Data has not been one-hot encoded
Removed 233 features including one-hot features.
(8760, 60)
Modelling:UnivClass_Tammy
Built 432 features
Elapsed: 00:03 | Remaining: 00:00 | Progress: 100%|██████████| Calculated: 10/10 chunks
231 features with a correlation magnitude greater than 0.98.

Data has not been one-hot encoded
Removed 231 features including one-hot features.
(8760, 60)
Modelling:UnivClass_Tamra
Built 432 features
Elapsed: 00:03 | Remaining: 00:00 | Progress: 100%|██████████| Calculated: 10/10 chunks
231 features with a correlation magnitude greater than 0.98.

Data has not been one-hot encoded
Removed 231 features including one-hot features.
(8760, 60)
Modelling:UnivClass_Teri
Built 432 features
Elapsed: 00:06 | Remaining: 00:00 | Progress: 100%|██████████| Calculated: 10/10 chunks
231 features with a correlation magnitude grea

(8760, 60)
Modelling:UnivDorm_Chelsey
Built 432 features
Elapsed: 00:03 | Remaining: 00:00 | Progress: 100%|██████████| Calculated: 10/10 chunks
233 features with a correlation magnitude greater than 0.98.

Data has not been one-hot encoded
Removed 233 features including one-hot features.
(8760, 60)
Modelling:UnivDorm_Cheri
Built 432 features
Elapsed: 00:03 | Remaining: 00:00 | Progress: 100%|██████████| Calculated: 10/10 chunks
233 features with a correlation magnitude greater than 0.98.

Data has not been one-hot encoded
Removed 233 features including one-hot features.
(8760, 60)
Modelling:UnivDorm_Chester
Built 432 features
Elapsed: 00:03 | Remaining: 00:00 | Progress: 100%|██████████| Calculated: 10/10 chunks
233 features with a correlation magnitude greater than 0.98.

Data has not been one-hot encoded
Removed 233 features including one-hot features.
(8760, 60)
Modelling:UnivDorm_Cheyenne
Built 432 features
Elapsed: 00:03 | Remaining: 00:00 | Progress: 100%|██████████| Calculated:

Elapsed: 00:03 | Remaining: 00:00 | Progress: 100%|██████████| Calculated: 10/10 chunks
234 features with a correlation magnitude greater than 0.98.

Data has not been one-hot encoded
Removed 234 features including one-hot features.
(8760, 60)
Modelling:UnivDorm_Mathew
Built 432 features
Elapsed: 00:03 | Remaining: 00:00 | Progress: 100%|██████████| Calculated: 10/10 chunks
234 features with a correlation magnitude greater than 0.98.

Data has not been one-hot encoded
Removed 234 features including one-hot features.
(8760, 60)
Modelling:UnivDorm_Mauricio
Built 432 features
Elapsed: 00:03 | Remaining: 00:00 | Progress: 100%|██████████| Calculated: 10/10 chunks
234 features with a correlation magnitude greater than 0.98.

Data has not been one-hot encoded
Removed 234 features including one-hot features.
(8760, 60)
Modelling:UnivDorm_Mckenzie
Built 432 features
Elapsed: 00:03 | Remaining: 00:00 | Progress: 100%|██████████| Calculated: 10/10 chunks
234 features with a correlation magnitude

(8760, 60)
Modelling:UnivLab_Allison
Built 432 features
Elapsed: 00:03 | Remaining: 00:00 | Progress: 100%|██████████| Calculated: 10/10 chunks
223 features with a correlation magnitude greater than 0.98.

Data has not been one-hot encoded
Removed 223 features including one-hot features.
(8760, 60)
Modelling:UnivLab_Amaya
Built 432 features
Elapsed: 00:03 | Remaining: 00:00 | Progress: 100%|██████████| Calculated: 10/10 chunks
223 features with a correlation magnitude greater than 0.98.

Data has not been one-hot encoded
Removed 223 features including one-hot features.
(8760, 60)
Modelling:UnivLab_Amy
Built 432 features
Elapsed: 00:03 | Remaining: 00:00 | Progress: 100%|██████████| Calculated: 10/10 chunks
223 features with a correlation magnitude greater than 0.98.

Data has not been one-hot encoded
Removed 223 features including one-hot features.
(8760, 60)
Modelling:UnivLab_Ana
Built 432 features
Elapsed: 00:04 | Remaining: 00:00 | Progress: 100%|██████████| Calculated: 10/10 chunks

233 features with a correlation magnitude greater than 0.98.

Data has not been one-hot encoded
Removed 233 features including one-hot features.
(8760, 60)
Modelling:UnivLab_Carlos
Built 432 features
Elapsed: 00:03 | Remaining: 00:00 | Progress: 100%|██████████| Calculated: 10/10 chunks
233 features with a correlation magnitude greater than 0.98.

Data has not been one-hot encoded
Removed 233 features including one-hot features.
(8760, 60)
Modelling:UnivLab_Carlton
Built 432 features
Elapsed: 00:06 | Remaining: 00:00 | Progress: 100%|██████████| Calculated: 10/10 chunks
233 features with a correlation magnitude greater than 0.98.

Data has not been one-hot encoded
Removed 233 features including one-hot features.
(8760, 60)
Modelling:UnivLab_Carol
Built 432 features
Elapsed: 00:07 | Remaining: 00:00 | Progress: 100%|██████████| Calculated: 10/10 chunks
233 features with a correlation magnitude greater than 0.98.

Data has not been one-hot encoded
Removed 233 features including one-hot f

(8760, 60)
Modelling:UnivLab_Mack
Built 432 features
Elapsed: 00:03 | Remaining: 00:00 | Progress: 100%|██████████| Calculated: 10/10 chunks
234 features with a correlation magnitude greater than 0.98.

Data has not been one-hot encoded
Removed 234 features including one-hot features.
(8760, 60)
Modelling:UnivLab_Madelyn
Built 432 features
Elapsed: 00:03 | Remaining: 00:00 | Progress: 100%|██████████| Calculated: 10/10 chunks
234 features with a correlation magnitude greater than 0.98.

Data has not been one-hot encoded
Removed 234 features including one-hot features.
(8760, 60)
Modelling:UnivLab_Margret
Built 432 features
Elapsed: 00:06 | Remaining: 00:00 | Progress: 100%|██████████| Calculated: 10/10 chunks
234 features with a correlation magnitude greater than 0.98.

Data has not been one-hot encoded
Removed 234 features including one-hot features.
(8760, 60)
Modelling:UnivLab_Mariana
Built 432 features
Elapsed: 00:03 | Remaining: 00:00 | Progress: 100%|██████████| Calculated: 10/10