## Load Training / Testing Data

In [1]:
FileSaveFolder = "A:\\Files\\Shares\\Downloads\\yelp_dataset\\yelp_dataset~\\"
def LoadTrainTestData():
    import pickle
    with open(FileSaveFolder + "TrainTestData.dat", "rb") as filePath:
        train_x = pickle.load(file=filePath)
        train_y = pickle.load(file=filePath)
        test_x = pickle.load(file=filePath)
        test_y = pickle.load(file=filePath)
    return train_x, train_y, test_x, test_y

In [2]:
Train_X, Train_Y, Test_X, Test_Y = LoadTrainTestData()

In [3]:
print(Train_X.shape)
print(Train_Y.shape)
print(Test_X.shape)
print(Test_Y.shape)
print(Test_Y.unique())

(151483, 1743)
(151483,)
(37871, 1743)
(37871,)
[2.  4.  3.  2.5 3.5]


## Classification Models

In [3]:
# Run through multiple classifiers and rank results

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, precision_recall_fscore_support
from sklearn.naive_bayes import GaussianNB, BernoulliNB, MultinomialNB, ComplementNB
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier, ExtraTreesClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier, ExtraTreeClassifier
from xgboost import XGBClassifier
import pandas as pd
import numpy as np

def AssessClassifierModels(TrainingDataColumns, TrainingDataResults, TestingDataColumns, TestingDataResults, Algorithms_List):
    from datetime import datetime
    functionStartTime = datetime.now()
    print()
    
    # results container
    results_list = pd.DataFrame( columns = ["Name",
                                            "Precision",
                                            "Recall",
                                            "F1",
                                            "Support",
                                            "ModelData",
                                            "ExecutionTime",
                                           ]) # Set index later to avoid empty row
    
    # calculated metrics and append to list
    for algorithm in Algorithms_List:
        loopStartTime = datetime.now()
        print("Starting " + str(algorithm.__name__) + " at " + str(loopStartTime))

        algorithmObject = algorithm()
        
        if(str(algorithm.__name__) == "XGBClassifier"):
            algorithmObject = XGBClassifier(nthread=4)

        algorithmObject.fit(TrainingDataColumns, TrainingDataResults)
        algorithmPredictions = algorithmObject.predict(TestingDataColumns)
        (algorithmPrecision, algorithmRecall, algorithmF1, algorithmSupportList) = precision_recall_fscore_support(
            TestingDataResults, algorithmPredictions, labels = np.sort(TrainingDataResults.unique()))
        algorithmExecutionTime = str(datetime.now() - loopStartTime)
        
        results_list = results_list.append({"Name":  algorithm.__name__,
                                            "Precision": algorithmPrecision,
                                            "Recall": algorithmRecall,
                                            "F1": algorithmF1,
                                            "Support": algorithmSupportList,
#                                            "ConfusionMatrix": "",# confusion_matrix(TestingDataResults, algorithmPredictions),
                                            "ModelData" : algorithmObject,
                                            "ExecutionTime": algorithmExecutionTime, 
                                            }, ignore_index = True)
#         print("\tEnding " + str(algorithm.__name__) + " at " + str(datetime.now()) + "\n")
        
#         with pd.option_context("display.max_rows", None, "display.max_columns", None, "display.max_colwidth", 1000):
#             print(results_list.iloc[len(results_list)-1, :])

    # Set index to a meaningful value
    results_list.set_index("Name")
    print("Assessment Complete.")
    return results_list

In [4]:
#StarPolarity_Map = {1:-1.0, 2:-0.5, 3:0.0, 4:0.5, 5:1.0}
StarPolarity_Map = {1:2.0, 2:2.5, 3:3.0, 4:3.5, 5:4.0}
ClassifierResults_List = AssessClassifierModels(Train_X, Train_Y.map(StarPolarity_Map).apply(str).astype("category"), Test_X, Test_Y.map(StarPolarity_Map).apply(str).astype("category"), [
    XGBClassifier,
    MultinomialNB,
    GaussianNB,
    BernoulliNB,
    #KNeighborsClassifier,
    #DecisionTreeClassifier,
    ExtraTreeClassifier
])


Starting XGBClassifier at 2020-01-26 20:50:49.132752
Starting MultinomialNB at 2020-01-26 20:56:59.233867
Starting GaussianNB at 2020-01-26 20:57:16.819218
Starting BernoulliNB at 2020-01-26 20:57:28.082600
Starting ExtraTreeClassifier at 2020-01-26 20:58:02.415572
Assessment Complete.


In [5]:
with pd.option_context("display.max_rows", None, "display.max_columns", None, "display.max_colwidth", 1000):
    print(ClassifierResults_List[["Name", "Precision", "Recall", "F1", "Support", "ExecutionTime"]])

                  Name  \
0        XGBClassifier   
1        MultinomialNB   
2           GaussianNB   
3          BernoulliNB   
4  ExtraTreeClassifier   

                                                                           Precision  \
0   [0.7348448108632396, 0.5679012345679012, 0.6844848322865161, 0.4704794423104282]   
1   [0.6800494132180358, 0.4446034926996851, 0.6594034271207955, 0.5072117436203807]   
2    [0.6544046466602129, 0.3475364330326162, 0.3961166135079179, 0.456190686382697]   
3   [0.6396648044692738, 0.3786302220841746, 0.3935207528957529, 0.4913923286016309]   
4  [0.5112625620884833, 0.2629605477665471, 0.4551152899037385, 0.41022197087515905]   

                                                                              Recall  \
0   [0.6910272488883822, 0.0897224306076519, 0.5788224658153464, 0.7302370086854114]   
1   [0.7531638353665489, 0.4660165041260315, 0.7044863826421065, 0.4374355954659208]   
2  [0.6165773571998632, 0.3756939234808702, 0.7999

In [8]:
import pickle
with open(FileSaveFolder + "ClassifierResults.dat", "wb") as filePath:
    pickle.dump(ClassifierResults_List, file=filePath, protocol = pickle.HIGHEST_PROTOCOL)

In [9]:
import gc
del ClassifierResults_List
gc.collect()
gc.collect()

0

In [None]:
import gc
#del TrainingData
#del WordCounts
gc.collect()
gc.collect()

## Regression Models

In [3]:
# Run through multiple Regression models and rank results
import pandas as pd

from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.tree import ExtraTreeRegressor
from sklearn.ensemble import GradientBoostingRegressor
from xgboost import XGBRegressor

def AssessRegressionModels(x_train, y_train, x_test, y_test, algorithms_list):
    from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error

    from datetime import datetime

    import warnings
    warnings.simplefilter(action='ignore', category=FutureWarning)
    # results container
    results_list = pd.DataFrame( columns = ["Name", "R2Score", "RMSE", "MAE", "ModelData"])
    
    # calculated metrics and append to list
    functionStartTime = datetime.now()
    for algorithm in algorithms_list:
        algorithmObject = algorithm()
        if(str(algorithm.__name__) == "XGBRegressor"):
            algorithmObject = XGBRegressor(objective = "reg:squarederror",
                                           nthread=6,
                                           learning_rate = 0.3,
                                           max_depth = 9,
                                           n_estimators = 200,
                                          )
            
        loopStartTime = datetime.now()
        print("Starting " + str(algorithm.__name__) + " at " + str(loopStartTime))

        algorithmObject.fit(x_train, y_train)
        y_predictor = algorithmObject.predict(x_test)
        
        algorithmExecutionTime = str(datetime.now() - loopStartTime)

        results_list = results_list.append({"Name" : algorithm.__name__, 
                                            "R2Score": r2_score(y_test, y_predictor),
                                            "RMSE": (mean_squared_error(y_test, y_predictor)),
                                            "MAE": mean_absolute_error(y_test, y_predictor),
                                            "ModelData" : algorithmObject,
                                            "RunTime" : algorithmExecutionTime,
                                            }, ignore_index = True)

        print("\tEnding " + str(algorithm.__name__) + " at " + str(datetime.now()))

    # sort list by r2 score
    results_list.sort_values(by=['R2Score'], inplace=True, ascending = False)

    return results_list

In [4]:
RegressorResults_List = AssessRegressionModels(Train_X, Train_Y, Test_X, Test_Y, [
    XGBRegressor,
    GradientBoostingRegressor,
    ExtraTreeRegressor,
    LinearRegression,
    Ridge,
    Lasso,
    ElasticNet,
])

with pd.option_context("display.max_rows", None, "display.max_columns", None, "display.max_colwidth", 1000):
    print(RegressorResults_List.drop("ModelData", axis = 1))

Starting XGBRegressor at 2020-01-26 22:13:36.279712
	Ending XGBRegressor at 2020-01-26 22:20:14.596764
Starting GradientBoostingRegressor at 2020-01-26 22:20:14.596764
	Ending GradientBoostingRegressor at 2020-01-26 22:28:54.790691
Starting ExtraTreeRegressor at 2020-01-26 22:28:54.790691
	Ending ExtraTreeRegressor at 2020-01-26 22:31:35.610032
Starting LinearRegression at 2020-01-26 22:31:35.610032
	Ending LinearRegression at 2020-01-26 22:31:52.738531
Starting Ridge at 2020-01-26 22:31:52.738531
	Ending Ridge at 2020-01-26 22:31:59.964209
Starting Lasso at 2020-01-26 22:31:59.964209
	Ending Lasso at 2020-01-26 22:32:04.278824
Starting ElasticNet at 2020-01-26 22:32:04.278824
	Ending ElasticNet at 2020-01-26 22:32:08.800371
                        Name   R2Score      RMSE       MAE         RunTime
0               XGBRegressor  0.658575  0.188347  0.339912  0:06:38.311054
4                      Ridge  0.541973  0.252671  0.408997  0:00:07.218680
3           LinearRegression  0.541968  

In [10]:
import pickle
with open(FileSaveFolder + "RegressorResults.dat", "wb") as filePath:
    pickle.dump(RegressorResults_List, file=filePath)

### Trialing different XGB parameters

In [3]:
import pandas as pd
XGBResults = pd.DataFrame()

def AssessXGBRegression(x_train, y_train, x_test, y_test, Model):
    from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
    from datetime import datetime

    loopStartTime = datetime.now()
    print("Starting XGBRegressor at " + str(loopStartTime))

    Model.fit(x_train, y_train)
    xgbPredictor = Model.predict(x_test)

    loopEndTime = datetime.now()
    # results container
    results_list = pd.DataFrame(data = {
        "Name" : "XGBRegressor",
        "R2Score": r2_score(y_test, xgbPredictor),
        "RMSE": (mean_squared_error(y_test, xgbPredictor)),
        "MAE": mean_absolute_error(y_test, xgbPredictor),
        "ModelData" : Model,
        "RunTime": [str(loopEndTime - loopStartTime)],
        })

    print("\tEnding XGBRegressor at " + str(datetime.now()))

    return results_list

In [4]:
import pandas as pd
import copy

#for maxDepth in [6, 9, 12]:
for maxDepth in [15]:
    import gc
    gc.collect()
#    for learningRate in [0.01, 0.05, 0.1, 0.33, 0.5]:
    for learningRate in [0.1]:
        from xgboost import XGBRegressor
        import warnings
        warnings.simplefilter(action='ignore', category=FutureWarning)
#        for estimatorsCount in [100, 200, 400, 800]:
        for estimatorsCount in [1600]:
        #estimatorsCount = 50 / learningRate

            xgbModel = XGBRegressor(objective = "reg:squarederror",
                                    colsample_bytree = 1,
                                    colsample_bylevel = 1,
                                    colsample_bynode = 1,
                                    learning_rate = learningRate,
                                    max_depth = maxDepth,
                                    tree_method = "hist",
                                    grow_policy = "lossguide",
                                    n_estimators = estimatorsCount,
                                    nthread = 6,
                                   )

            testTrainResults = AssessXGBRegression(Train_X, Train_Y, Test_X, Test_Y, Model = xgbModel)
            testTrainResults["LearnRate"] = learningRate
            testTrainResults["MaxDepth"] = maxDepth
            testTrainResults["Estimators"] = estimatorsCount

            with pd.option_context("display.max_rows", None, "display.max_columns", None, "display.max_colwidth", 20):
                display(testTrainResults.drop(["Name", "ModelData"], axis = 1))

            XGBResults = XGBResults.append(copy.deepcopy(testTrainResults), ignore_index = True)

            del xgbModel
            del testTrainResults
            gc.collect()

print("Loops Complete.")

Starting XGBRegressor at 2020-01-28 08:18:32.207798
	Ending XGBRegressor at 2020-01-28 09:15:55.255959


Unnamed: 0,R2Score,RMSE,MAE,RunTime,LearnRate,MaxDepth,Estimators
0,0.678418,0.177401,0.325521,0:57:23.044162,0.1,15,1600


Loops Complete.


In [5]:
XGBResults.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11 entries, 0 to 10
Data columns (total 9 columns):
Name          11 non-null object
R2Score       11 non-null float64
RMSE          11 non-null float64
MAE           11 non-null float64
ModelData     11 non-null object
RunTime       11 non-null object
LearnRate     11 non-null float64
MaxDepth      11 non-null int64
Estimators    11 non-null int64
dtypes: float64(4), int64(2), object(3)
memory usage: 920.0+ bytes


In [4]:
XGBResults

Unnamed: 0,Name,R2Score,RMSE,MAE,ModelData,RunTime,LearnRate,MaxDepth,Estimators
0,XGBRegressor,-1.295799,1.266481,0.95704,"XGBRegressor(base_score=0.5, booster='gbtree',...",0:00:53.815709,0.01,6,100
1,XGBRegressor,0.20075,0.440907,0.555921,"XGBRegressor(base_score=0.5, booster='gbtree',...",0:01:24.839741,0.01,6,200
2,XGBRegressor,0.508864,0.270936,0.428631,"XGBRegressor(base_score=0.5, booster='gbtree',...",0:02:27.005766,0.01,6,400
3,XGBRegressor,0.58116,0.231054,0.387177,"XGBRegressor(base_score=0.5, booster='gbtree',...",0:04:18.020098,0.01,6,800
4,XGBRegressor,0.538833,0.254403,0.411359,"XGBRegressor(base_score=0.5, booster='gbtree',...",0:00:47.965588,0.05,6,100
5,XGBRegressor,0.599257,0.22107,0.37675,"XGBRegressor(base_score=0.5, booster='gbtree',...",0:01:15.937601,0.05,6,200
6,XGBRegressor,0.638384,0.199486,0.354655,"XGBRegressor(base_score=0.5, booster='gbtree',...",0:02:10.311130,0.05,6,400
7,XGBRegressor,0.664054,0.185325,0.340152,"XGBRegressor(base_score=0.5, booster='gbtree',...",0:04:00.164835,0.05,6,800
8,XGBRegressor,0.598448,0.221517,0.37677,"XGBRegressor(base_score=0.5, booster='gbtree',...",0:00:48.684357,0.1,6,100
9,XGBRegressor,0.637465,0.199993,0.354905,"XGBRegressor(base_score=0.5, booster='gbtree',...",0:01:16.094550,0.1,6,200


In [6]:
XGBResults

Unnamed: 0,Name,R2Score,RMSE,MAE,ModelData,RunTime,LearnRate,MaxDepth,Estimators
0,XGBRegressor,0.642602,0.197159,0.351341,"XGBRegressor(base_score=0.5, booster='gbtree',...",0:00:45.457394,0.33,6,100
1,XGBRegressor,0.65973,0.18771,0.341321,"XGBRegressor(base_score=0.5, booster='gbtree',...",0:01:10.456362,0.33,6,200
2,XGBRegressor,0.665457,0.184551,0.337758,"XGBRegressor(base_score=0.5, booster='gbtree',...",0:01:57.484249,0.33,6,400
3,XGBRegressor,0.665503,0.184526,0.337621,"XGBRegressor(base_score=0.5, booster='gbtree',...",0:03:24.750214,0.33,6,800
4,XGBRegressor,0.6367,0.200415,0.353084,"XGBRegressor(base_score=0.5, booster='gbtree',...",0:00:44.509698,0.5,6,100
5,XGBRegressor,0.642804,0.197048,0.349631,"XGBRegressor(base_score=0.5, booster='gbtree',...",0:01:08.902862,0.5,6,200
6,XGBRegressor,0.64112,0.197977,0.350026,"XGBRegressor(base_score=0.5, booster='gbtree',...",0:01:57.406278,0.5,6,400
7,XGBRegressor,0.635334,0.201169,0.35215,"XGBRegressor(base_score=0.5, booster='gbtree',...",0:03:23.678557,0.5,6,800


In [5]:
XGBResults

Unnamed: 0,Name,R2Score,RMSE,MAE,ModelData,RunTime,LearnRate,MaxDepth,Estimators
0,XGBRegressor,-1.239562,1.235457,0.954942,"XGBRegressor(base_score=0.5, booster='gbtree',...",0:01:42.850954,0.01,9,100
1,XGBRegressor,0.258766,0.408903,0.529119,"XGBRegressor(base_score=0.5, booster='gbtree',...",0:03:12.793055,0.01,9,200
2,XGBRegressor,0.560077,0.242684,0.398456,"XGBRegressor(base_score=0.5, booster='gbtree',...",0:05:50.837234,0.01,9,400
3,XGBRegressor,0.620921,0.20912,0.362775,"XGBRegressor(base_score=0.5, booster='gbtree',...",0:10:36.213583,0.01,9,800
4,XGBRegressor,0.585563,0.228624,0.383543,"XGBRegressor(base_score=0.5, booster='gbtree',...",0:01:38.397384,0.05,9,100
5,XGBRegressor,0.633914,0.201952,0.355232,"XGBRegressor(base_score=0.5, booster='gbtree',...",0:02:46.739426,0.05,9,200
6,XGBRegressor,0.66311,0.185846,0.338549,"XGBRegressor(base_score=0.5, booster='gbtree',...",0:05:05.628800,0.05,9,400
7,XGBRegressor,0.682078,0.175382,0.327309,"XGBRegressor(base_score=0.5, booster='gbtree',...",0:09:36.930630,0.05,9,800
8,XGBRegressor,0.632436,0.202767,0.35568,"XGBRegressor(base_score=0.5, booster='gbtree',...",0:01:34.519630,0.1,9,100
9,XGBRegressor,0.660056,0.187531,0.340118,"XGBRegressor(base_score=0.5, booster='gbtree',...",0:02:47.567159,0.1,9,200


In [6]:
XGBResults

Unnamed: 0,Name,R2Score,RMSE,MAE,ModelData,RunTime,LearnRate,MaxDepth,Estimators
0,XGBRegressor,0.645952,0.195311,0.346346,"XGBRegressor(base_score=0.5, booster='gbtree',...",0:01:27.228973,0.33,9,100
1,XGBRegressor,0.653447,0.191176,0.341744,"XGBRegressor(base_score=0.5, booster='gbtree',...",0:02:27.903478,0.33,9,200
2,XGBRegressor,0.653301,0.191257,0.341347,"XGBRegressor(base_score=0.5, booster='gbtree',...",0:04:10.221604,0.33,9,400
3,XGBRegressor,0.649908,0.193129,0.342928,"XGBRegressor(base_score=0.5, booster='gbtree',...",0:07:08.588294,0.33,9,800
4,XGBRegressor,0.621275,0.208924,0.35772,"XGBRegressor(base_score=0.5, booster='gbtree',...",0:01:23.884047,0.5,9,100
5,XGBRegressor,0.616739,0.211427,0.358951,"XGBRegressor(base_score=0.5, booster='gbtree',...",0:02:21.570513,0.5,9,200
6,XGBRegressor,0.608165,0.216157,0.362979,"XGBRegressor(base_score=0.5, booster='gbtree',...",0:03:59.405078,0.5,9,400
7,XGBRegressor,0.598624,0.221419,0.367071,"XGBRegressor(base_score=0.5, booster='gbtree',...",0:07:05.547271,0.5,9,800


In [6]:
XGBResults

Unnamed: 0,Name,R2Score,RMSE,MAE,ModelData,RunTime,LearnRate,MaxDepth,Estimators
0,XGBRegressor,0.645952,0.195311,0.346346,"XGBRegressor(base_score=0.5, booster='gbtree',...",0:01:27.935744,0.33,9,100
1,XGBRegressor,0.649908,0.193129,0.342928,"XGBRegressor(base_score=0.5, booster='gbtree',...",0:07:08.410350,0.33,9,800
2,XGBRegressor,0.621275,0.208924,0.35772,"XGBRegressor(base_score=0.5, booster='gbtree',...",0:01:24.060991,0.5,9,100
3,XGBRegressor,0.598624,0.221419,0.367071,"XGBRegressor(base_score=0.5, booster='gbtree',...",0:07:02.369292,0.5,9,800
4,XGBRegressor,0.640007,0.198591,0.347138,"XGBRegressor(base_score=0.5, booster='gbtree',...",0:03:01.709616,0.33,12,100
5,XGBRegressor,0.636096,0.200748,0.348415,"XGBRegressor(base_score=0.5, booster='gbtree',...",0:15:07.315477,0.33,12,800
6,XGBRegressor,0.600747,0.220248,0.364892,"XGBRegressor(base_score=0.5, booster='gbtree',...",0:02:55.548595,0.5,12,100
7,XGBRegressor,0.584032,0.229469,0.372324,"XGBRegressor(base_score=0.5, booster='gbtree',...",0:16:06.669406,0.5,12,800


In [7]:
XGBResults

Unnamed: 0,Name,R2Score,RMSE,MAE,ModelData,RunTime,LearnRate,MaxDepth,Estimators
0,XGBRegressor,-1.202747,1.215148,0.953534,"XGBRegressor(base_score=0.5, booster='gbtree',...",0:03:16.880753,0.01,12,100
1,XGBRegressor,0.296838,0.3879,0.511755,"XGBRegressor(base_score=0.5, booster='gbtree',...",0:07:05.148399,0.01,12,200
2,XGBRegressor,0.590516,0.225892,0.379642,"XGBRegressor(base_score=0.5, booster='gbtree',...",0:13:47.017277,0.01,12,400
3,XGBRegressor,0.641709,0.197651,0.349214,"XGBRegressor(base_score=0.5, booster='gbtree',...",0:24:59.704141,0.01,12,800
4,XGBRegressor,0.611202,0.214481,0.367063,"XGBRegressor(base_score=0.5, booster='gbtree',...",0:03:34.268155,0.05,12,100
5,XGBRegressor,0.650275,0.192926,0.343891,"XGBRegressor(base_score=0.5, booster='gbtree',...",0:06:19.793971,0.05,12,200
6,XGBRegressor,0.672559,0.180633,0.331131,"XGBRegressor(base_score=0.5, booster='gbtree',...",0:11:43.366006,0.05,12,400
7,XGBRegressor,0.685293,0.173608,0.323478,"XGBRegressor(base_score=0.5, booster='gbtree',...",0:21:24.044433,0.05,12,800
8,XGBRegressor,0.646101,0.195229,0.346033,"XGBRegressor(base_score=0.5, booster='gbtree',...",0:03:20.301643,0.1,12,100
9,XGBRegressor,0.666637,0.1839,0.334258,"XGBRegressor(base_score=0.5, booster='gbtree',...",0:06:02.821425,0.1,12,200


In [6]:
XGBResults

Unnamed: 0,Name,R2Score,RMSE,MAE,ModelData,RunTime,LearnRate,MaxDepth,Estimators
0,XGBRegressor,0.640007,0.198591,0.347138,"XGBRegressor(base_score=0.5, booster='gbtree',...",0:03:02.094492,0.33,12,100
1,XGBRegressor,0.639895,0.198652,0.347012,"XGBRegressor(base_score=0.5, booster='gbtree',...",0:05:07.906068,0.33,12,200
2,XGBRegressor,0.638734,0.199293,0.347247,"XGBRegressor(base_score=0.5, booster='gbtree',...",0:08:26.237344,0.33,12,400
3,XGBRegressor,0.636096,0.200748,0.348415,"XGBRegressor(base_score=0.5, booster='gbtree',...",0:15:10.529444,0.33,12,800
4,XGBRegressor,0.600747,0.220248,0.364892,"XGBRegressor(base_score=0.5, booster='gbtree',...",0:02:52.951430,0.5,12,100
5,XGBRegressor,0.595101,0.223363,0.367512,"XGBRegressor(base_score=0.5, booster='gbtree',...",0:04:49.231070,0.5,12,200
6,XGBRegressor,0.588076,0.227239,0.37062,"XGBRegressor(base_score=0.5, booster='gbtree',...",0:08:30.889850,0.5,12,400
7,XGBRegressor,0.584032,0.229469,0.372324,"XGBRegressor(base_score=0.5, booster='gbtree',...",0:16:17.513922,0.5,12,800


In [6]:
XGBResults

Unnamed: 0,Name,R2Score,RMSE,MAE,ModelData,RunTime,LearnRate,MaxDepth,Estimators
0,XGBRegressor,0.641709,0.197651,0.349214,"XGBRegressor(base_score=0.5, booster='gbtree',...",0:25:15.589038,0.01,12,800
1,XGBRegressor,0.669154,0.182511,0.333124,"XGBRegressor(base_score=0.5, booster='gbtree',...",0:47:14.868786,0.01,12,1600
2,XGBRegressor,0.685293,0.173608,0.323478,"XGBRegressor(base_score=0.5, booster='gbtree',...",0:21:45.547796,0.05,12,800
3,XGBRegressor,0.690969,0.170477,0.319711,"XGBRegressor(base_score=0.5, booster='gbtree',...",0:37:50.588825,0.05,12,1600
4,XGBRegressor,0.683002,0.174872,0.324539,"XGBRegressor(base_score=0.5, booster='gbtree',...",0:19:25.449250,0.1,12,800
5,XGBRegressor,0.684057,0.17429,0.323933,"XGBRegressor(base_score=0.5, booster='gbtree',...",0:32:31.744096,0.1,12,1600
6,XGBRegressor,0.651979,0.191986,0.341812,"XGBRegressor(base_score=0.5, booster='gbtree',...",0:50:34.489752,0.01,15,800
7,XGBRegressor,0.673009,0.180385,0.329403,"XGBRegressor(base_score=0.5, booster='gbtree',...",1:32:43.687522,0.01,15,1600
8,XGBRegressor,0.682502,0.175148,0.323666,"XGBRegressor(base_score=0.5, booster='gbtree',...",0:41:08.684970,0.05,15,800
9,XGBRegressor,0.685652,0.17341,0.321714,"XGBRegressor(base_score=0.5, booster='gbtree',...",1:09:40.021949,0.05,15,1600


In [5]:
XGBResults

Unnamed: 0,Name,R2Score,RMSE,MAE,ModelData,RunTime,LearnRate,MaxDepth,Estimators
0,XGBRegressor,0.678418,0.177401,0.325521,"XGBRegressor(base_score=0.5, booster='gbtree',...",0:57:23.044162,0.1,15,1600


In [6]:
import pickle
with open(FileSaveFolder + "XGBRegressorResults10of8.dat", "wb") as filePath:
    pickle.dump(XGBResults, file=filePath)

In [5]:
XGBResults = AssessXGBRegression(Train_X, Train_Y, Test_X, Test_Y, LearningRate = 0.25, MaxDepth = 7)

with pd.option_context("display.max_rows", None, "display.max_columns", None, "display.max_colwidth", 100):
    print(XGBResults.drop("ModelData", axis = 1))

           Name  R2 Score  RMS Error  Mean Absolute Error   ExecutionTime
0  XGBRegressor  0.750683   0.062329             0.180223  0:05:09.930878


In [5]:
import pickle
with open("XGBRegressorResults.dat", "wb") as filePath:
    pickle.dump(XGBResults, file=filePath)

In [None]:
for x in range[1]:
    xgbModel = XGBRegressor(objective = "reg:squarederror",
                            colsample_bytree = 1,
                            colsample_bylevel = 1,
                            colsample_bynode = 1,
                            learning_rate = learningRate,
                            max_depth = maxDepth,
                            tree_method = "hist",
                            grow_policy = "lossguide",
                            n_estimators = estimatorsCount,
                            nthread = 6,
                           )
    XGBResults = AssessXGBRegression(Train_X, Train_Y, Test_X, Test_Y, xgbModel)
    with pd.option_context("display.max_rows", None, "display.max_columns", None, "display.max_colwidth", 100):
        print(XGBResults.drop("ModelData", axis = 1))

In [None]:
import gc
#Del XGBResults
gc.collect()
gc.collect()


In [None]:
import pickle
with open("XGBRegressorResults.dat", "wb") as filePath:
    pickle.dump(XGBResults, file=filePath)