In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score

pd.set_option('future.no_silent_downcasting', True)

In [3]:
df_train = pd.read_csv("train.csv")
df_train_y = df_train["Transported"]
df_train.drop(["Transported"], axis=1, inplace=True)
df_test = pd.read_csv("test.csv")
df_test_y = pd.read_csv("sample_submission.csv")

In [4]:
def get_info(df):
    print("Total rows:", len(df_train))
    for col in df:
        d = df[col]
        c = d.unique()
        print(f"{col}: nan={d.isna().sum()}, mode={d.mode()[0]}, unique_c={len(c)}, unique={c}")

In [5]:
df_train.head()

Unnamed: 0,PassengerId,HomePlanet,CryoSleep,Cabin,Destination,Age,VIP,RoomService,FoodCourt,ShoppingMall,Spa,VRDeck,Name
0,0001_01,Europa,False,B/0/P,TRAPPIST-1e,39.0,False,0.0,0.0,0.0,0.0,0.0,Maham Ofracculy
1,0002_01,Earth,False,F/0/S,TRAPPIST-1e,24.0,False,109.0,9.0,25.0,549.0,44.0,Juanna Vines
2,0003_01,Europa,False,A/0/S,TRAPPIST-1e,58.0,True,43.0,3576.0,0.0,6715.0,49.0,Altark Susent
3,0003_02,Europa,False,A/0/S,TRAPPIST-1e,33.0,False,0.0,1283.0,371.0,3329.0,193.0,Solam Susent
4,0004_01,Earth,False,F/1/S,TRAPPIST-1e,16.0,False,303.0,70.0,151.0,565.0,2.0,Willy Santantines


In [6]:
get_info(df_train)

Total rows: 8693
PassengerId: nan=0, mode=0001_01, unique_c=8693, unique=['0001_01' '0002_01' '0003_01' ... '9279_01' '9280_01' '9280_02']
HomePlanet: nan=201, mode=Earth, unique_c=4, unique=['Europa' 'Earth' 'Mars' nan]
CryoSleep: nan=217, mode=False, unique_c=3, unique=[False True nan]
Cabin: nan=199, mode=G/734/S, unique_c=6561, unique=['B/0/P' 'F/0/S' 'A/0/S' ... 'G/1499/S' 'G/1500/S' 'E/608/S']
Destination: nan=182, mode=TRAPPIST-1e, unique_c=4, unique=['TRAPPIST-1e' 'PSO J318.5-22' '55 Cancri e' nan]
Age: nan=179, mode=24.0, unique_c=81, unique=[39. 24. 58. 33. 16. 44. 26. 28. 35. 14. 34. 45. 32. 48. 31. 27.  0.  1.
 49. 29. 10.  7. 21. 62. 15. 43. 47.  2. 20. 23. 30. 17. 55.  4. 19. 56.
 nan 25. 38. 36. 22. 18. 42. 37. 13.  8. 40.  3. 54.  9.  6. 64. 67. 61.
 50. 41. 57. 11. 52. 51. 46. 60. 63. 59.  5. 79. 68. 74. 12. 53. 65. 71.
 75. 70. 76. 78. 73. 66. 69. 72. 77.]
VIP: nan=203, mode=False, unique_c=3, unique=[False True nan]
RoomService: nan=181, mode=0.0, unique_c=1274, uniq

In [7]:
def make1(df):
    df = df.copy()
    df["Spent"] = df[["RoomService", "FoodCourt", "ShoppingMall", "Spa", "VRDeck"]].sum(axis=1)

    df = df.join(
        df["PassengerId"].str.split("_", expand=True).rename(
            columns={0:"GroupID", 1:"PassID"}
        )
    )

    df["GroupID"] = df["GroupID"].astype(int)

    df = df.join(
        df["Cabin"].str.split("/", expand=True).rename(
            columns={0:"CabinDeck", 1:"CabinNum", 2:"CabinSide"}
        )
    )

    # df = df.join(
    #     df["Name"].str.split(expand=True).rename(
    #         columns={0:"FirstName", 1:"LastName"}
    #     )
    # )

    df = df.drop(["PassengerId", "PassID", "RoomService", "FoodCourt", "ShoppingMall", "Spa", "VRDeck", "Cabin", "CabinNum", "Name", "Destination"], axis=1)

    return df

df_train_1 = make1(df_train)
df_train_1.head()

Unnamed: 0,HomePlanet,CryoSleep,Age,VIP,Spent,GroupID,CabinDeck,CabinSide
0,Europa,False,39.0,False,0.0,1,B,P
1,Earth,False,24.0,False,736.0,2,F,S
2,Europa,False,58.0,True,10383.0,3,A,S
3,Europa,False,33.0,False,5176.0,3,A,S
4,Earth,False,16.0,False,1091.0,4,F,S


In [8]:
get_info(df_train_1)

Total rows: 8693
HomePlanet: nan=201, mode=Earth, unique_c=4, unique=['Europa' 'Earth' 'Mars' nan]
CryoSleep: nan=217, mode=False, unique_c=3, unique=[False True nan]
Age: nan=179, mode=24.0, unique_c=81, unique=[39. 24. 58. 33. 16. 44. 26. 28. 35. 14. 34. 45. 32. 48. 31. 27.  0.  1.
 49. 29. 10.  7. 21. 62. 15. 43. 47.  2. 20. 23. 30. 17. 55.  4. 19. 56.
 nan 25. 38. 36. 22. 18. 42. 37. 13.  8. 40.  3. 54.  9.  6. 64. 67. 61.
 50. 41. 57. 11. 52. 51. 46. 60. 63. 59.  5. 79. 68. 74. 12. 53. 65. 71.
 75. 70. 76. 78. 73. 66. 69. 72. 77.]
VIP: nan=203, mode=False, unique_c=3, unique=[False True nan]
Spent: nan=0, mode=0.0, unique_c=2336, unique=[    0.   736. 10383. ...  8803.  8536.  4637.]
GroupID: nan=0, mode=984, unique_c=6217, unique=[   1    2    3 ... 9278 9279 9280]
CabinDeck: nan=199, mode=F, unique_c=9, unique=['B' 'F' 'A' 'G' nan 'E' 'D' 'C' 'T']
CabinSide: nan=199, mode=S, unique_c=3, unique=['P' 'S' nan]


In [9]:
def make2(df):
    df = df.copy()
    groups = df["GroupID"].value_counts()

    for g, count in groups.items():
        g_ind = df["GroupID"] == g
        group = df[g_ind]

        check = group.isna().any().any()
        if (not check):
            continue

        for col in ["HomePlanet", "CryoSleep", "VIP", "CabinDeck", "CabinSide"]:
            if ( (isna := group[col].isna()).sum()):
                mode = group[col].mode()
                if count > 1 and any(mode):
                    mode = mode[0]
                else:
                    mode = df[col].mode()[0]

                df.loc[g_ind & isna, col] = mode

        for col in ["Age", "Spent"]:
            if ( (isna := group[col].isna()).sum()):
                mean = group[col].mean()
                if np.isnan(mean):
                    mean = df[col].mean()
                df.loc[g_ind & isna, col] = mean

                # if not df.loc[g_ind & isna, col].any():
                #     print(group)
                #     print(df.loc[g_ind & isna, col], mean, not mean, mean == np.nan, np.isnan(mean))
            
        # print(df[g_ind])

    return df

df_train_2 = make2(df_train_1)

In [10]:
get_info(df_train_2)

Total rows: 8693
HomePlanet: nan=0, mode=Earth, unique_c=3, unique=['Europa' 'Earth' 'Mars']
CryoSleep: nan=0, mode=False, unique_c=2, unique=[False True]
Age: nan=0, mode=24.0, unique_c=110, unique=[39.         24.         58.         33.         16.         44.
 26.         28.         35.         14.         34.         45.
 32.         48.         31.         27.          0.          1.
 49.         29.         10.          7.         21.         62.
 15.         43.         47.          2.         20.         23.
 30.         17.         55.          4.         19.         56.
 28.80281942 25.         38.         36.         22.         18.
 42.         37.         13.          8.         40.          3.
 54.          9.          6.         64.         67.         61.
 50.         41.         57.         11.         52.         51.
 46.         60.         63.         36.33333333 59.          5.
 28.80281942 79.         68.         74.         12.         53.
 38.25       65.     

In [13]:
def make3(df):
    df = df.copy()
    df = df.join(
            [
                pd.get_dummies(df["HomePlanet"]).astype(int),
                pd.get_dummies(df["CabinDeck"]).astype(int),
                pd.get_dummies(df["CabinSide"]).astype(int)
            ]
        )
    

    df = df.drop(["HomePlanet", "CabinDeck", "CabinSide"], axis=1)

    df[["CryoSleep", "VIP"]] = df[["CryoSleep", "VIP"]].astype(int)

    return df

df_train_3 = make3(df_train_2)
df_train_3

Unnamed: 0,CryoSleep,Age,VIP,Spent,GroupID,Earth,Europa,Mars,A,B,C,D,E,F,G,T,P,S
0,0,39.0,0,0.0,1,0,1,0,0,1,0,0,0,0,0,0,1,0
1,0,24.0,0,736.0,2,1,0,0,0,0,0,0,0,1,0,0,0,1
2,0,58.0,1,10383.0,3,0,1,0,1,0,0,0,0,0,0,0,0,1
3,0,33.0,0,5176.0,3,0,1,0,1,0,0,0,0,0,0,0,0,1
4,0,16.0,0,1091.0,4,1,0,0,0,0,0,0,0,1,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8688,0,41.0,1,8536.0,9276,0,1,0,1,0,0,0,0,0,0,0,1,0
8689,1,18.0,0,0.0,9278,1,0,0,0,0,0,0,0,0,1,0,0,1
8690,0,26.0,0,1873.0,9279,1,0,0,0,0,0,0,0,0,1,0,0,1
8691,0,32.0,0,4637.0,9280,0,1,0,0,0,0,0,1,0,0,0,0,1


In [14]:
from sklearn.ensemble import RandomForestClassifier

rndf = RandomForestClassifier(max_depth=20)

rndf.fit(df_train_3, df_train_y)

In [15]:
rndf.score(df_train_3, df_train_y)

0.9898769124582998

In [16]:
import xgboost

xgb = xgboost.XGBClassifier(n_estimators=100, learning_rate=1.0)
xgb.fit(df_train_3, df_train_y)

In [None]:
xgb.score(df_train_3, df_train_y)

0.9647992637754516

In [None]:
df_test_3 = make3(make2(make1(df_test)))
df_test_3

Unnamed: 0,CryoSleep,Age,VIP,Spent,GroupID,CabinDeck,CabinSide,Earth,Europa,Mars
0,1,27.000000,0,0.0,13,6,1,1,0,0
1,0,19.000000,0,2832.0,18,5,1,1,0,0
2,1,31.000000,0,0.0,19,7,1,0,1,0
3,0,38.000000,0,7418.0,21,7,1,0,1,0
4,0,20.000000,0,645.0,23,5,1,1,0,0
...,...,...,...,...,...,...,...,...,...,...
4272,1,34.000000,0,0.0,9266,6,1,1,0,0
4273,0,42.000000,0,1018.0,9269,5,1,1,0,0
4274,1,28.621492,0,0.0,9271,3,0,0,0,1
4275,0,28.621492,0,3203.0,9273,3,0,0,1,0


In [None]:
get_info( df_test_3 )

Total rows: 8693
CryoSleep: nan=0, mode=0, unique_c=2, unique=[1 0]
Age: nan=0, mode=18.0, unique_c=93, unique=[27.         19.         31.         38.         20.         21.
 23.         24.         45.         44.         46.         29.
 40.         30.         14.         66.         36.         18.
 26.         48.          6.         33.          1.         17.
 34.          5.         22.         16.          7.          2.
  0.         56.         39.         28.62149216 35.          8.
  3.          9.         25.         42.         50.         41.
 32.         49.         55.         60.         47.         58.
 28.         12.         52.         15.         29.42857143 43.
 54.         70.         37.         59.         61.         63.
 39.33333333 53.          4.         10.         65.         13.
 51.         79.         19.5        28.62149216 74.         67.
 69.         57.         72.         75.         11.         16.5
 73.         71.         28.61312348 64.   

In [None]:
pred_f = rndf.predict(df_test_3)
pred_x = xgb.predict(df_test_3)

accuracy_score(pred_f, pred_x)

0.8447509936871639

# Lab 5

In [18]:
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.ensemble import RandomForestClassifier
from skopt import BayesSearchCV
from skopt.space import Integer, Real
from xgboost import XGBClassifier
from scipy.stats import randint, uniform

In [19]:
def show_cv_info(cv):
    df_results_brf = pd.DataFrame(cv.cv_results_)

    print( cv.best_params_, cv.best_score_)
    return df_results_brf.sort_values("rank_test_score").head(3)

In [None]:
grids_rf = [
    {
        "n_estimators": [10, 100, 1000],
        "max_depth": [None, 10, 100],
        "min_samples_split": [2, 5, 10]
        # "criterion": ["gini", "entropy", "log_loss"],
        # "n_jobs": [-1]
    }
]

grid_search_rf = GridSearchCV(RandomForestClassifier(), grids_rf, n_jobs=-1, verbose=10)

grid_search_rf.fit(df_train_3, df_train_y)


Fitting 5 folds for each of 27 candidates, totalling 135 fits


In [None]:
show_cv_info(grid_search_rf)

{'max_depth': 10, 'min_samples_split': 10, 'n_estimators': 10} 0.7099999272097307


Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_max_depth,param_min_samples_split,param_n_estimators,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
15,0.099368,0.00559,0.005217,0.000398,10,10,10,"{'max_depth': 10, 'min_samples_split': 10, 'n_...",0.691202,0.700403,0.709028,0.732451,0.716916,0.71,0.01413,1
16,0.899522,0.019493,0.025492,0.003904,10,10,100,"{'max_depth': 10, 'min_samples_split': 10, 'n_...",0.634848,0.698102,0.717654,0.73878,0.71519,0.700915,0.03547,2
14,9.119944,0.140338,0.238467,0.030288,10,5,1000,"{'max_depth': 10, 'min_samples_split': 5, 'n_e...",0.636573,0.698677,0.719379,0.737054,0.711738,0.700684,0.034377,3


In [24]:
rgrids_rf = [
    {
        "n_estimators": randint(10, 1000),
        "max_depth": randint(5, 100),
        "min_samples_split": randint(2, 10),
        # "criterion": ["gini", "entropy", "log_loss"],
        # "n_jobs": [-1]
    }
]

rgrid_search_rf = RandomizedSearchCV(RandomForestClassifier(), rgrids_rf, n_jobs=-1, verbose=10, n_iter=20)

rgrid_search_rf.fit(df_train_3, df_train_y)

Fitting 5 folds for each of 20 candidates, totalling 100 fits


In [25]:
show_cv_info(rgrid_search_rf)

{'max_depth': 8, 'min_samples_split': 9, 'n_estimators': 423} 0.7277181375484634


Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_max_depth,param_min_samples_split,param_n_estimators,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
6,3.223157,0.152351,0.093118,0.012855,8,9,423,"{'max_depth': 8, 'min_samples_split': 9, 'n_es...",0.688902,0.708453,0.73203,0.767549,0.741657,0.727718,0.027119,1
4,4.004558,0.182162,0.10699,0.01632,13,6,379,"{'max_depth': 13, 'min_samples_split': 6, 'n_e...",0.690627,0.696952,0.72743,0.7313,0.719217,0.713105,0.01637,2
1,13.677238,0.235032,0.327857,0.046329,39,9,997,"{'max_depth': 39, 'min_samples_split': 9, 'n_e...",0.691777,0.685451,0.72743,0.708285,0.722094,0.707008,0.016389,3


In [20]:
bgrids_rf = [
    {
        "n_estimators": Integer(10, 1000),
        "max_depth": Integer(5, 100),
        "min_samples_split": Integer(2, 10),
        # "criterion": ["gini", "entropy", "log_loss"],
        # "n_jobs": [-1]
    }
]

bgrid_search_rf = BayesSearchCV(RandomForestClassifier(), bgrids_rf, n_jobs=-1, verbose=10, n_iter=20)

bgrid_search_rf.fit(df_train_3, df_train_y)

Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fi

In [21]:
show_cv_info(bgrid_search_rf)

OrderedDict({'max_depth': 5, 'min_samples_split': 7, 'n_estimators': 10}) 0.7363429903963166


Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_max_depth,param_min_samples_split,param_n_estimators,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
12,0.085915,0.004129,0.004563,0.000704,5,7,10,"{'max_depth': 5, 'min_samples_split': 7, 'n_es...",0.706728,0.73893,0.733755,0.762946,0.739356,0.736343,0.017929,1
13,3.999953,0.039624,0.114425,0.01039,5,10,911,"{'max_depth': 5, 'min_samples_split': 10, 'n_e...",0.691202,0.73778,0.72973,0.767549,0.744534,0.734159,0.024902,2
16,3.292879,0.046304,0.081987,0.001454,5,8,728,"{'max_depth': 5, 'min_samples_split': 8, 'n_es...",0.692352,0.736055,0.72973,0.763521,0.74626,0.733584,0.023571,3


In [None]:
pred_rf_g = bgrid_search_rf.predict(df_test_3)
pred_rf_r = bgrid_search_rf.predict(df_test_3)
pred_rf_b = bgrid_search_rf.predict(df_test_3)
accuracy_score(pred_f, pred_rf_g),accuracy_score(pred_f, pred_rf_r),accuracy_score(pred_f, pred_rf_b),accuracy_score(pred_rf_g, pred_rf_b),accuracy_score(pred_rf_g, pred_rf_r),accuracy_score(pred_rf_r, pred_rf_b)

(0.8480243161094225, 0.8480243161094225, 0.8480243161094225, 1.0, 1.0, 1.0)

In [22]:
grids_xgb = [
    {
        "n_estimators": [10, 100, 1000],
        "max_depth": [None, 10, 100],
        # "learning_rate": [0.1, 0.5, 1.0]
        # "criterion": ["gini", "entropy", "log_loss"],
        # "n_jobs": [-1]
    }
]

grid_search_xgb = GridSearchCV(XGBClassifier(), grids_xgb, n_jobs=-1, verbose=10)

grid_search_xgb.fit(df_train_3, df_train_y)


Fitting 5 folds for each of 9 candidates, totalling 45 fits


In [23]:
show_cv_info(grid_search_xgb)

{'max_depth': 10, 'n_estimators': 10} 0.6324634013834121


Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_max_depth,param_n_estimators,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
3,0.104391,0.008434,0.015136,0.00443,10.0,10,"{'max_depth': 10, 'n_estimators': 10}",0.692352,0.667625,0.533065,0.560414,0.708861,0.632463,0.071737,1
6,0.377753,0.021526,0.013327,0.002877,100.0,10,"{'max_depth': 100, 'n_estimators': 10}",0.691777,0.6636,0.545716,0.547181,0.707135,0.631082,0.070501,2
0,0.062058,0.006741,0.016432,0.001013,,10,"{'max_depth': None, 'n_estimators': 10}",0.688902,0.686601,0.525014,0.520138,0.706559,0.625443,0.084288,3


In [None]:
rgrids_xgb = [
    {
        "n_estimators": randint(10, 1000),
        "max_depth": randint(10, 100),
        # "learning_rate": uniform(0.1, 1.0),
        # "criterion": ["gini", "entropy", "log_loss"],
        # "n_jobs": [-1]
    }
]

rgrid_search_xgb = RandomizedSearchCV(XGBClassifier(), rgrids_xgb, n_jobs=-1, verbose=10, n_iter=50)

rgrid_search_xgb.fit(df_train_3, df_train_y)

Fitting 5 folds for each of 50 candidates, totalling 250 fits


In [None]:
show_cv_info(rgrid_search_xgb)

{'max_depth': 82, 'n_estimators': 10} 0.6321169858740557


Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_max_depth,param_n_estimators,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
19,0.414494,0.043514,0.011758,0.002137,82,10,"{'max_depth': 82, 'n_estimators': 10}",0.692352,0.658424,0.552616,0.548907,0.708285,0.632117,0.068361,1
37,0.654357,0.048449,0.018515,0.004362,87,19,"{'max_depth': 87, 'n_estimators': 19}",0.688902,0.658424,0.53709,0.537399,0.704833,0.62533,0.073451,2
24,0.655531,0.029911,0.017256,0.002172,35,30,"{'max_depth': 35, 'n_estimators': 30}",0.682001,0.652099,0.525589,0.529919,0.702532,0.618428,0.075765,3


In [None]:
bgrids_xbg = [
    {
        "n_estimators": Integer(10, 1000),
        "max_depth": Integer(5, 100),
        # "learning_rate": Real(0.1, 1.0),
        # "criterion": ["gini", "entropy", "log_loss"],
        # "n_jobs": [-1]
    }
]

bgrid_search_xbg = BayesSearchCV(XGBClassifier(), bgrids_xbg, n_jobs=-1, verbose=10, n_iter=10)

bgrid_search_xbg.fit(df_train_3, df_train_y)

Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits


In [None]:
show_cv_info(bgrid_search_xbg)

OrderedDict({'max_depth': 70, 'n_estimators': 159}) 0.6110661723104492


Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_max_depth,param_n_estimators,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
2,1.160596,0.030002,0.043775,0.01041,70,159,"{'max_depth': 70, 'n_estimators': 159}",0.685451,0.634848,0.514664,0.52359,0.696778,0.611066,0.077962,1
6,1.485655,0.036713,0.070396,0.017943,35,286,"{'max_depth': 35, 'n_estimators': 286}",0.684876,0.627372,0.506613,0.528193,0.70023,0.609457,0.079284,2
1,1.795767,0.027086,0.080801,0.02386,41,364,"{'max_depth': 41, 'n_estimators': 364}",0.687752,0.623347,0.515814,0.521864,0.696203,0.608996,0.077831,3


In [None]:
bgrid_search_xbg.score(df_train_3, df_train_y), xgb.score(df_train_3, df_train_y)

(0.9967790176003681, 0.9647992637754516)

In [None]:
pred_x_g = bgrid_search_xbg.predict(df_test_3)
pred_x_r = bgrid_search_xbg.predict(df_test_3)
pred_x_b = bgrid_search_xbg.predict(df_test_3)
accuracy_score(pred_x, pred_x_g),accuracy_score(pred_x, pred_x_r),accuracy_score(pred_x, pred_x_b),accuracy_score(pred_x_g, pred_x_b),accuracy_score(pred_x_g, pred_x_r),accuracy_score(pred_x_r, pred_x_b)

(0.8664952069207388, 0.8664952069207388, 0.8664952069207388, 1.0, 1.0, 1.0)

In [None]:
accuracy_score(pred_f, pred_x),accuracy_score(pred_rf_g, pred_x_g),accuracy_score(pred_rf_r, pred_x_r),accuracy_score(pred_rf_b, pred_x_b)

(0.8447509936871639,
 0.8038344634089315,
 0.8038344634089315,
 0.8038344634089315)