In [2]:
import xgboost as xgb
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

data = pd.read_csv("data/dataset_cifar10_v1.csv")

In [3]:
data
for col in ['convblock1', 'convblock2', 'convblock3', 'convblock4', 'convblock5']:
    data[col] = data[col].astype('category')

In [4]:
data.columns

Index(['out_channel0', 'M', 'R1', 'R2', 'R3', 'R4', 'R5', 'convblock1',
       'widenfact1', 'B1', 'convblock2', 'widenfact2', 'B2', 'convblock3',
       'widenfact3', 'B3', 'convblock4', 'widenfact4', 'B4', 'convblock5',
       'widenfact5', 'B5', '1_day_accuracy', '1_day_accuracy_std', 'AVM'],
      dtype='object')

In [5]:
data['convblock1']

0       B
1       D
2       C
3       B
4       D
       ..
1195    B
1196    A
1197    A
1198    C
1199    D
Name: convblock1, Length: 1200, dtype: category
Categories (4, object): ['A', 'B', 'C', 'D']

In [7]:
X = data.iloc[:,:-3]
y = data['1_day_accuracy']

# Splitting the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Convert data into DMatrix, which is optimized for XGBoost
dtrain = xgb.DMatrix(X_train, label=y_train, enable_categorical=True)
dtest = xgb.DMatrix(X_test, label=y_test, enable_categorical=True)

# Set up parameters for xgboost
params = {
    'max_depth': 4,
    'eta': 0.1,
    'objective': 'reg:squarederror',
    'eval_metric': 'rmse'
}

# Number of training iterations
num_round = 150000

# Train the model
bst = xgb.train(params, dtrain, num_round)

# Predict the test set
predictions = bst.predict(dtest)

# Evaluate the model using RMSE
rmse = mean_squared_error(y_test, predictions, squared=False)
print(f"Root Mean Squared Error: {rmse}")

Root Mean Squared Error: 0.05485483393534464




In [None]:
predictions

array([0.92527896, 0.698922  , 0.9285322 , 0.79357475, 0.92794824,
       0.90192604, 0.9217329 , 0.9222144 , 0.919206  , 0.89231473,
       0.94403505, 0.91795313, 0.9243637 , 0.7313592 , 0.78250974,
       0.93121356, 0.9284215 , 0.92271405, 0.68565196, 0.7726965 ,
       0.9198173 , 0.745436  , 0.76385695, 0.7368394 , 0.7411836 ,
       0.8956946 , 0.9229256 , 0.9004783 , 0.76968116, 0.9087528 ,
       0.921425  , 0.73511314, 0.9156848 , 0.6865857 , 0.9238393 ,
       0.9072059 , 0.9284473 , 0.9399003 , 0.94460714, 0.710969  ,
       0.73944426, 0.72921455, 0.93056947, 0.7454101 , 0.90201265,
       0.91888285, 0.89232665, 0.9191742 , 0.9249999 , 0.7201237 ,
       0.9229311 , 0.8999815 , 0.92364126, 0.9304185 , 0.9185908 ,
       0.9258892 , 0.7524507 , 0.9227874 , 0.92726123, 0.9022788 ,
       0.90407884, 0.9182474 , 0.90373373, 0.76525885, 0.92103374,
       0.9160238 , 0.9091965 , 0.74233234, 0.7816369 , 0.92242754,
       0.7355515 , 0.71748376, 0.8659797 , 0.7179335 , 0.90591

In [None]:
y_test

1178    0.933167
865     0.830814
101     0.925458
439     0.882374
58      0.931208
          ...   
382     0.910176
867     0.666236
542     0.887508
1193    0.934182
874     0.644247
Name: 1_day_accuracy, Length: 240, dtype: float64

In [None]:
bst.save_model('model.json')