# In [None]:
import numpy as np
import pandas as pd
import xgboost as xgb
import time 

import pandas as pd

# Location of the modelling sample. The file is parsed with pandas' default
# CSV settings, so its first row supplies the column names.
csv_file_path = 'C:/revise/model_sample.csv'

# Load the whole sample into memory as a DataFrame.
model_sample = pd.read_csv(filepath_or_buffer=csv_file_path)


# Fix the global NumPy RNG so the train/test partition is reproducible.
np.random.seed(85298)

# Draw 67% of the row positions, without replacement, for training.
nr = len(model_sample)
ntrain = round(nr * 0.67)
tindex = np.random.choice(nr, size=ntrain, replace=False)

# Training rows: column 0 is the target, every other column is a feature.
train_rows = model_sample.iloc[tindex]
train_df = train_rows.iloc[:, 1:]
train_tgt = train_rows.iloc[:, 0]

# Held-out rows: whatever remains after dropping the sampled index labels.
holdout_rows = model_sample.drop(index=tindex)
test_df = holdout_rows.iloc[:, 1:]
test_tgt = holdout_rows.iloc[:, 0]

# NumPy arrays for building the XGBoost matrices.
trainx, testx = train_df.values, test_df.values
trainy, testy = train_tgt.values, test_tgt.values
# Full dataset for scoring: every row, with the 'payer' column as the label.
allx = model_sample.drop('payer', axis=1).values
ally = model_sample['payer'].values
dall = xgb.DMatrix(allx, label=ally)

# Training and testing matrices used during the hyper-parameter search.
dtrain = xgb.DMatrix(trainx, label=trainy)
dtest = xgb.DMatrix(testx, label=testy)

# Train and test rows stacked back together (train rows first) for the
# final model.
combined_data = np.concatenate((trainx, testx), axis=0)
combined_label = np.concatenate((trainy, testy))
dcombined = xgb.DMatrix(combined_data, label=combined_label)

# Evaluation sets reported during training; 'val' is deliberately last.
watchlist = [(dtrain, 'train'), (dtest, 'val')]
# Start timing the hyper-parameter search.
start_time = time.time()

# Best validation AUC per trial (filled in by the training loop) and the
# sampled configurations; the two lists are index-aligned.
best_auc_list = []
parameters_list = []


def sample_xgb_params():
    """Draw one random XGBoost configuration from the search space.

    Values come from the global NumPy RNG, so seed it beforehand for
    reproducible draws. The draws happen in dictionary order; changing that
    order changes which configurations a given seed produces.

    Returns:
        dict: Booster settings plus 'nrounds'. 'nrounds' is not a booster
        parameter; the training loop passes it as ``num_boost_round``.
    """
    return {
        'booster': 'gbtree',
        'objective': 'binary:logistic',
        # randint's upper bound is exclusive: depths 1..7.
        'max_depth': np.random.randint(1, 8),
        'nrounds': int(np.random.choice([100, 250, 500, 750, 1000])),
        'eta': np.random.uniform(0.001, 0.15),
        'gamma': float(np.random.choice([0, 0.5, 0.75, 0.9])),
        'lambda': float(np.random.choice([0.5, 0.6, 0.7, 0.8, 1])),
        'alpha': float(np.random.choice([0.5, 0.6, 0.7, 0.8, 0.9, 1])),
        'subsample': np.random.uniform(0.5, 1),
        'colsample_bytree': np.random.uniform(0.5, 1),
        'colsample_bylevel': np.random.uniform(0.5, 1),
        'colsample_bynode': np.random.uniform(0.5, 1),
        'min_child_weight': int(np.random.choice(range(10, 16))),
    }


# Seed once, then draw the 100 candidate configurations. The sampling used to
# be written out twice back to back; the first pass was discarded when the
# lists, timer and seed were reset, so a single pass yields the same result.
np.random.seed(20)
for _ in range(100):
    parameters_list.append(sample_xgb_params())
# Sampled settings forwarded to the booster, in the order they are passed.
# 'nrounds' is left out because it is supplied as num_boost_round instead.
booster_keys = (
    'booster', 'objective', 'max_depth', 'eta', 'subsample', 'gamma',
    'alpha', 'lambda', 'colsample_bytree', 'colsample_bylevel',
    'colsample_bynode', 'min_child_weight',
)

# Fit one model per sampled configuration and record its best validation AUC.
for idx, param in enumerate(parameters_list):
    # Per-round metric history, populated by xgb.train.
    evals_result = {}

    booster_params = {key: param[key] for key in booster_keys}
    booster_params['eval_metric'] = 'auc'
    booster_params['device'] = 'cuda'

    model = xgb.train(
        booster_params,
        dtrain,
        num_boost_round=param['nrounds'],
        evals=watchlist,
        evals_result=evals_result,
        early_stopping_rounds=20,
        verbose_eval=False,
    )

    # Highest AUC reached on the 'val' set across the boosting rounds.
    best_auc = max(evals_result['val']['auc'])
    best_auc_list.append(best_auc)
    print(f"nfeatures: {trainx.shape[1]}, iteration: {idx + 1}")
# Tabulate the search: one row per sampled configuration, with its best
# validation AUC placed in the first column.
results_df = pd.DataFrame(parameters_list)
results_df.insert(loc=0, column='best_auc', value=best_auc_list)

# Report the wall-clock time elapsed since start_time was recorded.
end_time = time.time()
time_taken = end_time - start_time
print(f"Total time taken: {time_taken:.2f} seconds")

