# Data and Constants

In [1]:
import pandas as pd
import numpy as np
import re
from scipy.spatial import cKDTree
from scipy.stats import norm
import matplotlib.pyplot as plt
from pycaret.regression import *
from multiprocessing import Pool, cpu_count


In [2]:
# Input HDF5 files for section 12; both are relative paths and are loaded with pd.read_hdf below.
lipid_path = 'data/section12/lipids_section_12.h5'
gene_path = 'data/section12/genes_section_12.h5'

# Helper Functions

In [3]:
def gaussian_weight(distance, std_dev):
    """Return the zero-mean normal density evaluated at ``distance``.

    Parameters
    ----------
    distance : float or array-like
        Distance(s) at which the kernel is evaluated.
    std_dev : float
        Standard deviation (scale) of the normal distribution.

    Returns
    -------
    float or numpy.ndarray
        ``norm.pdf`` of ``distance`` for a normal distribution centred on 0,
        so the weight is largest at distance 0 and shrinks as the distance grows.
    """
    return norm.pdf(distance, loc=0, scale=std_dev)

In [4]:
def exponential_decay(distance, threshold, decay_rate):
    """Exponentially decaying weight for distances measured from ``threshold``.

    Parameters
    ----------
    distance : float or numpy.ndarray
        Distance(s) to convert into a weight.
    threshold : float
        Distance at which the returned weight equals 1.
    decay_rate : float
        Rate of the exponential; larger values make the weight fall off faster.

    Returns
    -------
    float or numpy.ndarray
        ``exp(-decay_rate * (distance - threshold))``. Values are below 1 for
        distances above the threshold and above 1 for distances below it
        (assuming a positive ``decay_rate``).
    """
    return np.exp(-decay_rate * (distance - threshold))

In [5]:

def filter_dataframe_from_file(df, filepath):
    """Keep only the rows of ``df`` whose index label is listed in a text file.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to filter; its index labels are compared against the file entries.
    filepath : str or path-like
        Text file containing one index label per line.

    Returns
    -------
    pandas.DataFrame
        The rows of ``df`` (in their original order) whose index label appears
        in the file. Labels in the file that are absent from ``df`` are ignored.
    """
    # One entry per line, without the line terminators
    with open(filepath, 'r') as handle:
        wanted_labels = handle.read().splitlines()

    # Boolean mask over the index: True where the label was listed in the file
    keep_mask = df.index.isin(wanted_labels)
    return df.loc[keep_mask]

# Loading Dataset

In [6]:
# Read the lipid and gene tables for section 12 from their HDF5 files
lipids_section_12 = pd.read_hdf(lipid_path)
genes_section_12 = pd.read_hdf(gene_path)

# Strip the trailing "_<digits>" suffix that was previously added to the lipid column names
trailing_numeric_suffix = re.compile(r'_(\d+)$')
new_column_names = [trailing_numeric_suffix.sub('', col) for col in lipids_section_12.columns]
lipids_section_12.columns = new_column_names

# Nearest Neighbor selection

In [7]:
# Nearest-neighbour assignment: every lipid point receives the gene values of the
# single closest gene observation in the (y_ccf, z_ccf) plane.

# Build a KD-tree over the gene coordinates
genes_coords = genes_section_12[['y_ccf', 'z_ccf']].values
genes_kdtree = cKDTree(genes_coords)

# Coordinates of the lipid measurements
lipids_coords = lipids_section_12[['y_ccf', 'z_ccf']].values

# Row position (within genes_section_12) of the closest gene observation for each lipid point
_, indices = genes_kdtree.query(lipids_coords, k=1)

# Feature columns used for aggregation: everything except the first 46 and the last 50 columns
# NOTE(review): presumably the excluded columns are metadata/coordinates — confirm against the file schema
gene_feature_columns = genes_section_12.columns[46:-50]

# Gather all matched rows with one positional lookup instead of one .iloc call per
# lipid point; the result has one row per lipid point, in lipid order.
aggregated_gene_data = genes_section_12.iloc[indices, 46:-50].to_numpy(dtype=float)

# Convert the aggregated data into a DataFrame (default 0..n-1 index)
aggregated_gene_data_df = pd.DataFrame(aggregated_gene_data, columns=gene_feature_columns)

# Exponential Decay

In [14]:
# Distance-weighted aggregation: gene observations within the average nearest-neighbour
# distance get weight 1, farther ones are down-weighted with exponential_decay.

# Build a KD-tree over the gene coordinates
genes_coords = genes_section_12[['y_ccf', 'z_ccf']].values
genes_kdtree = cKDTree(genes_coords)

# Coordinates of the lipid measurements
lipids_coords = lipids_section_12[['y_ccf', 'z_ccf']].values

# Average distance from a lipid point to its 6 closest gene observations;
# used below as the threshold beyond which weights start to decay.
# k is capped at the number of gene points, because cKDTree pads missing neighbours
# with infinite distances, which would make the mean infinite.
distances, indices = genes_kdtree.query(lipids_coords, k=min(6, len(genes_coords)))
average_closest_distance = np.mean(distances)

# Query the (up to) 1000 closest gene observations and their distances.
# Capping k also avoids the out-of-range padding indices cKDTree returns for
# missing neighbours, which would break the positional lookup below.
n_neighbors = min(1000, len(genes_coords))
distances, indices = genes_kdtree.query(lipids_coords, k=n_neighbors)
# query() drops the neighbour axis when k == 1; restore a consistent 2-D layout
distances = distances.reshape(len(lipids_coords), n_neighbors)
indices = indices.reshape(len(lipids_coords), n_neighbors)

# Feature columns used for aggregation: everything except the first 46 and the last 50 columns.
# Extract them once as a NumPy array so the loop does not slice the DataFrame per lipid point.
gene_feature_columns = genes_section_12.columns[46:-50]
gene_values = genes_section_12.iloc[:, 46:-50].to_numpy(dtype=float)

# One output row per lipid point; rows whose total weight is not positive stay at zero
aggregated_gene_data = np.zeros((len(lipids_coords), gene_values.shape[1]))

# Perform weighted aggregation
for i, (gene_indices, dists) in enumerate(zip(indices, distances)):
    # Weight 1 up to the average closest distance, exponential penalty (rate 3) beyond it
    weights = np.where(dists <= average_closest_distance, 1, exponential_decay(dists, average_closest_distance, 3))
    total_weight = weights.sum()
    if total_weight > 0:
        # nansum treats missing gene values as 0, matching pandas' default skipna sum
        weighted_sum = np.nansum(gene_values[gene_indices] * weights[:, np.newaxis], axis=0)
        aggregated_gene_data[i] = weighted_sum / total_weight

# Convert the aggregated data into a DataFrame
aggregated_gene_data_df = pd.DataFrame(aggregated_gene_data, columns=gene_feature_columns)


# Gaussian-weighted mean of the K nearest genes for each lipid data point

In [21]:
# Gaussian-weighted aggregation: each lipid point receives the weighted mean of its
# nearest gene observations, with weights given by gaussian_weight of the distance.

# Build a KD-tree over the gene coordinates
genes_coords = genes_section_12[['y_ccf', 'z_ccf']].values
genes_kdtree = cKDTree(genes_coords)

# Coordinates of the lipid measurements
lipids_coords = lipids_section_12[['y_ccf', 'z_ccf']].values

# Standard deviation of the distances to the 6 closest gene observations;
# used below as the width of the Gaussian kernel.
# k is capped at the number of gene points, because cKDTree pads missing neighbours
# with infinite distances, which would corrupt the statistic.
distances, indices = genes_kdtree.query(lipids_coords, k=min(6, len(genes_coords)))
std_closest_distance = np.std(distances)

# Query the (up to) 1000 closest gene observations and their distances.
# Capping k also avoids the out-of-range padding indices cKDTree returns for
# missing neighbours, which would break the positional lookup below.
n_neighbors = min(1000, len(genes_coords))
distances, indices = genes_kdtree.query(lipids_coords, k=n_neighbors)
# query() drops the neighbour axis when k == 1; restore a consistent 2-D layout
distances = distances.reshape(len(lipids_coords), n_neighbors)
indices = indices.reshape(len(lipids_coords), n_neighbors)

# Feature columns used for aggregation: everything except the first 46 and the last 50 columns.
# Extract them once as a NumPy array so the loop does not slice the DataFrame per lipid point.
gene_feature_columns = genes_section_12.columns[46:-50]
gene_values = genes_section_12.iloc[:, 46:-50].to_numpy(dtype=float)

# One output row per lipid point; rows whose total weight is not positive stay at zero
aggregated_gene_data = np.zeros((len(lipids_coords), gene_values.shape[1]))

# Perform weighted aggregation
for i, (gene_indices, dists) in enumerate(zip(indices, distances)):
    # Gaussian kernel weights: closer gene observations contribute more
    weights = gaussian_weight(dists, std_closest_distance)
    total_weight = weights.sum()
    if total_weight > 0:
        # nansum treats missing gene values as 0, matching pandas' default skipna sum
        weighted_sum = np.nansum(gene_values[gene_indices] * weights[:, np.newaxis], axis=0)
        aggregated_gene_data[i] = weighted_sum / total_weight

# Convert the aggregated data into a DataFrame
aggregated_gene_data_df = pd.DataFrame(aggregated_gene_data, columns=gene_feature_columns)

# Take all the points into consideration

In [8]:
def process_lipid_point(lipid_coord, genes_coords, gene_data, std_closest_distance):
    """Gaussian-weighted mean of all gene rows for one lipid point.

    Parameters
    ----------
    lipid_coord : numpy.ndarray
        Coordinates of the lipid point.
    genes_coords : numpy.ndarray
        Coordinates of every gene observation, one row per observation.
    gene_data : numpy.ndarray
        Gene values, one row per gene observation (same row order as ``genes_coords``).
    std_closest_distance : float
        Standard deviation passed to ``gaussian_weight`` as the kernel width.

    Returns
    -------
    numpy.ndarray
        Weighted column sums of ``gene_data`` divided by the total weight, or a
        zero vector of length ``gene_data.shape[1]`` when the total weight is not positive.
    """
    # Euclidean distance from this lipid point to every gene observation
    offsets = genes_coords - lipid_coord
    dists = np.linalg.norm(offsets, axis=1)

    # Kernel weight for every gene observation
    weights = gaussian_weight(dists, std_closest_distance)

    # Scale each gene row by its weight and add the rows up
    weighted_sum = (gene_data * weights[:, np.newaxis]).sum(axis=0)

    total_weight = weights.sum()
    if total_weight > 0:
        return weighted_sum / total_weight
    return np.zeros(gene_data.shape[1])

In [9]:
# Coordinates of both data sets in the (y_ccf, z_ccf) plane
genes_coords = genes_section_12[['y_ccf', 'z_ccf']].values
lipids_coords = lipids_section_12[['y_ccf', 'z_ccf']].values

# Gene feature block (all columns except the first 46 and last 50), extracted only once
gene_data_columns = genes_section_12.columns[46:-50]
gene_data = genes_section_12[gene_data_columns].values
gene_data_shape = gene_data.shape[1]

# Kernel width: std of the distances from each lipid point to its 6 closest gene observations
genes_kdtree = cKDTree(genes_coords)
distances, _ = genes_kdtree.query(lipids_coords, k=6)
std_closest_distance = np.std(distances)

# One task (argument tuple) per lipid point, processed in parallel on all CPU cores
tasks = [
    (lipid_coord, genes_coords, gene_data, std_closest_distance)
    for lipid_coord in lipids_coords
]
with Pool(cpu_count()) as pool:
    results = pool.starmap(process_lipid_point, tasks)

# Copy the per-point results into a (n_lipid_points, n_gene_features) array
aggregated_gene_data = np.zeros((len(lipids_coords), gene_data_shape))
for row_position, row_values in enumerate(results):
    aggregated_gene_data[row_position] = row_values

# Convert the aggregated data into a DataFrame
aggregated_gene_data_df = pd.DataFrame(aggregated_gene_data, columns=gene_data_columns)


In [22]:
# Inspect the aggregated gene features (one row per lipid point, one column per gene feature)
aggregated_gene_data_df

Unnamed: 0,ENSMUST00000028118,ENSMUST00000028280,ENSMUST00000030676,ENSMUST00000047328,ENSMUST00000057021,ENSMUST00000090697,ENSMUST00000091554,ENSMUST00000162772,ENSMUST00000021284,ENSMUST00000022195,...,ENSMUST00000109964,ENSMUST00000114553,ENSMUST00000152412,ENSMUST00000159365,ENSMUST00000175965,ENSMUST00000196378,ENSMUST00000228095,ENSMUST00000000219,ENSMUST00000035577,ENSMUST00000060943
0,0.147987,0.462327,0.072980,0.737074,0.393011,0.036840,0.010277,0.046437,0.388947,0.061269,...,2.253574e-02,0.004449,3.522729e-03,0.181875,0.000367,0.229580,1.073747e-03,2.887407e-03,0.010568,5.719403e-04
1,0.145376,0.438662,0.072397,0.723101,0.391337,0.046304,0.012539,0.045965,0.375394,0.054864,...,2.113728e-02,0.005848,5.497692e-03,0.187041,0.000451,0.239797,1.107482e-03,2.221527e-03,0.013870,6.551450e-04
2,0.136072,0.409503,0.071754,0.713176,0.383924,0.056158,0.016338,0.047515,0.362651,0.046586,...,1.805532e-02,0.008643,8.560258e-03,0.194036,0.000720,0.247177,1.334278e-03,1.823251e-03,0.018507,7.652190e-04
3,0.122332,0.378809,0.071948,0.708389,0.372212,0.065685,0.021395,0.051439,0.352460,0.037783,...,1.408733e-02,0.012332,1.268588e-02,0.201241,0.001189,0.251840,1.785261e-03,1.665490e-03,0.023573,9.222543e-04
4,0.107172,0.350215,0.073201,0.707679,0.359079,0.073976,0.027104,0.057717,0.344678,0.030314,...,1.012393e-02,0.016146,1.745457e-02,0.206715,0.001865,0.253557,2.430526e-03,1.812998e-03,0.028051,1.139714e-03
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
94742,0.179308,0.783596,0.071025,1.242076,0.912762,0.000876,0.025595,0.003868,0.902205,0.000017,...,5.239804e-06,0.040863,9.169214e-06,0.162888,0.082121,0.254811,1.044477e-06,1.516171e-05,0.001038,5.425096e-09
94743,0.178182,0.746224,0.079776,1.251607,0.935562,0.000321,0.038039,0.007169,0.889543,0.000015,...,3.037505e-06,0.052966,1.998711e-06,0.201139,0.076683,0.211145,5.171308e-07,5.838183e-06,0.001985,3.417587e-09
94744,0.174173,0.705389,0.088836,1.255733,0.959206,0.000132,0.054116,0.012900,0.875982,0.000014,...,1.690604e-06,0.067753,4.494659e-07,0.237964,0.068000,0.173529,2.457966e-07,2.196025e-06,0.003629,2.211258e-09
94745,0.167855,0.661175,0.095272,1.252810,0.988879,0.000095,0.077583,0.024482,0.854269,0.000017,...,1.291216e-06,0.085074,2.676939e-07,0.270275,0.056731,0.137119,1.620983e-07,1.102981e-06,0.007266,2.438017e-09


In [16]:
# Keep only the lipid columns by dropping the first 3 and the last 3 columns.
# NOTE(review): presumably the dropped columns are metadata/coordinates (e.g. y_ccf, z_ccf) — confirm against the file schema
section_12_lipids_only = lipids_section_12.iloc[:, 3:-3]
section_12_lipids_only

Unnamed: 0,LPC O-16:2,LPC 16:0_dup,LPC O- 18:3,LPC O-18:2,LPC O-16:2_dup,LPC 15:1,LPC 18:1,LPC 18:0_dup,LPC 16:0,LPC O-18:3,...,SM(t42:1),PC(40:7),PC 40:6_dup,PG(42:6),Hex2Cer 32:0,SHexCer 38:1;3,PE(44:11(OH)),PC(40:4),PS(40:4),PIP(O-36:5)
section12_pixel23_121,0.000140,0.000112,0.000116,0.000125,0.000214,0.000100,0.0001,0.000197,0.000179,0.0001,...,0.0001,0.000100,0.000241,0.000179,0.0001,0.000100,0.0001,0.000261,0.0001,0.000360
section12_pixel23_122,0.000213,0.000112,0.000114,0.000125,0.000204,0.000162,0.0001,0.000100,0.000181,0.0001,...,0.0001,0.000114,0.000395,0.000208,0.0001,0.000316,0.0001,0.000268,0.0001,0.000100
section12_pixel23_123,0.000154,0.000100,0.000117,0.000134,0.000195,0.000151,0.0001,0.000232,0.000179,0.0001,...,0.0001,0.000114,0.000233,0.000203,0.0001,0.000100,0.0001,0.000232,0.0001,0.000100
section12_pixel23_124,0.000147,0.000113,0.000114,0.000136,0.000229,0.000154,0.0001,0.000100,0.000120,0.0001,...,0.0001,0.000114,0.000285,0.000187,0.0001,0.000255,0.0001,0.000100,0.0001,0.000366
section12_pixel23_125,0.000229,0.000112,0.000115,0.000206,0.000100,0.000100,0.0001,0.000100,0.000122,0.0001,...,0.0001,0.000100,0.000247,0.000179,0.0001,0.000323,0.0001,0.000100,0.0001,0.000100
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
section12_pixel308_157,0.000139,0.000100,0.000100,0.000100,0.000100,0.000100,0.0001,0.000100,0.000100,0.0001,...,0.0001,0.000100,0.000100,0.000100,0.0001,0.000100,0.0001,0.000100,0.0001,0.000100
section12_pixel308_158,0.000139,0.000100,0.000100,0.000100,0.000100,0.000100,0.0001,0.000100,0.000119,0.0001,...,0.0001,0.000100,0.000100,0.000100,0.0001,0.000100,0.0001,0.000100,0.0001,0.000100
section12_pixel308_159,0.000155,0.000112,0.000117,0.000100,0.000100,0.000100,0.0001,0.000100,0.000100,0.0001,...,0.0001,0.000100,0.000100,0.000100,0.0001,0.000100,0.0001,0.000100,0.0001,0.000100
section12_pixel308_160,0.000141,0.000113,0.000100,0.000100,0.000100,0.000100,0.0001,0.000237,0.000119,0.0001,...,0.0001,0.000100,0.000100,0.000100,0.0001,0.000100,0.0001,0.000100,0.0001,0.000100


In [23]:
# Give both frames the same positional 0..n-1 index, so that assigning a lipid column
# to a copy of the gene frame later aligns the rows by position rather than by pixel label.
aggregated_gene_data_df = aggregated_gene_data_df.reset_index(drop=True)
section_12_lipids_only = section_12_lipids_only.reset_index(drop=True)

# Simplify lipid names

In [24]:
def rename_lipid(lipid_name):
    """Return a simplified version of a lipid column name.

    Every ':' is replaced by ';' and every non-breaking space (U+00A0) by a
    regular space. This is an example renaming scheme — customize as needed.

    Parameters
    ----------
    lipid_name : str
        Original lipid name.

    Returns
    -------
    str
        The name with the substitutions applied.
    """
    substitutions = str.maketrans({':': ';', '\xa0': ' '})
    return lipid_name.translate(substitutions)

# Apply the renaming scheme to every lipid column name
section_12_lipids_only.columns = list(map(rename_lipid, section_12_lipids_only.columns))

# Selecting the best model, training it, and inspecting the results

In [27]:
# For every lipid: compare PyCaret regressors on the aggregated gene features,
# then cross-validate a CatBoost model.
best_models = {}  # lipid name -> estimator returned by compare_models (previously overwritten each iteration)

for i, lipid_name in enumerate(section_12_lipids_only.columns):

    # Training table: the aggregated gene features plus the current lipid as the last column.
    # Both frames share a 0..n-1 index, so the assignment aligns row by row.
    df = aggregated_gene_data_df.copy()
    df[lipid_name] = section_12_lipids_only.iloc[:, i]

    # Step 1: Set up the PyCaret experiment. The target is named explicitly
    # (as in the evaluation cell) instead of relying on the column position.
    setup(data=df, target=lipid_name, fold=5, session_id=42, use_gpu=True, verbose=False, preprocess=False)

    # Step 2: Compare the available models with 5-fold CV and keep the winner for this lipid
    best_model = compare_models(fold=5)
    best_models[lipid_name] = best_model

    # Step 3: Create (cross-validate) a CatBoost model
    model = create_model('catboost')

    # Step 4: Finalize the model
    #final_model = finalize_model(model)

    # Step 5: Save the model
    #save_model(model, f'models/{lipid_name}_model_{i}')

[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 2, number of used features: 0
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 2, number of used features: 0


[LightGBM] [Fatal] CUDA Tree Learner was not enabled in this build.
Please recompile with CMake option -DUSE_CUDAP=1
[LightGBM] [Fatal] CUDA Tree Learner was not enabled in this build.
Please recompile with CMake option -DUSE_CUDAP=1


[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 2, number of used features: 0
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 2, number of used features: 0


[LightGBM] [Fatal] CUDA Tree Learner was not enabled in this build.
Please recompile with CMake option -DUSE_CUDAP=1
[LightGBM] [Fatal] CUDA Tree Learner was not enabled in this build.
Please recompile with CMake option -DUSE_CUDAP=1


[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 2, number of used features: 0
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 2, number of used features: 0


[LightGBM] [Fatal] CUDA Tree Learner was not enabled in this build.
Please recompile with CMake option -DUSE_CUDAP=1
[LightGBM] [Fatal] CUDA Tree Learner was not enabled in this build.
Please recompile with CMake option -DUSE_CUDAP=1


[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 2, number of used features: 0
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 2, number of used features: 0


[LightGBM] [Fatal] CUDA Tree Learner was not enabled in this build.
Please recompile with CMake option -DUSE_CUDAP=1
[LightGBM] [Fatal] CUDA Tree Learner was not enabled in this build.
Please recompile with CMake option -DUSE_CUDAP=1


[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 2, number of used features: 0
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 2, number of used features: 0


[LightGBM] [Fatal] CUDA Tree Learner was not enabled in this build.
Please recompile with CMake option -DUSE_CUDAP=1
[LightGBM] [Fatal] CUDA Tree Learner was not enabled in this build.
Please recompile with CMake option -DUSE_CUDAP=1


[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 2, number of used features: 0
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 2, number of used features: 0


[LightGBM] [Fatal] CUDA Tree Learner was not enabled in this build.
Please recompile with CMake option -DUSE_CUDAP=1
[LightGBM] [Fatal] CUDA Tree Learner was not enabled in this build.
Please recompile with CMake option -DUSE_CUDAP=1


[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 2, number of used features: 0
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 2, number of used features: 0


[LightGBM] [Fatal] CUDA Tree Learner was not enabled in this build.
Please recompile with CMake option -DUSE_CUDAP=1
[LightGBM] [Fatal] CUDA Tree Learner was not enabled in this build.
Please recompile with CMake option -DUSE_CUDAP=1


Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE,TT (Sec)
lr,Linear Regression,0.0003,0.0,0.0004,0.4759,0.0004,0.3643,0.48
ridge,Ridge Regression,0.0003,0.0,0.0004,0.4761,0.0004,0.3649,0.168
omp,Orthogonal Matching Pursuit,0.0003,0.0,0.0004,0.387,0.0004,0.4289,0.14
lasso,Lasso Regression,0.0004,0.0,0.0005,-0.0,0.0005,0.5921,0.162
en,Elastic Net,0.0004,0.0,0.0005,-0.0,0.0005,0.5921,0.142
llar,Lasso Least Angle Regression,0.0004,0.0,0.0005,-0.0,0.0005,0.5921,0.146
lar,Least Angle Regression,0.0119,0.0006,0.0152,-2186.7267,0.0141,14.1471,0.212


Processing:   0%|          | 0/85 [00:00<?, ?it/s]

KeyboardInterrupt: 

# Evaluating One Model using K-Fold Cross-Validation

In [28]:
# Cross-validate a CatBoost regressor for every lipid and collect the mean fold scores.
# Rows are gathered in a plain list and turned into a DataFrame once:
# DataFrame.append is deprecated (removed in pandas 2.0) and re-copies the frame on every call.
results_records = []

try:
    for i, lipid_name in enumerate(section_12_lipids_only.columns):
        # Training table: aggregated gene features plus the current lipid as the target column
        df = aggregated_gene_data_df.copy()
        df[lipid_name] = section_12_lipids_only.iloc[:, i]

        s = setup(data=df, target=lipid_name, fold=5, session_id=42, verbose=False, use_gpu=True, preprocess=False)

        model = create_model('catboost', verbose=False)

        #fine_tuned = tune_model(model, search_library='optuna',optimize="MAPE", n_iter=100, early_stopping=True)

        # Retrieve the cross-validation score grid produced by the last PyCaret call
        metrics = pull()

        r2_mean = metrics.loc['Mean', 'R2']
        mape_mean = metrics.loc['Mean', 'MAPE']

        print("R2 mean: ", r2_mean)
        print("MAPE mean: ", mape_mean)

        results_records.append({'Lipid': lipid_name, 'R2_mean': r2_mean, 'MAPE_mean': mape_mean})
finally:
    # Built in `finally` so the lipids finished so far are still available in
    # results_df when the (long-running) loop is interrupted.
    results_df = pd.DataFrame(results_records, columns=['Lipid', 'R2_mean', 'MAPE_mean'])

results_df


[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 2, number of used features: 0
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 2, number of used features: 0


[LightGBM] [Fatal] CUDA Tree Learner was not enabled in this build.
Please recompile with CMake option -DUSE_CUDAP=1
[LightGBM] [Fatal] CUDA Tree Learner was not enabled in this build.
Please recompile with CMake option -DUSE_CUDAP=1


[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 2, number of used features: 0
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 2, number of used features: 0


[LightGBM] [Fatal] CUDA Tree Learner was not enabled in this build.
Please recompile with CMake option -DUSE_CUDAP=1
[LightGBM] [Fatal] CUDA Tree Learner was not enabled in this build.
Please recompile with CMake option -DUSE_CUDAP=1


[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 2, number of used features: 0
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 2, number of used features: 0


[LightGBM] [Fatal] CUDA Tree Learner was not enabled in this build.
Please recompile with CMake option -DUSE_CUDAP=1
[LightGBM] [Fatal] CUDA Tree Learner was not enabled in this build.
Please recompile with CMake option -DUSE_CUDAP=1


[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 2, number of used features: 0
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 2, number of used features: 0


[LightGBM] [Fatal] CUDA Tree Learner was not enabled in this build.
Please recompile with CMake option -DUSE_CUDAP=1
[LightGBM] [Fatal] CUDA Tree Learner was not enabled in this build.
Please recompile with CMake option -DUSE_CUDAP=1


[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 2, number of used features: 0
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 2, number of used features: 0


[LightGBM] [Fatal] CUDA Tree Learner was not enabled in this build.
Please recompile with CMake option -DUSE_CUDAP=1
[LightGBM] [Fatal] CUDA Tree Learner was not enabled in this build.
Please recompile with CMake option -DUSE_CUDAP=1


[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 2, number of used features: 0
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 2, number of used features: 0


[LightGBM] [Fatal] CUDA Tree Learner was not enabled in this build.
Please recompile with CMake option -DUSE_CUDAP=1
[LightGBM] [Fatal] CUDA Tree Learner was not enabled in this build.
Please recompile with CMake option -DUSE_CUDAP=1


[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 2, number of used features: 0
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 2, number of used features: 0


[LightGBM] [Fatal] CUDA Tree Learner was not enabled in this build.
Please recompile with CMake option -DUSE_CUDAP=1
[LightGBM] [Fatal] CUDA Tree Learner was not enabled in this build.
Please recompile with CMake option -DUSE_CUDAP=1


Unnamed: 0_level_0,MAE,MSE,RMSE,R2,RMSLE,MAPE
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,0.0002,0.0,0.0003,0.6247,0.0003,0.2721
1,0.0002,0.0,0.0003,0.6181,0.0003,0.2677
2,0.0002,0.0,0.0003,0.6199,0.0003,0.2647
3,0.0002,0.0,0.0003,0.6199,0.0003,0.2646
4,0.0002,0.0,0.0003,0.6197,0.0003,0.2675
Mean,0.0002,0.0,0.0003,0.6204,0.0003,0.2673
Std,0.0,0.0,0.0,0.0022,0.0,0.0027


[I 2023-11-29 18:46:29,289] Searching the best hyperparameters using 66322 samples...
[I 2023-11-29 18:57:48,643] Finished hyperparameter search!


R2 mean:  0.6204
MAPE mean:  0.2673
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 2, number of used features: 0
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 2, number of used features: 0


[LightGBM] [Fatal] CUDA Tree Learner was not enabled in this build.
Please recompile with CMake option -DUSE_CUDAP=1
[LightGBM] [Fatal] CUDA Tree Learner was not enabled in this build.
Please recompile with CMake option -DUSE_CUDAP=1


[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 2, number of used features: 0
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 2, number of used features: 0


[LightGBM] [Fatal] CUDA Tree Learner was not enabled in this build.
Please recompile with CMake option -DUSE_CUDAP=1
[LightGBM] [Fatal] CUDA Tree Learner was not enabled in this build.
Please recompile with CMake option -DUSE_CUDAP=1


[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 2, number of used features: 0
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 2, number of used features: 0


[LightGBM] [Fatal] CUDA Tree Learner was not enabled in this build.
Please recompile with CMake option -DUSE_CUDAP=1
[LightGBM] [Fatal] CUDA Tree Learner was not enabled in this build.
Please recompile with CMake option -DUSE_CUDAP=1


[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 2, number of used features: 0
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 2, number of used features: 0


[LightGBM] [Fatal] CUDA Tree Learner was not enabled in this build.
Please recompile with CMake option -DUSE_CUDAP=1
[LightGBM] [Fatal] CUDA Tree Learner was not enabled in this build.
Please recompile with CMake option -DUSE_CUDAP=1


[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 2, number of used features: 0
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 2, number of used features: 0


[LightGBM] [Fatal] CUDA Tree Learner was not enabled in this build.
Please recompile with CMake option -DUSE_CUDAP=1
[LightGBM] [Fatal] CUDA Tree Learner was not enabled in this build.
Please recompile with CMake option -DUSE_CUDAP=1


[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 2, number of used features: 0
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 2, number of used features: 0


[LightGBM] [Fatal] CUDA Tree Learner was not enabled in this build.
Please recompile with CMake option -DUSE_CUDAP=1
[LightGBM] [Fatal] CUDA Tree Learner was not enabled in this build.
Please recompile with CMake option -DUSE_CUDAP=1


[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 2, number of used features: 0
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 2, number of used features: 0


[LightGBM] [Fatal] CUDA Tree Learner was not enabled in this build.
Please recompile with CMake option -DUSE_CUDAP=1
[LightGBM] [Fatal] CUDA Tree Learner was not enabled in this build.
Please recompile with CMake option -DUSE_CUDAP=1


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

[I 2023-11-29 18:58:46,403] Searching the best hyperparameters using 66322 samples...
[W 2023-11-29 19:00:43,730] Trial 21 failed with parameters: {'actual_estimator__eta': 0.3472233733243945, 'actual_estimator__depth': 8, 'actual_estimator__n_estimators': 270, 'actual_estimator__random_strength': 0.5997890431883712, 'actual_estimator__l2_leaf_reg': 1} because of the following error: KeyboardInterrupt('').
Traceback (most recent call last):
  File "/home/jules/miniconda3/lib/python3.11/site-packages/optuna/study/_optimize.py", line 200, in _run_trial
    value_or_values = func(trial)
                      ^^^^^^^^^^^
  File "/home/jules/miniconda3/lib/python3.11/site-packages/optuna/integration/sklearn.py", line 219, in __call__
    scores = cross_validate(
             ^^^^^^^^^^^^^^^
  File "/home/jules/miniconda3/lib/python3.11/site-packages/sklearn/model_selection/_validation.py", line 266, in cross_validate
    results = parallel(
              ^^^^^^^^^
  File "/home/jules/minico

KeyboardInterrupt: 

In [26]:
# Persist the per-lipid cross-validation summary to results.csv in the working directory
results_df.to_csv('results.csv')