In [None]:
import numpy as np
import pandas as pd
from catboost import CatBoostClassifier

In [None]:
# Load the training metadata table from the competition input directory.
df_train = pd.read_csv('/kaggle/input/isic-2024-challenge/train-metadata.csv')

# Keep the label aside; it is passed to the classifier separately from the features.
train_target = df_train['target']

# Columns removed from the feature frame (the label itself plus columns that
# must not be used as model inputs).
train_drop = [
    'target',
    'lesion_id',
    'iddx_full',
    'iddx_1',
    'iddx_2',
    'iddx_3',
    'iddx_4',
    'iddx_5',
    'mel_mitotic_index',
    'mel_thick_mm',
    'tbp_lv_dnn_lesion_confidence',
    'isic_id',
    'patient_id',
]
df_train = df_train.drop(columns=train_drop)

In [None]:
# Hand-crafted features for the training frame.
# The same formulas are repeated for df_test in a later cell; the two cells must
# be kept in sync, and the order in which columns are added here matters because
# categorical columns are later identified by position.

# Make both site columns string-typed before they are concatenated below.
df_train['anatom_site_general']=df_train['anatom_site_general'].astype('str')
df_train['tbp_lv_location']=df_train['tbp_lv_location'].astype('str')

# --- size, shape and colour contrasts ---
df_train['lesion_size_ratio']              = df_train['tbp_lv_minorAxisMM'] / df_train['clin_size_long_diam_mm']
df_train['lesion_shape_index']             = df_train['tbp_lv_areaMM2'] / (df_train['tbp_lv_perimeterMM'] ** 2)
df_train['hue_contrast']                   = (df_train['tbp_lv_H'] - df_train['tbp_lv_Hext']).abs()
df_train['luminance_contrast']             = (df_train['tbp_lv_L'] - df_train['tbp_lv_Lext']).abs()
# Sum of squared deltas (no square root is taken).
df_train['lesion_color_difference']        = (df_train['tbp_lv_deltaA'] ** 2 + df_train['tbp_lv_deltaB'] ** 2 + df_train['tbp_lv_deltaL'] ** 2)
df_train['border_complexity']              = df_train['tbp_lv_norm_border'] + df_train['tbp_lv_symm_2axis']
df_train['color_uniformity']               = df_train['tbp_lv_color_std_mean'] / (df_train['tbp_lv_radial_color_std_max'])


# --- position, perimeter/area ratios and combined site ---
# Squared distance from the origin of the x/y/z coordinates (no square root).
df_train['position_distance_3d']           = (df_train['tbp_lv_x'] ** 2 + df_train['tbp_lv_y'] ** 2 + df_train['tbp_lv_z'] ** 2)
df_train['perimeter_to_area_ratio']        = df_train['tbp_lv_perimeterMM'] / df_train['tbp_lv_areaMM2']
df_train['area_to_perimeter_ratio']        = df_train['tbp_lv_areaMM2'] / df_train['tbp_lv_perimeterMM']
df_train['lesion_visibility_score']        = df_train['tbp_lv_deltaLBnorm'] + df_train['tbp_lv_norm_color']
# String feature: "<anatom_site_general>_<tbp_lv_location>".
df_train['combined_anatomical_site']       = df_train['anatom_site_general'] + '_' + df_train['tbp_lv_location']
df_train['symmetry_border_consistency']    = df_train['tbp_lv_symm_2axis'] * df_train['tbp_lv_norm_border']
# product / sum of the same two columns.
df_train['consistency_symmetry_border']    = df_train['tbp_lv_symm_2axis'] * df_train['tbp_lv_norm_border'] / (df_train['tbp_lv_symm_2axis'] + df_train['tbp_lv_norm_border'])


# --- colour consistency, interactions and composite indices ---
df_train['color_consistency']              = df_train['tbp_lv_stdL'] / df_train['tbp_lv_Lext']
df_train['consistency_color']              = df_train['tbp_lv_stdL'] * df_train['tbp_lv_Lext'] / (df_train['tbp_lv_stdL'] + df_train['tbp_lv_Lext'])
df_train['size_age_interaction']           = df_train['clin_size_long_diam_mm'] * df_train['age_approx']
df_train['hue_color_std_interaction']      = df_train['tbp_lv_H'] * df_train['tbp_lv_color_std_mean']
df_train['lesion_severity_index']          = (df_train['tbp_lv_norm_border'] + df_train['tbp_lv_norm_color'] + df_train['tbp_lv_eccentricity']) / 3
# Depends on border_complexity and lesion_shape_index created above.
df_train['shape_complexity_index']         = df_train['border_complexity'] + df_train['lesion_shape_index']
df_train['color_contrast_index']           = df_train['tbp_lv_deltaA'] + df_train['tbp_lv_deltaB'] + df_train['tbp_lv_deltaL'] + df_train['tbp_lv_deltaLBnorm']

# NOTE(review): named "log_..." but no logarithm is applied; the value is area + 1.
df_train['log_lesion_area']                = (df_train['tbp_lv_areaMM2'] + 1)
df_train['normalized_lesion_size']         = df_train['clin_size_long_diam_mm'] / df_train['age_approx']
# NOTE(review): this is the mean of the two hue columns, not a difference.
df_train['mean_hue_difference']            = (df_train['tbp_lv_H'] + df_train['tbp_lv_Hext']) / 2
# Mean of the squared deltas (no square root is taken).
df_train['std_dev_contrast']               = ((df_train['tbp_lv_deltaA'] ** 2 + df_train['tbp_lv_deltaB'] ** 2 + df_train['tbp_lv_deltaL'] ** 2) / 3)
df_train['color_shape_composite_index']    = (df_train['tbp_lv_color_std_mean'] + df_train['tbp_lv_area_perim_ratio'] + df_train['tbp_lv_symm_2axis']) / 3

df_train['overall_color_difference']       = (df_train['tbp_lv_deltaA'] + df_train['tbp_lv_deltaB'] + df_train['tbp_lv_deltaL']) / 3

# --- further interactions and ratios ---
df_train['symmetry_perimeter_interaction'] = df_train['tbp_lv_symm_2axis'] * df_train['tbp_lv_perimeterMM']
df_train['comprehensive_lesion_index']     = (df_train['tbp_lv_area_perim_ratio'] + df_train['tbp_lv_eccentricity'] + df_train['tbp_lv_norm_color'] + df_train['tbp_lv_symm_2axis']) / 4
df_train['color_variance_ratio']           = df_train['tbp_lv_color_std_mean'] / df_train['tbp_lv_stdLExt']
df_train['border_color_interaction']       = df_train['tbp_lv_norm_border'] * df_train['tbp_lv_norm_color']
df_train['border_color_interaction_2']     = df_train['tbp_lv_norm_border'] * df_train['tbp_lv_norm_color'] / (df_train['tbp_lv_norm_border'] + df_train['tbp_lv_norm_color'])
df_train['size_color_contrast_ratio']      = df_train['clin_size_long_diam_mm'] / df_train['tbp_lv_deltaLBnorm']
df_train['age_normalized_nevi_confidence'] = df_train['tbp_lv_nevi_confidence'] / df_train['age_approx']
# NOTE(review): despite the name, this uses size and age only (sum of squares);
# tbp_lv_nevi_confidence is not involved.
df_train['age_normalized_nevi_confidence_2'] = (df_train['clin_size_long_diam_mm']**2 + df_train['age_approx']**2)
df_train['color_asymmetry_index']          = df_train['tbp_lv_radial_color_std_max'] * df_train['tbp_lv_symm_2axis']

# --- area/position product, colour range and age/size/symmetry products ---
df_train['volume_approximation_3d']        = df_train['tbp_lv_areaMM2'] * (df_train['tbp_lv_x']**2 + df_train['tbp_lv_y']**2 + df_train['tbp_lv_z']**2)
df_train['color_range']                    = (df_train['tbp_lv_L'] - df_train['tbp_lv_Lext']).abs() + (df_train['tbp_lv_A'] - df_train['tbp_lv_Aext']).abs() + (df_train['tbp_lv_B'] - df_train['tbp_lv_Bext']).abs()
df_train['shape_color_consistency']        = df_train['tbp_lv_eccentricity'] * df_train['tbp_lv_color_std_mean']
# NOTE(review): the denominator 2*pi*(area/pi) equals 2*area, so this is
# perimeter / (2 * area), i.e. half of perimeter_to_area_ratio.
df_train['border_length_ratio']            = df_train['tbp_lv_perimeterMM'] / (2 * np.pi * (df_train['tbp_lv_areaMM2'] / np.pi))
df_train['age_size_symmetry_index']        = df_train['age_approx'] * df_train['clin_size_long_diam_mm'] * df_train['tbp_lv_symm_2axis']
df_train['index_age_size_symmetry']        = df_train['age_approx'] * df_train['tbp_lv_areaMM2'] * df_train['tbp_lv_symm_2axis']

In [None]:
# Show the column index of the feature frame as the cell output (inspection only).
df_train.columns

In [None]:
# Partition the columns of df_train by dtype:
#   cat_features - positional indices of object-dtype columns (handed to
#                  CatBoost as the categorical feature indices when fitting)
#   num_features - names of every other column
# Object columns are also re-cast to str so their values are plain strings.
cat_features = []
num_features = []
for position, column in enumerate(df_train.columns):
    if df_train[column].dtype != 'O':
        num_features.append(column)
        continue
    cat_features.append(position)
    df_train[column] = df_train[column].astype('str')

In [None]:
# Binary CatBoost classifier fitted on the complete training frame.
# Options that were tried and left disabled in this cell: model_shrink_mode
# and task_type='GPU'.
clf = CatBoostClassifier(
    loss_function='Logloss',
    iterations=400,
    learning_rate=0.05,
    depth=8,
    l2_leaf_reg=5,
    nan_mode='Min',  # CatBoost's missing-value handling mode for numeric features
)

# cat_features holds the positional indices of the string-typed columns.
clf.fit(
    df_train,
    train_target,
    cat_features=cat_features,
    verbose=False,
)

In [None]:
# Load the test metadata and derive the same hand-crafted features that the
# training cell adds to df_train. The formulas must stay identical to the
# training cell so the model sees features with the same meaning.
df_test=pd.read_csv('/kaggle/input/isic-2024-challenge/test-metadata.csv')

# Fix: the training cell casts these two columns to str *before* building
# combined_anatomical_site. Without the same cast here, a row with a missing
# site produces NaN for the combined feature (later stringified on its own),
# which is a different category value from the one built during training, and
# an all-missing column would make the string concatenation below fail.
df_test['anatom_site_general'] = df_test['anatom_site_general'].astype('str')
df_test['tbp_lv_location'] = df_test['tbp_lv_location'].astype('str')

# --- size, shape and colour contrasts ---
df_test['lesion_size_ratio']              = df_test['tbp_lv_minorAxisMM'] / df_test['clin_size_long_diam_mm']
df_test['lesion_shape_index']             = df_test['tbp_lv_areaMM2'] / (df_test['tbp_lv_perimeterMM'] ** 2)
df_test['hue_contrast']                   = (df_test['tbp_lv_H'] - df_test['tbp_lv_Hext']).abs()
df_test['luminance_contrast']             = (df_test['tbp_lv_L'] - df_test['tbp_lv_Lext']).abs()
# Sum of squared deltas (no square root is taken).
df_test['lesion_color_difference']        = (df_test['tbp_lv_deltaA'] ** 2 + df_test['tbp_lv_deltaB'] ** 2 + df_test['tbp_lv_deltaL'] ** 2)
df_test['border_complexity']              = df_test['tbp_lv_norm_border'] + df_test['tbp_lv_symm_2axis']
df_test['color_uniformity']               = df_test['tbp_lv_color_std_mean'] / (df_test['tbp_lv_radial_color_std_max'])


# --- position, perimeter/area ratios and combined site ---
# Squared distance from the origin of the x/y/z coordinates (no square root).
df_test['position_distance_3d']           = (df_test['tbp_lv_x'] ** 2 + df_test['tbp_lv_y'] ** 2 + df_test['tbp_lv_z'] ** 2)
df_test['perimeter_to_area_ratio']        = df_test['tbp_lv_perimeterMM'] / df_test['tbp_lv_areaMM2']
df_test['area_to_perimeter_ratio']        = df_test['tbp_lv_areaMM2'] / df_test['tbp_lv_perimeterMM']
df_test['lesion_visibility_score']        = df_test['tbp_lv_deltaLBnorm'] + df_test['tbp_lv_norm_color']
# String feature: "<anatom_site_general>_<tbp_lv_location>" (inputs cast to str above).
df_test['combined_anatomical_site']       = df_test['anatom_site_general'] + '_' + df_test['tbp_lv_location']
df_test['symmetry_border_consistency']    = df_test['tbp_lv_symm_2axis'] * df_test['tbp_lv_norm_border']
df_test['consistency_symmetry_border']    = df_test['tbp_lv_symm_2axis'] * df_test['tbp_lv_norm_border'] / (df_test['tbp_lv_symm_2axis'] + df_test['tbp_lv_norm_border'])


# --- colour consistency, interactions and composite indices ---
df_test['color_consistency']              = df_test['tbp_lv_stdL'] / df_test['tbp_lv_Lext']
df_test['consistency_color']              = df_test['tbp_lv_stdL'] * df_test['tbp_lv_Lext'] / (df_test['tbp_lv_stdL'] + df_test['tbp_lv_Lext'])
df_test['size_age_interaction']           = df_test['clin_size_long_diam_mm'] * df_test['age_approx']
df_test['hue_color_std_interaction']      = df_test['tbp_lv_H'] * df_test['tbp_lv_color_std_mean']
df_test['lesion_severity_index']          = (df_test['tbp_lv_norm_border'] + df_test['tbp_lv_norm_color'] + df_test['tbp_lv_eccentricity']) / 3
# Depends on border_complexity and lesion_shape_index created above.
df_test['shape_complexity_index']         = df_test['border_complexity'] + df_test['lesion_shape_index']
df_test['color_contrast_index']           = df_test['tbp_lv_deltaA'] + df_test['tbp_lv_deltaB'] + df_test['tbp_lv_deltaL'] + df_test['tbp_lv_deltaLBnorm']

# NOTE(review): named "log_..." but no logarithm is applied; kept as area + 1
# so the feature matches the one the model is trained on.
df_test['log_lesion_area']                = (df_test['tbp_lv_areaMM2'] + 1)
df_test['normalized_lesion_size']         = df_test['clin_size_long_diam_mm'] / df_test['age_approx']
# NOTE(review): this is the mean of the two hue columns, not a difference.
df_test['mean_hue_difference']            = (df_test['tbp_lv_H'] + df_test['tbp_lv_Hext']) / 2
# Mean of the squared deltas (no square root is taken).
df_test['std_dev_contrast']               = ((df_test['tbp_lv_deltaA'] ** 2 + df_test['tbp_lv_deltaB'] ** 2 + df_test['tbp_lv_deltaL'] ** 2) / 3)
df_test['color_shape_composite_index']    = (df_test['tbp_lv_color_std_mean'] + df_test['tbp_lv_area_perim_ratio'] + df_test['tbp_lv_symm_2axis']) / 3

df_test['overall_color_difference']       = (df_test['tbp_lv_deltaA'] + df_test['tbp_lv_deltaB'] + df_test['tbp_lv_deltaL']) / 3

# --- further interactions and ratios ---
df_test['symmetry_perimeter_interaction'] = df_test['tbp_lv_symm_2axis'] * df_test['tbp_lv_perimeterMM']
df_test['comprehensive_lesion_index']     = (df_test['tbp_lv_area_perim_ratio'] + df_test['tbp_lv_eccentricity'] + df_test['tbp_lv_norm_color'] + df_test['tbp_lv_symm_2axis']) / 4
df_test['color_variance_ratio']           = df_test['tbp_lv_color_std_mean'] / df_test['tbp_lv_stdLExt']
df_test['border_color_interaction']       = df_test['tbp_lv_norm_border'] * df_test['tbp_lv_norm_color']
df_test['border_color_interaction_2']     = df_test['tbp_lv_norm_border'] * df_test['tbp_lv_norm_color'] / (df_test['tbp_lv_norm_border'] + df_test['tbp_lv_norm_color'])
df_test['size_color_contrast_ratio']      = df_test['clin_size_long_diam_mm'] / df_test['tbp_lv_deltaLBnorm']
df_test['age_normalized_nevi_confidence'] = df_test['tbp_lv_nevi_confidence'] / df_test['age_approx']
# NOTE(review): despite the name, this uses size and age only (sum of squares).
df_test['age_normalized_nevi_confidence_2'] = (df_test['clin_size_long_diam_mm']**2 + df_test['age_approx']**2)
df_test['color_asymmetry_index']          = df_test['tbp_lv_radial_color_std_max'] * df_test['tbp_lv_symm_2axis']

# --- area/position product, colour range and age/size/symmetry products ---
df_test['volume_approximation_3d']        = df_test['tbp_lv_areaMM2'] * (df_test['tbp_lv_x']**2 + df_test['tbp_lv_y']**2 + df_test['tbp_lv_z']**2)
df_test['color_range']                    = (df_test['tbp_lv_L'] - df_test['tbp_lv_Lext']).abs() + (df_test['tbp_lv_A'] - df_test['tbp_lv_Aext']).abs() + (df_test['tbp_lv_B'] - df_test['tbp_lv_Bext']).abs()
df_test['shape_color_consistency']        = df_test['tbp_lv_eccentricity'] * df_test['tbp_lv_color_std_mean']
# NOTE(review): the denominator 2*pi*(area/pi) equals 2*area, so this is
# perimeter / (2 * area); kept as-is to match the training feature.
df_test['border_length_ratio']            = df_test['tbp_lv_perimeterMM'] / (2 * np.pi * (df_test['tbp_lv_areaMM2'] / np.pi))
df_test['age_size_symmetry_index']        = df_test['age_approx'] * df_test['clin_size_long_diam_mm'] * df_test['tbp_lv_symm_2axis']
df_test['index_age_size_symmetry']        = df_test['age_approx'] * df_test['tbp_lv_areaMM2'] * df_test['tbp_lv_symm_2axis']

In [None]:
# Score the test rows and write the submission file.

# Keep the row identifiers; 'isic_id' is not a model feature and is removed below.
isic_id = df_test['isic_id']

# Select exactly the training features, in training column order. The previous
# nested loop kept the columns in test-file order, which is only correct when
# the two files happen to share an ordering; selecting by the training columns
# makes the alignment explicit and fails loudly if a feature is missing.
feature_columns = list(df_train.columns)
missing_columns = [col for col in feature_columns if col not in df_test.columns]
if missing_columns:
    raise KeyError(f"test metadata is missing training features: {missing_columns}")
df_test = df_test[feature_columns].copy()

# Cast the same columns that were declared categorical at fit time
# (cat_features holds their positional indices in df_train). Deciding by the
# test dtype instead would skip a categorical column that is entirely missing
# in the test file, because pandas reads such a column as float.
for col_test in (feature_columns[idx] for idx in cat_features):
    df_test[col_test] = df_test[col_test].astype('str')

# Column 1 of predict_proba is the probability of the second class.
pred = clf.predict_proba(df_test)
# Name the probability column 'target' so the CSV header is "isic_id,target"
# rather than "isic_id,0".
pred = pd.DataFrame({'target': pred[:, 1]})
df_sub = pd.concat([isic_id.reset_index(drop=True), pred], axis=1)
df_sub.to_csv('/kaggle/working/submission.csv', index=False)