**In order to get the fastest predictions you need to enable GPUs for the notebook:**
* Navigate to Edit→Notebook Settings
* Select GPU from the Hardware Accelerator drop-down
  (see https://colab.research.google.com/notebooks/gpu.ipynb#scrollTo=oM_8ELnJq_wd for details)

In [18]:
# Reload imported modules automatically before each cell executes, so edits to
# local packages are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [19]:
!pip install caafe



In [20]:
from caafe import CAAFEClassifier # Automated Feature Engineering for tabular datasets
from tabpfn import TabPFNClassifier # Fast Automated Machine Learning method for small tabular datasets
from sklearn.ensemble import RandomForestClassifier

import os
import openai
import torch
from caafe import data
from sklearn.metrics import accuracy_score
from tabpfn.scripts import tabular_metrics
from functools import partial

import pandas as pd

In [21]:
# Never commit API keys to the notebook: read the key from the environment.
# Set it before starting Jupyter, e.g. `export OPENAI_API_KEY=...`.
# NOTE(review): any key that was previously stored in this cell must be
# considered leaked and should be revoked in the OpenAI dashboard.
openai.api_key = os.environ.get("OPENAI_API_KEY")
if not openai.api_key:
    raise RuntimeError(
        "OPENAI_API_KEY is not set; export it before running the CAAFE cells."
    )

In [29]:
# Load every benchmark dataset that is available locally (missing ones are
# reported and skipped in the output), then choose the scoring metric.
cc_test_datasets_multiclass = data.load_all_data()
metric_used = tabular_metrics.auc_metric

Number of datasets: 10
Loading balance-scale 11 ..
Loading breast-w 15 ..
Loading cmc 23 ..
Loading credit-g 31 ..
Loading diabetes 37 ..
Loading tic-tac-toe 50 ..
Loading eucalyptus 188 ..
Loading pc1 1068 ..
Loading airlines 1169 ..
Loading jungle_chess_2pcs_raw_endgame_complete 41027 ..
health-insurance-lead-prediction-raw-data at datasets_kaggle/health-insurance-lead-prediction-raw-data/Health Insurance Lead Prediction Raw Data.csv not found, skipping...
pharyngitis at datasets_kaggle/pharyngitis/pharyngitis.csv not found, skipping...
spaceship-titanic at datasets_kaggle/spaceship-titanic/train.csv not found, skipping...
playground-series-s3e12 at datasets_kaggle/playground-series-s3e12/train.csv not found, skipping...
Downsampling balance-scale to 20.0% of samples
Downsampling breast-w to 10.0% of samples
Downsampling tic-tac-toe to 10.0% of samples


In [30]:
def save_config(dataset_name, target, task_type, data_out_path):
    """Write the benchmark YAML config for a single dataset.

    The file is written to ``{data_out_path}/{dataset_name}/{dataset_name}.yaml``.
    It declares one single-fold entry whose train/test CSV paths are
    ``'{user}/data/{dataset_name}/{dataset_name}_train.csv'`` and ``..._test.csv``;
    the ``{user}`` token is emitted literally.

    Args:
        dataset_name: Dataset identifier; used for the entry name, the
            sub-directory and the file names.
        target: Name of the target column, written to the ``target`` field.
        task_type: String written to the ``type`` field.
        data_out_path: Root directory that contains the per-dataset folder.
            The per-dataset folder is created if it does not exist yet.
    """
    import os  # local import so the function is usable on its own

    config_lines = [
        f"- name: {dataset_name}",
        "  dataset:",
        f"    train: '{{user}}/data/{dataset_name}/{dataset_name}_train.csv'",
        f"    test: '{{user}}/data/{dataset_name}/{dataset_name}_test.csv'",
        f"    target: {target}",
        f"    type: {task_type}",
        "  folds: 1",
        "\n",
    ]
    config_str = "\n".join(config_lines)

    # Make sure the destination exists instead of relying on the caller.
    dataset_dir = os.path.join(data_out_path, dataset_name)
    os.makedirs(dataset_dir, exist_ok=True)
    yaml_file_local = os.path.join(dataset_dir, f"{dataset_name}.yaml")

    # The context manager closes the file even if a write fails.
    with open(yaml_file_local, "w") as f_local:
        f_local.write("--- \n \n")
        f_local.write(config_str)

In [31]:
data_path = "../data/"

# Targets with fewer distinct training values than this are treated as
# classification; anything at or above it is labelled regression.
MAX_CLASSIFICATION_CLASSES = 300


def _infer_task_type(n_classes):
    """Map the number of distinct training target values to a task-type label."""
    if n_classes == 2:
        return "binary"
    if n_classes < MAX_CLASSIFICATION_CLASSES:
        return "multiclass"
    return "regression"


def _export_dataset(dataset, out_root, seed=0):
    """Split one dataset and write its CSVs and YAML config under ``out_root``.

    All intermediate values live in function-local variables on purpose: the
    earlier top-level loop reused the names ``ds``, ``df_train``, ``df_test``
    and ``target_column_name``, which silently replaced the data used by the
    CAAFE cells whenever this cell was run after them.

    Returns:
        ``(dataset_name, task_type)`` for the exported dataset.
    """
    name = dataset[0]
    out_dir = os.path.join(out_root, name)
    os.makedirs(out_dir, exist_ok=True)

    split_ds, train_df, test_df, _, _ = data.get_data_split(dataset, seed=seed)
    target_col = split_ds[4][-1]
    task_type = _infer_task_type(train_df[target_col].nunique())

    save_config(dataset_name=name, target=target_col, data_out_path=out_root, task_type=task_type)
    train_df.to_csv(os.path.join(out_dir, f"{name}_train.csv"), index=False)
    test_df.to_csv(os.path.join(out_dir, f"{name}_test.csv"), index=False)

    # Also keep the full (train followed by test) table in one file.
    pd.concat([train_df, test_df]).to_csv(os.path.join(out_dir, f"{name}.csv"), index=False)
    return name, task_type


for dataset_entry in cc_test_datasets_multiclass:
    exported_name, exported_task = _export_dataset(dataset_entry, data_path)
    print(f"$CMD {exported_name} {exported_task} test")



Using initial description (tried reading data//dataset_descriptions/openml_balance-scale.txt)
$CMD balance-scale multiclass test
Using initial description (tried reading data//dataset_descriptions/openml_breast-w.txt)
$CMD breast-w binary test
Using initial description (tried reading data//dataset_descriptions/openml_cmc.txt)
$CMD cmc multiclass test
Using initial description (tried reading data//dataset_descriptions/openml_credit-g.txt)
$CMD credit-g binary test
Using initial description (tried reading data//dataset_descriptions/openml_diabetes.txt)
$CMD diabetes binary test
Using initial description (tried reading data//dataset_descriptions/openml_tic-tac-toe.txt)
$CMD tic-tac-toe binary test
Using initial description (tried reading data//dataset_descriptions/openml_eucalyptus.txt)
$CMD eucalyptus multiclass test
Using initial description (tried reading data//dataset_descriptions/openml_pc1.txt)
$CMD pc1 binary test
Using initial description (tried reading data//dataset_descriptions/

In [23]:
# Use the first loaded benchmark dataset for the CAAFE walkthrough below.
ds, df_train, df_test, _, _ = data.get_data_split(cc_test_datasets_multiclass[0], seed=0)
# The last entry of ds[4] is taken as the target column name and the last
# element of ds as the dataset description.
target_column_name, dataset_description = ds[4][-1], ds[-1]
ds[0]

Using initial description (tried reading data//dataset_descriptions/openml_balance-scale.txt)


'balance-scale'

In [34]:
from caafe.preprocessing import make_datasets_numeric

# Convert both splits with CAAFE's preprocessing helper, then separate the
# features from the target column for each split (train first, then test).
df_train, df_test = make_datasets_numeric(df_train, df_test, target_column_name)
(train_x, train_y), (test_x, test_y) = (
    data.get_X_y(split, target_column_name) for split in (df_train, df_test)
)

In [35]:
### Setup Base Classifier

# Baseline without feature engineering. TabPFN runs on the GPU when one is
# available and falls back to the CPU otherwise.
# Alternative baseline: clf_no_feat_eng = RandomForestClassifier()
clf_no_feat_eng = TabPFNClassifier(device=('cuda' if torch.cuda.is_available() else 'cpu'), N_ensemble_configurations=4)
# Bind overwrite_warning=True into every later call of .fit on this instance.
# NOTE(review): presumably this silences TabPFN's overwrite warning - confirm.
clf_no_feat_eng.fit = partial(clf_no_feat_eng.fit, overwrite_warning=True)

clf_no_feat_eng.fit(train_x, train_y)
pred = clf_no_feat_eng.predict(test_x)
# scikit-learn's signature is accuracy_score(y_true, y_pred): truth first.
acc = accuracy_score(test_y, pred)
print(f'Accuracy before CAAFE {acc}')

Accuracy before CAAFE 0.838


In [36]:
### Setup and Run CAAFE - This will be billed to your OpenAI Account!

# Wrap the baseline classifier; CAAFE asks the LLM for feature-engineering code
# for the configured number of iterations (the log below shows each proposal
# being kept or discarded based on the measured ROC/ACC change).
caafe_clf = CAAFEClassifier(base_classifier=clf_no_feat_eng,
                            llm_model="gpt-4",
                            iterations=10)

caafe_clf.fit_pandas(df_train,
                     target_column_name=target_column_name,
                     dataset_description=dataset_description)

pred = caafe_clf.predict(df_test)
# scikit-learn's signature is accuracy_score(y_true, y_pred): truth first.
acc = accuracy_score(test_y, pred)
print(f'Accuracy after CAAFE {acc}')

*Dataset description:*
 
**Balance Scale Weight & Distance Database**  
This data set was generated to model psychological experimental results.  Each example is classified as having the balance scale tip to the right, tip to the left, or be balanced. The attributes are the left weight, the left distance, the right weight, and the right distance. The correct way to find the class is the greater of (left-distance * left-weight) and (right-distance * right-weight). If they are equal, it is balanced.



 Attribute description  
The attributes are the left weight, the left distance, the right weight, and the right distance.



 Relevant papers  
Shultz, T., Mareschal, D., & Schmidt, W. (1994). Modeling Cognitive Development on Balance Scale Phenomena. Machine Learning, Vol. 16, pp. 59-88.


*Iteration 1*
```python

# Feature name and description: white_piece_moment
# Usefulness: This feature calculates the moment of the white piece which is a physical quantity that reflects the extent of force that can be applied by the piece to rotate the balance scale. This can be a useful feature to predict the class as it directly relates to the balance scale phenomena.
# Input samples: 'white_piece0_strength': [4.0, 7.0, 6.0], 'white_piece0_file': [6.0, 6.0, 1.0]
df['white_piece_moment'] = df['white_piece0_strength'] * df['white_piece0_file']

```
Performance before adding features ROC 0.801, ACC 0.920.
Performance after adding features ROC 0.794, ACC 0.919.
Improvement ROC -0.007, ACC -0.001.
The last code changes to ´df´ were discarded. (Improvement: -0.00772681085682414)




*Iteration 2*
```python

# Feature name and description: black_piece_moment
# Usefulness: This feature calculates the moment of the black piece which is a physical quantity that reflects the extent of force that can be applied by the piece to rotate the balance scale. This can be a useful feature to predict the class as it directly relates to the balance scale phenomena.
# Input samples: 'black_piece0_strength': [0.0, 0.0, 7.0], 'black_piece0_file': [1.0, 2.0, 4.0]
df['black_piece_moment'] = df['black_piece0_strength'] * df['black_piece0_file']

```
Performance before adding features ROC 0.801, ACC 0.920.
Performance after adding features ROC 0.799, ACC 0.922.
Improvement ROC -0.002, ACC 0.001.
The last code changes to ´df´ were discarded. (Improvement: -0.0005653970779462281)




*Iteration 3*
```python

# Feature name and description: total_moment_difference
# Usefulness: This feature calculates the difference between the moments of the white piece and the black piece. This can be a useful feature to predict the class as the balance scale will tip towards the side with the greater moment.
# Input samples: 'white_piece0_strength': [4.0, 7.0, 6.0], 'white_piece0_file': [6.0, 6.0, 1.0], 'black_piece0_strength': [0.0, 0.0, 7.0], 'black_piece0_file': [1.0, 2.0, 4.0]
df['total_moment_difference'] = (df['white_piece0_strength'] * df['white_piece0_file']) - (df['black_piece0_strength'] * df['black_piece0_file'])

```
Performance before adding features ROC 0.801, ACC 0.920.
Performance after adding features ROC 0.796, ACC 0.920.
Improvement ROC -0.005, ACC 0.000.
The last code changes to ´df´ were discarded. (Improvement: -0.004973780724582211)




*Iteration 4*
```python

# Feature name and description: total_strength
# Usefulness: This feature calculates the total strength of the white and black pieces. This can be a useful feature to predict the class as the balance scale will tip towards the side with the greater strength.
# Input samples: 'white_piece0_strength': [4.0, 7.0, 6.0], 'black_piece0_strength': [0.0, 0.0, 7.0]
df['total_strength'] = df['white_piece0_strength'] + df['black_piece0_strength']
```
Performance before adding features ROC 0.801, ACC 0.920.
Performance after adding features ROC 0.808, ACC 0.930.
Improvement ROC 0.007, ACC 0.010.
The code was executed and changes to ´df´ were kept.




*Iteration 5*
```python

# Feature name and description: strength_difference
# Usefulness: This feature calculates the difference in strength between the white and black pieces. This can be a useful feature to predict the class as the balance scale will tip towards the side with the greater strength.
# Input samples: 'white_piece0_strength': [4.0, 7.0, 6.0], 'black_piece0_strength': [0.0, 0.0, 7.0]
df['strength_difference'] = df['white_piece0_strength'] - df['black_piece0_strength']
```
Performance before adding features ROC 0.808, ACC 0.930.
Performance after adding features ROC 0.809, ACC 0.932.
Improvement ROC 0.001, ACC 0.002.
The code was executed and changes to ´df´ were kept.




*Iteration 6*
```python

# Explanation why the column white_piece0_rank is dropped
# The rank of the white piece does not contribute to the tipping of the balance scale as per the physical properties of the balance scale. Therefore, it can be dropped.
df.drop(columns=['white_piece0_rank'], inplace=True)
```
Performance before adding features ROC 0.809, ACC 0.932.
Performance after adding features ROC 0.673, ACC 0.822.
Improvement ROC -0.136, ACC -0.110.
The last code changes to ´df´ were discarded. (Improvement: -0.24641279442273012)




*Iteration 7*
```python

# Explanation why the column black_piece0_rank is dropped
# The rank of the black piece does not contribute to the tipping of the balance scale as per the physical properties of the balance scale. Therefore, it can be dropped.
df.drop(columns=['black_piece0_rank'], inplace=True)
```
Performance before adding features ROC 0.809, ACC 0.932.
Performance after adding features ROC 0.699, ACC 0.827.
Improvement ROC -0.110, ACC -0.105.
The last code changes to ´df´ were discarded. (Improvement: -0.21481308046144443)




*Iteration 8*
```python

# Feature name and description: distance_difference
# Usefulness: This feature calculates the difference in distance between the white and black pieces. This can be a useful feature to predict the class as the balance scale will tip towards the side with the greater distance.
# Input samples: 'white_piece0_file': [6.0, 6.0, 1.0], 'black_piece0_file': [1.0, 2.0, 4.0]
df['distance_difference'] = df['white_piece0_file'] - df['black_piece0_file']
```
Performance before adding features ROC 0.809, ACC 0.932.
Performance after adding features ROC 0.808, ACC 0.930.
Improvement ROC -0.001, ACC -0.002.
The last code changes to ´df´ were discarded. (Improvement: -0.002587636768232704)




*Iteration 9*
```python

# Feature name and description: total_distance
# Usefulness: This feature calculates the total distance of the white and black pieces. This can be a useful feature to predict the class as the balance scale will tip towards the side with the greater total distance.
# Input samples: 'white_piece0_file': [6.0, 6.0, 1.0], 'black_piece0_file': [1.0, 2.0, 4.0]
df['total_distance'] = df['white_piece0_file'] + df['black_piece0_file']
```
Performance before adding features ROC 0.809, ACC 0.932.
Performance after adding features ROC 0.807, ACC 0.930.
Improvement ROC -0.002, ACC -0.003.
The last code changes to ´df´ were discarded. (Improvement: -0.004199104313860613)




*Iteration 10*
```python

# Feature name and description: total_force
# Usefulness: This feature calculates the total force of the white and black pieces by multiplying their strength with their distance. This can be a useful feature to predict the class as the balance scale will tip towards the side with the greater total force.
# Input samples: 'white_piece0_strength': [4.0, 7.0, 6.0], 'white_piece0_file': [6.0, 6.0, 1.0], 'black_piece0_strength': [0.0, 0.0, 7.0], 'black_piece0_file': [1.0, 2.0, 4.0]
df['total_force'] = (df['white_piece0_strength'] * df['white_piece0_file']) + (df['black_piece0_strength'] * df['black_piece0_file'])
```
Performance before adding features ROC 0.809, ACC 0.932.
Performance after adding features ROC 0.807, ACC 0.928.
Improvement ROC -0.002, ACC -0.004.
The last code changes to ´df´ were discarded. (Improvement: -0.005594553624327703)



Accuracy after CAAFE 0.84


In [32]:
# Show the feature-engineering code stored on the fitted CAAFE classifier.
print(caafe_clf.code)


# Feature name: left_moment
# Usefulness: The moment of a force is a measure of its tendency to cause a body to rotate about a specific point or axis. This is as important as the force magnitude itself. In the context of balance scale, the moment can be calculated by multiplying the weight by its distance from the balance point. Therefore, this feature can help to classify whether the scale is tipped to the left, right or balanced.
# Input samples: 'left-weight': [4.0, 5.0, 1.0], 'left-distance': [2.0, 4.0, 4.0]
df['left_moment'] = df['left-weight'] * df['left-distance']


# Feature name: right_moment
# Usefulness: Similar to the left_moment, the right moment is also a measure of its tendency to cause a body to rotate about a specific point or axis. This feature can help to classify whether the scale is tipped to the left, right or balanced.
# Input samples: 'right-weight': [4.0, 4.0, 5.0], 'right-distance': [2.0, 4.0, 5.0]
df['right_moment'] = df['right-weight'] * df['right-distance'

### Optional: download the Kaggle datasets

In [16]:
#!ls ~/.kaggle/kaggle.json

# !mkdir ~/.kaggle
# !touch ~/.kaggle/kaggle.json

kaggle_api_token = {"username":"","key":""}

import json
with open('kaggle.json', 'w') as file:
    json.dump(kaggle_api_token, file)

    print("========================")

# !chmod 600 ~/.kaggle/kaggle.json
# !mkdir datasets_kaggle/

from caafe import data

for (name, _, _, user) in data.kaggle_dataset_ids:
    !kaggle datasets download -d {user}/{name}
    !mkdir datasets_kaggle/{name}
    !unzip {name}.zip -d datasets_kaggle/{name}

# Accept rules at https://www.kaggle.com/c/spaceship-titanic/rules
for name in data.kaggle_competition_ids:
    print(name)
    !kaggle competitions download -c {name}
    !mkdir datasets_kaggle/{name}
    !unzip {name}.zip -d datasets_kaggle/{name}

Traceback (most recent call last):
  File "/home/saeed/Documents/Github/CatDB/envCatDB/bin/kaggle", line 33, in <module>
    sys.exit(load_entry_point('kaggle==1.6.6', 'console_scripts', 'kaggle')())
  File "/home/saeed/Documents/Github/CatDB/envCatDB/bin/kaggle", line 25, in importlib_load_entry_point
    return next(matches).load()
  File "/usr/lib/python3.10/importlib/metadata/__init__.py", line 171, in load
    module = import_module(match.group('module'))
  File "/usr/lib/python3.10/importlib/__init__.py", line 126, in import_module
    return _bootstrap._gcd_import(name[level:], package, level)
  File "<frozen importlib._bootstrap>", line 1050, in _gcd_import
  File "<frozen importlib._bootstrap>", line 1027, in _find_and_load
  File "<frozen importlib._bootstrap>", line 992, in _find_and_load_unlocked
  File "<frozen importlib._bootstrap>", line 241, in _call_with_frames_removed
  File "<frozen importlib._bootstrap>", line 1050, in _gcd_import
  File "<frozen importlib._bootstra