### Import of librairies

In [27]:
import xgboost as xgb
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score
from tabpfn import TabPFNClassifier
from catboost import CatBoostClassifier

### Import of datasets

In [2]:
df = pd.read_csv("full_submission.csv") 
df_test = pd.read_csv("test-full.csv")
df_train = pd.read_csv("train.csv")

### Baseline: XGBoost with no feature eng

In [4]:
X_train, X_test, y_train, y_test = train_test_split(df_train.drop(columns=['Cover_Type']), df_train.Cover_Type, test_size=0.2)

In [6]:
le = LabelEncoder()
y_train_encode = le.fit_transform(y_train)
y_test_encode = le.transform(y_test)

In [7]:
model = xgb.XGBClassifier()
model.fit(X_train, y_train_encode)

In [8]:
output_train = model.predict(X_train)
output_test = model.predict(X_test)

#output_train = le.inverse_transform(model.predict(X_train))
#output_test = le.inverse_transform(model.predict(X_test))

In [10]:
acc_train = accuracy_score(y_train_encode, output_train)
acc_test = accuracy_score(y_test_encode, output_test)
print(f'The accuracy on the train set is equal to {round(acc_train*100,1)}%')
print(f'The accuracy on the test set is equal to {round(acc_test*100,1)}%')

The accuracy on the train set is equal to 99.6%
The accuracy on the test set is equal to 88.6%


In [25]:
model.get_params()

{'objective': 'multi:softprob',
 'use_label_encoder': None,
 'base_score': None,
 'booster': None,
 'callbacks': None,
 'colsample_bylevel': None,
 'colsample_bynode': None,
 'colsample_bytree': None,
 'early_stopping_rounds': None,
 'enable_categorical': False,
 'eval_metric': None,
 'feature_types': None,
 'gamma': None,
 'gpu_id': None,
 'grow_policy': None,
 'importance_type': None,
 'interaction_constraints': None,
 'learning_rate': None,
 'max_bin': None,
 'max_cat_threshold': None,
 'max_cat_to_onehot': None,
 'max_delta_step': None,
 'max_depth': None,
 'max_leaves': None,
 'min_child_weight': None,
 'missing': nan,
 'monotone_constraints': None,
 'n_estimators': 100,
 'n_jobs': None,
 'num_parallel_tree': None,
 'predictor': None,
 'random_state': None,
 'reg_alpha': None,
 'reg_lambda': None,
 'sampling_method': None,
 'scale_pos_weight': None,
 'subsample': None,
 'tree_method': None,
 'validate_parameters': None,
 'verbosity': None}

### Baseline: CatBoost with no feature eng

In [28]:
le = LabelEncoder()
y_train_encode = le.fit_transform(y_train)
y_test_encode = le.transform(y_test)

In [29]:
model = CatBoostClassifier()
model.fit(X_train, y_train_encode)

Learning rate set to 0.089854
0:	learn: 1.7509169	total: 243ms	remaining: 4m 2s
1:	learn: 1.6081058	total: 271ms	remaining: 2m 15s
2:	learn: 1.4891176	total: 310ms	remaining: 1m 42s
3:	learn: 1.4072993	total: 338ms	remaining: 1m 24s
4:	learn: 1.3314998	total: 365ms	remaining: 1m 12s
5:	learn: 1.2724267	total: 391ms	remaining: 1m 4s
6:	learn: 1.2112812	total: 415ms	remaining: 58.9s
7:	learn: 1.1626876	total: 441ms	remaining: 54.7s
8:	learn: 1.1209750	total: 470ms	remaining: 51.7s
9:	learn: 1.0794485	total: 500ms	remaining: 49.5s
10:	learn: 1.0429190	total: 527ms	remaining: 47.4s
11:	learn: 1.0119657	total: 556ms	remaining: 45.7s
12:	learn: 0.9828737	total: 581ms	remaining: 44.1s
13:	learn: 0.9595753	total: 606ms	remaining: 42.7s
14:	learn: 0.9346088	total: 633ms	remaining: 41.6s
15:	learn: 0.9141315	total: 698ms	remaining: 43s
16:	learn: 0.8940719	total: 742ms	remaining: 42.9s
17:	learn: 0.8794662	total: 773ms	remaining: 42.2s
18:	learn: 0.8631452	total: 800ms	remaining: 41.3s
19:	learn

160:	learn: 0.4423529	total: 5.36s	remaining: 27.9s
161:	learn: 0.4414343	total: 5.38s	remaining: 27.8s
162:	learn: 0.4401084	total: 5.41s	remaining: 27.8s
163:	learn: 0.4386404	total: 5.43s	remaining: 27.7s
164:	learn: 0.4379275	total: 5.46s	remaining: 27.6s
165:	learn: 0.4371920	total: 5.49s	remaining: 27.6s
166:	learn: 0.4364101	total: 5.52s	remaining: 27.5s
167:	learn: 0.4356650	total: 5.55s	remaining: 27.5s
168:	learn: 0.4344778	total: 5.59s	remaining: 27.5s
169:	learn: 0.4340618	total: 5.63s	remaining: 27.5s
170:	learn: 0.4336555	total: 5.67s	remaining: 27.5s
171:	learn: 0.4325175	total: 5.7s	remaining: 27.5s
172:	learn: 0.4311973	total: 5.74s	remaining: 27.4s
173:	learn: 0.4304162	total: 5.78s	remaining: 27.4s
174:	learn: 0.4296184	total: 5.82s	remaining: 27.4s
175:	learn: 0.4290642	total: 5.85s	remaining: 27.4s
176:	learn: 0.4279613	total: 5.89s	remaining: 27.4s
177:	learn: 0.4273646	total: 5.92s	remaining: 27.4s
178:	learn: 0.4267873	total: 5.95s	remaining: 27.3s
179:	learn: 0

319:	learn: 0.3423810	total: 10.8s	remaining: 22.9s
320:	learn: 0.3416046	total: 10.8s	remaining: 22.9s
321:	learn: 0.3412898	total: 10.8s	remaining: 22.8s
322:	learn: 0.3410798	total: 10.9s	remaining: 22.8s
323:	learn: 0.3406026	total: 10.9s	remaining: 22.8s
324:	learn: 0.3401567	total: 11s	remaining: 22.7s
325:	learn: 0.3397673	total: 11s	remaining: 22.7s
326:	learn: 0.3388908	total: 11s	remaining: 22.7s
327:	learn: 0.3386209	total: 11.1s	remaining: 22.7s
328:	learn: 0.3381180	total: 11.1s	remaining: 22.7s
329:	learn: 0.3377405	total: 11.2s	remaining: 22.7s
330:	learn: 0.3370221	total: 11.2s	remaining: 22.6s
331:	learn: 0.3367966	total: 11.2s	remaining: 22.6s
332:	learn: 0.3364266	total: 11.3s	remaining: 22.6s
333:	learn: 0.3361038	total: 11.3s	remaining: 22.6s
334:	learn: 0.3357790	total: 11.4s	remaining: 22.6s
335:	learn: 0.3348076	total: 11.4s	remaining: 22.5s
336:	learn: 0.3341906	total: 11.5s	remaining: 22.5s
337:	learn: 0.3337832	total: 11.5s	remaining: 22.6s
338:	learn: 0.3333

484:	learn: 0.2853256	total: 15.8s	remaining: 16.8s
485:	learn: 0.2850336	total: 15.8s	remaining: 16.8s
486:	learn: 0.2849489	total: 15.9s	remaining: 16.7s
487:	learn: 0.2844641	total: 15.9s	remaining: 16.7s
488:	learn: 0.2841002	total: 16s	remaining: 16.7s
489:	learn: 0.2838304	total: 16s	remaining: 16.7s
490:	learn: 0.2836297	total: 16s	remaining: 16.6s
491:	learn: 0.2834187	total: 16.1s	remaining: 16.6s
492:	learn: 0.2831158	total: 16.1s	remaining: 16.6s
493:	learn: 0.2828809	total: 16.1s	remaining: 16.5s
494:	learn: 0.2823233	total: 16.2s	remaining: 16.5s
495:	learn: 0.2822254	total: 16.2s	remaining: 16.5s
496:	learn: 0.2818089	total: 16.2s	remaining: 16.4s
497:	learn: 0.2814296	total: 16.3s	remaining: 16.4s
498:	learn: 0.2813519	total: 16.3s	remaining: 16.4s
499:	learn: 0.2810888	total: 16.3s	remaining: 16.3s
500:	learn: 0.2805320	total: 16.4s	remaining: 16.3s
501:	learn: 0.2802839	total: 16.4s	remaining: 16.3s
502:	learn: 0.2800371	total: 16.4s	remaining: 16.2s
503:	learn: 0.2796

650:	learn: 0.2426723	total: 21.6s	remaining: 11.6s
651:	learn: 0.2425170	total: 21.6s	remaining: 11.5s
652:	learn: 0.2424074	total: 21.6s	remaining: 11.5s
653:	learn: 0.2420711	total: 21.6s	remaining: 11.4s
654:	learn: 0.2419531	total: 21.7s	remaining: 11.4s
655:	learn: 0.2416903	total: 21.7s	remaining: 11.4s
656:	learn: 0.2415614	total: 21.7s	remaining: 11.3s
657:	learn: 0.2413839	total: 21.7s	remaining: 11.3s
658:	learn: 0.2411470	total: 21.8s	remaining: 11.3s
659:	learn: 0.2408777	total: 21.8s	remaining: 11.2s
660:	learn: 0.2406264	total: 21.8s	remaining: 11.2s
661:	learn: 0.2404272	total: 21.8s	remaining: 11.1s
662:	learn: 0.2402107	total: 21.9s	remaining: 11.1s
663:	learn: 0.2400963	total: 21.9s	remaining: 11.1s
664:	learn: 0.2397500	total: 21.9s	remaining: 11s
665:	learn: 0.2396366	total: 21.9s	remaining: 11s
666:	learn: 0.2394285	total: 22s	remaining: 11s
667:	learn: 0.2391500	total: 22s	remaining: 10.9s
668:	learn: 0.2390471	total: 22s	remaining: 10.9s
669:	learn: 0.2388030	to

811:	learn: 0.2134244	total: 25.8s	remaining: 5.96s
812:	learn: 0.2131843	total: 25.8s	remaining: 5.93s
813:	learn: 0.2130202	total: 25.8s	remaining: 5.9s
814:	learn: 0.2128140	total: 25.8s	remaining: 5.86s
815:	learn: 0.2126367	total: 25.9s	remaining: 5.83s
816:	learn: 0.2125234	total: 25.9s	remaining: 5.8s
817:	learn: 0.2124450	total: 25.9s	remaining: 5.76s
818:	learn: 0.2123203	total: 25.9s	remaining: 5.73s
819:	learn: 0.2121672	total: 26s	remaining: 5.7s
820:	learn: 0.2120237	total: 26s	remaining: 5.66s
821:	learn: 0.2118946	total: 26s	remaining: 5.63s
822:	learn: 0.2117568	total: 26s	remaining: 5.6s
823:	learn: 0.2116307	total: 26.1s	remaining: 5.57s
824:	learn: 0.2114252	total: 26.1s	remaining: 5.53s
825:	learn: 0.2111760	total: 26.1s	remaining: 5.5s
826:	learn: 0.2110895	total: 26.1s	remaining: 5.47s
827:	learn: 0.2106594	total: 26.2s	remaining: 5.43s
828:	learn: 0.2103933	total: 26.2s	remaining: 5.4s
829:	learn: 0.2102090	total: 26.2s	remaining: 5.37s
830:	learn: 0.2101647	tota

974:	learn: 0.1891653	total: 30.1s	remaining: 772ms
975:	learn: 0.1890127	total: 30.1s	remaining: 741ms
976:	learn: 0.1889350	total: 30.1s	remaining: 710ms
977:	learn: 0.1887723	total: 30.2s	remaining: 679ms
978:	learn: 0.1886709	total: 30.2s	remaining: 648ms
979:	learn: 0.1885272	total: 30.2s	remaining: 617ms
980:	learn: 0.1883162	total: 30.2s	remaining: 586ms
981:	learn: 0.1882270	total: 30.3s	remaining: 555ms
982:	learn: 0.1879740	total: 30.3s	remaining: 524ms
983:	learn: 0.1877759	total: 30.3s	remaining: 493ms
984:	learn: 0.1875847	total: 30.3s	remaining: 462ms
985:	learn: 0.1872972	total: 30.4s	remaining: 431ms
986:	learn: 0.1872222	total: 30.4s	remaining: 400ms
987:	learn: 0.1871693	total: 30.4s	remaining: 370ms
988:	learn: 0.1870054	total: 30.5s	remaining: 339ms
989:	learn: 0.1868779	total: 30.5s	remaining: 308ms
990:	learn: 0.1867728	total: 30.5s	remaining: 277ms
991:	learn: 0.1866167	total: 30.5s	remaining: 246ms
992:	learn: 0.1865382	total: 30.6s	remaining: 215ms
993:	learn: 

<catboost.core.CatBoostClassifier at 0x1f485a75a00>

In [30]:
output_train = model.predict(X_train)
output_test = model.predict(X_test)

#output_train = le.inverse_transform(model.predict(X_train))
#output_test = le.inverse_transform(model.predict(X_test))

In [31]:
acc_train = accuracy_score(y_train_encode, output_train)
acc_test = accuracy_score(y_test_encode, output_test)
print(f'The accuracy on the train set is equal to {round(acc_train*100,1)}%')
print(f'The accuracy on the test set is equal to {round(acc_test*100,1)}%')

The accuracy on the train set is equal to 95.7%
The accuracy on the test set is equal to 88.1%


## Test avec TabPFNClassifier

In [12]:



# N_ensemble_configurations controls the number of model predictions that are ensembled with feature and class rotations (See our work for details).
# When N_ensemble_configurations > #features * #classes, no further averaging is applied.

classifier = TabPFNClassifier(device='cpu', N_ensemble_configurations=32)

classifier.fit(X_train, y_train_encode)
y_eval, p_eval = classifier.predict(X_test, return_winning_probability=True)

print('Accuracy', accuracy_score(y_test_encode, y_eval))

Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters


ValueError: ⚠️ WARNING: TabPFN is not made for datasets with a trainingsize > 1024. Prediction might take a while, be less reliable. We advise not to run datasets > 10k samples, which might lead to your machine crashing (due to quadratic memory scaling of TabPFN). Please confirm you want to run by passing overwrite_warning=True to the fit function.

In [None]:
from autogluon.tabular import TabularPredictor
train_data = TabularDataset('https://autogluon.s3.amazonaws.com/datasets/Inc/train.csv')
test_data = TabularDataset('https://autogluon.s3.amazonaws.com/datasets/Inc/test.csv')
predictor = TabularPredictor(label='class').fit(train_data=train_data)
predictions = predictor.predict(test_data)

# SUBMIT

In [None]:
le = LabelEncoder()
y_train_encode = le.fit_transform(df_train.Cover_Type)

In [None]:
model = xgb.XGBClassifier()
model.fit(df_train.drop(columns=['Cover_Type']), y_train_encode)

In [None]:
output = model.predict(df_test)

In [None]:
final_output = le.inverse_transform(output)

In [None]:
df.Cover_Type = final_output

In [None]:
df.to_csv('submission_ML2.csv', index=False)