# Baseline — Playground Series S5E7
Pierwsze podejście: Baseline - LightGBM

In [1]:
import sys
import pandas as pd
import numpy as np

# Dodaj src do sys.path, by importować własne moduły
sys.path.append('../src')

from experiment_logger import log_experiment

In [2]:
TRAIN_PATH = '../../playground-series-s5e7/train.csv'
TEST_PATH = '../../playground-series-s5e7/test.csv'

train_data = pd.read_csv(TRAIN_PATH)
test_data = pd.read_csv(TEST_PATH)

print('Train shape:', train_data.shape)
print('Test shape:', test_data.shape)
train_data.head()

Train shape: (18524, 9)
Test shape: (6175, 8)


Unnamed: 0,id,Time_spent_Alone,Stage_fear,Social_event_attendance,Going_outside,Drained_after_socializing,Friends_circle_size,Post_frequency,Personality
0,0,0.0,No,6.0,4.0,No,15.0,5.0,Extrovert
1,1,1.0,No,7.0,3.0,No,10.0,8.0,Extrovert
2,2,6.0,Yes,1.0,0.0,,3.0,0.0,Introvert
3,3,3.0,No,7.0,3.0,No,11.0,5.0,Extrovert
4,4,1.0,No,4.0,4.0,No,13.0,,Extrovert


In [3]:
train_data.isna().sum()

id                              0
Time_spent_Alone             1190
Stage_fear                   1893
Social_event_attendance      1180
Going_outside                1466
Drained_after_socializing    1149
Friends_circle_size          1054
Post_frequency               1264
Personality                     0
dtype: int64

In [4]:
test_data.isna().sum()

id                             0
Time_spent_Alone             425
Stage_fear                   598
Social_event_attendance      397
Going_outside                466
Drained_after_socializing    432
Friends_circle_size          350
Post_frequency               408
dtype: int64

In [5]:
train_data.info()
train_data.describe()
train_data['Personality'].value_counts(normalize=True)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 18524 entries, 0 to 18523
Data columns (total 9 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         18524 non-null  int64  
 1   Time_spent_Alone           17334 non-null  float64
 2   Stage_fear                 16631 non-null  object 
 3   Social_event_attendance    17344 non-null  float64
 4   Going_outside              17058 non-null  float64
 5   Drained_after_socializing  17375 non-null  object 
 6   Friends_circle_size        17470 non-null  float64
 7   Post_frequency             17260 non-null  float64
 8   Personality                18524 non-null  object 
dtypes: float64(5), int64(1), object(3)
memory usage: 1.3+ MB


Personality
Extrovert    0.739527
Introvert    0.260473
Name: proportion, dtype: float64

In [6]:
train_data.drop(columns=['id'], inplace=True)
test_data.drop(columns=['id'], inplace=True)

In [7]:
# Załóżmy, że usunąłeś już kolumnę 'id' z train i test
from data_utils import split_numerical_categorical
from sklearn.experimental import enable_iterative_imputer  # noqa
from sklearn.impute import IterativeImputer

# Zakładam, że X_train to Twój DataFrame z cechami (bez targetu i id)
target = train_data['Personality']
train_data.drop(columns=['Personality'], inplace=True)
numerical_cols, categorical_cols = split_numerical_categorical(train_data)

print("Zmienne numeryczne:", numerical_cols)
print("Zmienne kategoryczne:", categorical_cols)

# Łączymy train i test, by mieć spójny encoding i imputację
full = pd.concat([train_data, test_data], axis=0, ignore_index=True)

# Imputacja numeryczna
imputer = IterativeImputer(random_state=42)
full[numerical_cols] = imputer.fit_transform(full[numerical_cols])

print("Kolumny w full:", full.columns.tolist())
print("Kolumny kategorialne:", categorical_cols)

# One-hot encoding
full_encoded = pd.get_dummies(full, columns=categorical_cols)

# Rozdzielamy z powrotem
X_train = full_encoded.iloc[:len(train_data)]
X_test = full_encoded.iloc[len(train_data):]
y_train = target

Zmienne numeryczne: ['Time_spent_Alone', 'Social_event_attendance', 'Going_outside', 'Friends_circle_size', 'Post_frequency']
Zmienne kategoryczne: ['Stage_fear', 'Drained_after_socializing']
Kolumny w full: ['Time_spent_Alone', 'Stage_fear', 'Social_event_attendance', 'Going_outside', 'Drained_after_socializing', 'Friends_circle_size', 'Post_frequency']
Kolumny kategorialne: ['Stage_fear', 'Drained_after_socializing']


In [8]:
X_train.head()

Unnamed: 0,Time_spent_Alone,Social_event_attendance,Going_outside,Friends_circle_size,Post_frequency,Stage_fear_No,Stage_fear_Yes,Drained_after_socializing_No,Drained_after_socializing_Yes
0,0.0,6.0,4.0,15.0,5.0,True,False,True,False
1,1.0,7.0,3.0,10.0,8.0,True,False,True,False
2,6.0,1.0,0.0,3.0,0.0,False,True,False,False
3,3.0,7.0,3.0,11.0,5.0,True,False,True,False
4,1.0,4.0,4.0,13.0,5.708436,True,False,True,False


In [9]:
print(X_train.shape)
print(X_test.shape)

(18524, 9)
(6175, 9)


In [10]:
print(X_test.dtypes)
print(X_train.dtypes)
print(X_test.head())

Time_spent_Alone                 float64
Social_event_attendance          float64
Going_outside                    float64
Friends_circle_size              float64
Post_frequency                   float64
Stage_fear_No                       bool
Stage_fear_Yes                      bool
Drained_after_socializing_No        bool
Drained_after_socializing_Yes       bool
dtype: object
Time_spent_Alone                 float64
Social_event_attendance          float64
Going_outside                    float64
Friends_circle_size              float64
Post_frequency                   float64
Stage_fear_No                       bool
Stage_fear_Yes                      bool
Drained_after_socializing_No        bool
Drained_after_socializing_Yes       bool
dtype: object
       Time_spent_Alone  Social_event_attendance  Going_outside  \
18524          3.000000                      7.0            4.0   
18525          7.369721                      0.0            0.0   
18526          3.000000          

In [11]:
# Zakoduj target na liczby
y_train = y_train.map({'Extrovert': 0, 'Introvert': 1})

# Sprawdź, czy wszystko jest OK
print("Unikalne wartości y_train:", y_train.unique())
print("Typ y_train:", y_train.dtype)

Unikalne wartości y_train: [0 1]
Typ y_train: int64


In [12]:
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score
import xgboost as xgb
import numpy as np
from scipy.stats import mode
from xgboost.callback import EarlyStopping

skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)
scores = []
test_preds = np.zeros((len(X_test), 10))

for fold, (train_idx, val_idx) in enumerate(skf.split(X_train, y_train)):
    X_tr, X_val = X_train.iloc[train_idx], X_train.iloc[val_idx]
    y_tr, y_val = y_train.iloc[train_idx], y_train.iloc[val_idx]
    
    model = xgb.XGBClassifier(
        n_estimators=1000,
        learning_rate=0.05,
        random_state=42,
        n_jobs=-1,
        use_label_encoder=False,
        eval_metric='logloss'
    )

    model.fit(
        X_tr, y_tr,
        eval_set=[(X_val, y_val)],
    )
    
    val_pred = model.predict(X_val)
    score = accuracy_score(y_val, val_pred)
    scores.append(score)
    
    # Predykcje na test dla każdego folda (do uśrednienia)
    test_preds[:, fold] = model.predict_proba(X_test)[:, 1]

# Uśrednienie predykcji po foldach
mean_preds = test_preds.mean(axis=1)
final_test_pred = (mean_preds > 0.5).astype(int)
print(f'Fold accuracy: {scores}')
print(f'Mean CV accuracy: {np.mean(scores):.4f}')

[0]	validation_0-logloss:0.53235
[1]	validation_0-logloss:0.49692
[2]	validation_0-logloss:0.46600
[3]	validation_0-logloss:0.43875
[4]	validation_0-logloss:0.41443
[5]	validation_0-logloss:0.39250
[6]	validation_0-logloss:0.37266
[7]	validation_0-logloss:0.35465
[8]	validation_0-logloss:0.33822
[9]	validation_0-logloss:0.32317
[10]	validation_0-logloss:0.30936
[11]	validation_0-logloss:0.29667
[12]	validation_0-logloss:0.28492
[13]	validation_0-logloss:0.27407
[14]	validation_0-logloss:0.26407
[15]	validation_0-logloss:0.25484
[16]	validation_0-logloss:0.24614
[17]	validation_0-logloss:0.23814
[18]	validation_0-logloss:0.23071
[19]	validation_0-logloss:0.22373
[20]	validation_0-logloss:0.21728
[21]	validation_0-logloss:0.21133
[22]	validation_0-logloss:0.20571
[23]	validation_0-logloss:0.20054
[24]	validation_0-logloss:0.19565
[25]	validation_0-logloss:0.19115
[26]	validation_0-logloss:0.18683
[27]	validation_0-logloss:0.18289
[28]	validation_0-logloss:0.17911
[29]	validation_0-loglos

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[51]	validation_0-logloss:0.13825
[52]	validation_0-logloss:0.13765
[53]	validation_0-logloss:0.13699
[54]	validation_0-logloss:0.13636
[55]	validation_0-logloss:0.13580
[56]	validation_0-logloss:0.13534
[57]	validation_0-logloss:0.13485
[58]	validation_0-logloss:0.13440
[59]	validation_0-logloss:0.13393
[60]	validation_0-logloss:0.13352
[61]	validation_0-logloss:0.13323
[62]	validation_0-logloss:0.13291
[63]	validation_0-logloss:0.13262
[64]	validation_0-logloss:0.13231
[65]	validation_0-logloss:0.13203
[66]	validation_0-logloss:0.13175
[67]	validation_0-logloss:0.13151
[68]	validation_0-logloss:0.13138
[69]	validation_0-logloss:0.13127
[70]	validation_0-logloss:0.13111
[71]	validation_0-logloss:0.13104
[72]	validation_0-logloss:0.13097
[73]	validation_0-logloss:0.13091
[74]	validation_0-logloss:0.13089
[75]	validation_0-logloss:0.13086
[76]	validation_0-logloss:0.13076
[77]	validation_0-logloss:0.13068
[78]	validation_0-logloss:0.13058
[79]	validation_0-logloss:0.13050
[80]	validatio

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[50]	validation_0-logloss:0.13318
[51]	validation_0-logloss:0.13239
[52]	validation_0-logloss:0.13163
[53]	validation_0-logloss:0.13103
[54]	validation_0-logloss:0.13038
[55]	validation_0-logloss:0.12985
[56]	validation_0-logloss:0.12931
[57]	validation_0-logloss:0.12881
[58]	validation_0-logloss:0.12833
[59]	validation_0-logloss:0.12793
[60]	validation_0-logloss:0.12753
[61]	validation_0-logloss:0.12720
[62]	validation_0-logloss:0.12693
[63]	validation_0-logloss:0.12668
[64]	validation_0-logloss:0.12643
[65]	validation_0-logloss:0.12620
[66]	validation_0-logloss:0.12600
[67]	validation_0-logloss:0.12586
[68]	validation_0-logloss:0.12567
[69]	validation_0-logloss:0.12556
[70]	validation_0-logloss:0.12542
[71]	validation_0-logloss:0.12530
[72]	validation_0-logloss:0.12518
[73]	validation_0-logloss:0.12522
[74]	validation_0-logloss:0.12514
[75]	validation_0-logloss:0.12503
[76]	validation_0-logloss:0.12500
[77]	validation_0-logloss:0.12499
[78]	validation_0-logloss:0.12490
[79]	validatio

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[52]	validation_0-logloss:0.13104
[53]	validation_0-logloss:0.13032
[54]	validation_0-logloss:0.12963
[55]	validation_0-logloss:0.12898
[56]	validation_0-logloss:0.12834
[57]	validation_0-logloss:0.12778
[58]	validation_0-logloss:0.12722
[59]	validation_0-logloss:0.12679
[60]	validation_0-logloss:0.12626
[61]	validation_0-logloss:0.12587
[62]	validation_0-logloss:0.12553
[63]	validation_0-logloss:0.12511
[64]	validation_0-logloss:0.12484
[65]	validation_0-logloss:0.12457
[66]	validation_0-logloss:0.12433
[67]	validation_0-logloss:0.12405
[68]	validation_0-logloss:0.12369
[69]	validation_0-logloss:0.12355
[70]	validation_0-logloss:0.12330
[71]	validation_0-logloss:0.12328
[72]	validation_0-logloss:0.12308
[73]	validation_0-logloss:0.12281
[74]	validation_0-logloss:0.12282
[75]	validation_0-logloss:0.12265
[76]	validation_0-logloss:0.12260
[77]	validation_0-logloss:0.12243
[78]	validation_0-logloss:0.12228
[79]	validation_0-logloss:0.12223
[80]	validation_0-logloss:0.12218
[81]	validatio

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)



[42]	validation_0-logloss:0.15973
[43]	validation_0-logloss:0.15852
[44]	validation_0-logloss:0.15752
[45]	validation_0-logloss:0.15655
[46]	validation_0-logloss:0.15573
[47]	validation_0-logloss:0.15486
[48]	validation_0-logloss:0.15407
[49]	validation_0-logloss:0.15338
[50]	validation_0-logloss:0.15269
[51]	validation_0-logloss:0.15216
[52]	validation_0-logloss:0.15157
[53]	validation_0-logloss:0.15098
[54]	validation_0-logloss:0.15050
[55]	validation_0-logloss:0.15008
[56]	validation_0-logloss:0.14967
[57]	validation_0-logloss:0.14934
[58]	validation_0-logloss:0.14901
[59]	validation_0-logloss:0.14861
[60]	validation_0-logloss:0.14839
[61]	validation_0-logloss:0.14826
[62]	validation_0-logloss:0.14809
[63]	validation_0-logloss:0.14786
[64]	validation_0-logloss:0.14765
[65]	validation_0-logloss:0.14750
[66]	validation_0-logloss:0.14740
[67]	validation_0-logloss:0.14723
[68]	validation_0-logloss:0.14707
[69]	validation_0-logloss:0.14698
[70]	validation_0-logloss:0.14684
[71]	validati

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)



[53]	validation_0-logloss:0.14549
[54]	validation_0-logloss:0.14508
[55]	validation_0-logloss:0.14474
[56]	validation_0-logloss:0.14437
[57]	validation_0-logloss:0.14404
[58]	validation_0-logloss:0.14374
[59]	validation_0-logloss:0.14339
[60]	validation_0-logloss:0.14314
[61]	validation_0-logloss:0.14297
[62]	validation_0-logloss:0.14281
[63]	validation_0-logloss:0.14258
[64]	validation_0-logloss:0.14247
[65]	validation_0-logloss:0.14233
[66]	validation_0-logloss:0.14221
[67]	validation_0-logloss:0.14206
[68]	validation_0-logloss:0.14197
[69]	validation_0-logloss:0.14190
[70]	validation_0-logloss:0.14177
[71]	validation_0-logloss:0.14166
[72]	validation_0-logloss:0.14161
[73]	validation_0-logloss:0.14155
[74]	validation_0-logloss:0.14148
[75]	validation_0-logloss:0.14141
[76]	validation_0-logloss:0.14138
[77]	validation_0-logloss:0.14135
[78]	validation_0-logloss:0.14126
[79]	validation_0-logloss:0.14122
[80]	validation_0-logloss:0.14119
[81]	validation_0-logloss:0.14113
[82]	validati

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)



[64]	validation_0-logloss:0.14076
[65]	validation_0-logloss:0.14063
[66]	validation_0-logloss:0.14056
[67]	validation_0-logloss:0.14035
[68]	validation_0-logloss:0.14011
[69]	validation_0-logloss:0.14004
[70]	validation_0-logloss:0.14002
[71]	validation_0-logloss:0.14007
[72]	validation_0-logloss:0.14005
[73]	validation_0-logloss:0.14008
[74]	validation_0-logloss:0.14003
[75]	validation_0-logloss:0.14003
[76]	validation_0-logloss:0.14001
[77]	validation_0-logloss:0.14000
[78]	validation_0-logloss:0.14005
[79]	validation_0-logloss:0.14012
[80]	validation_0-logloss:0.14014
[81]	validation_0-logloss:0.14009
[82]	validation_0-logloss:0.14012
[83]	validation_0-logloss:0.14022
[84]	validation_0-logloss:0.14016
[85]	validation_0-logloss:0.14011
[86]	validation_0-logloss:0.14006
[87]	validation_0-logloss:0.14011
[88]	validation_0-logloss:0.14020
[89]	validation_0-logloss:0.14029
[90]	validation_0-logloss:0.14037
[91]	validation_0-logloss:0.14046
[92]	validation_0-logloss:0.14050
[93]	validati

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)



[55]	validation_0-logloss:0.12894
[56]	validation_0-logloss:0.12841
[57]	validation_0-logloss:0.12799
[58]	validation_0-logloss:0.12761
[59]	validation_0-logloss:0.12720
[60]	validation_0-logloss:0.12679
[61]	validation_0-logloss:0.12643
[62]	validation_0-logloss:0.12606
[63]	validation_0-logloss:0.12581
[64]	validation_0-logloss:0.12566
[65]	validation_0-logloss:0.12543
[66]	validation_0-logloss:0.12517
[67]	validation_0-logloss:0.12494
[68]	validation_0-logloss:0.12482
[69]	validation_0-logloss:0.12471
[70]	validation_0-logloss:0.12453
[71]	validation_0-logloss:0.12437
[72]	validation_0-logloss:0.12421
[73]	validation_0-logloss:0.12410
[74]	validation_0-logloss:0.12406
[75]	validation_0-logloss:0.12397
[76]	validation_0-logloss:0.12387
[77]	validation_0-logloss:0.12369
[78]	validation_0-logloss:0.12360
[79]	validation_0-logloss:0.12361
[80]	validation_0-logloss:0.12356
[81]	validation_0-logloss:0.12349
[82]	validation_0-logloss:0.12339
[83]	validation_0-logloss:0.12332
[84]	validati

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[61]	validation_0-logloss:0.13768
[62]	validation_0-logloss:0.13746
[63]	validation_0-logloss:0.13718
[64]	validation_0-logloss:0.13691
[65]	validation_0-logloss:0.13673
[66]	validation_0-logloss:0.13636
[67]	validation_0-logloss:0.13627
[68]	validation_0-logloss:0.13596
[69]	validation_0-logloss:0.13586
[70]	validation_0-logloss:0.13576
[71]	validation_0-logloss:0.13557
[72]	validation_0-logloss:0.13530
[73]	validation_0-logloss:0.13509
[74]	validation_0-logloss:0.13513
[75]	validation_0-logloss:0.13509
[76]	validation_0-logloss:0.13500
[77]	validation_0-logloss:0.13498
[78]	validation_0-logloss:0.13492
[79]	validation_0-logloss:0.13488
[80]	validation_0-logloss:0.13480
[81]	validation_0-logloss:0.13482
[82]	validation_0-logloss:0.13479
[83]	validation_0-logloss:0.13482
[84]	validation_0-logloss:0.13481
[85]	validation_0-logloss:0.13478
[86]	validation_0-logloss:0.13475
[87]	validation_0-logloss:0.13473
[88]	validation_0-logloss:0.13474
[89]	validation_0-logloss:0.13477
[90]	validatio

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[70]	validation_0-logloss:0.12260
[71]	validation_0-logloss:0.12248
[72]	validation_0-logloss:0.12237
[73]	validation_0-logloss:0.12223
[74]	validation_0-logloss:0.12212
[75]	validation_0-logloss:0.12200
[76]	validation_0-logloss:0.12189
[77]	validation_0-logloss:0.12180
[78]	validation_0-logloss:0.12173
[79]	validation_0-logloss:0.12167
[80]	validation_0-logloss:0.12161
[81]	validation_0-logloss:0.12151
[82]	validation_0-logloss:0.12155
[83]	validation_0-logloss:0.12155
[84]	validation_0-logloss:0.12161
[85]	validation_0-logloss:0.12157
[86]	validation_0-logloss:0.12157
[87]	validation_0-logloss:0.12154
[88]	validation_0-logloss:0.12153
[89]	validation_0-logloss:0.12142
[90]	validation_0-logloss:0.12145
[91]	validation_0-logloss:0.12145
[92]	validation_0-logloss:0.12144
[93]	validation_0-logloss:0.12142
[94]	validation_0-logloss:0.12152
[95]	validation_0-logloss:0.12146
[96]	validation_0-logloss:0.12167
[97]	validation_0-logloss:0.12174
[98]	validation_0-logloss:0.12182
[99]	validatio

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)



[54]	validation_0-logloss:0.12817
[55]	validation_0-logloss:0.12754
[56]	validation_0-logloss:0.12698
[57]	validation_0-logloss:0.12652
[58]	validation_0-logloss:0.12605
[59]	validation_0-logloss:0.12566
[60]	validation_0-logloss:0.12524
[61]	validation_0-logloss:0.12497
[62]	validation_0-logloss:0.12461
[63]	validation_0-logloss:0.12425
[64]	validation_0-logloss:0.12401
[65]	validation_0-logloss:0.12373
[66]	validation_0-logloss:0.12346
[67]	validation_0-logloss:0.12320
[68]	validation_0-logloss:0.12301
[69]	validation_0-logloss:0.12290
[70]	validation_0-logloss:0.12268
[71]	validation_0-logloss:0.12248
[72]	validation_0-logloss:0.12231
[73]	validation_0-logloss:0.12214
[74]	validation_0-logloss:0.12201
[75]	validation_0-logloss:0.12189
[76]	validation_0-logloss:0.12178
[77]	validation_0-logloss:0.12173
[78]	validation_0-logloss:0.12161
[79]	validation_0-logloss:0.12157
[80]	validation_0-logloss:0.12151
[81]	validation_0-logloss:0.12145
[82]	validation_0-logloss:0.12141
[83]	validati

In [13]:
import sys
sys.path.append('../src')

from experiment_logger import log_experiment

params = {
    'model': 'XGBClassifier',
    'encoding': 'one-hot',
    'n_splits': 10,
    'learning_rate': 0.05,
    'n_estimators': 1000,
    'random_state': 42
}

log_experiment(
    experiment_name='xgb_ohe_10fold',
    model_name='XGBClassifier',
    params=params,
    cv_score=np.mean(scores),
    comment='XGBoost, one-hot encoding, 10-fold CV - mean probability (uśredniona predykcja po foldach), Personality zakodowany na liczby, numeryczna imputacja (IterativeImputer) przed one-hot encodingiem'
)
print('Eksperyment został zalogowany!')

Eksperyment został zalogowany!


In [14]:
import os
# Wczytaj sample_submission, aby pobrać wymagane kolumny i kolejność
sample_submission = pd.read_csv('../../playground-series-s5e7/sample_submission.csv')

# Zakładam, że predykcje są w zmiennej final_test_pred (np. jako liczby lub kategorie)
if set(np.unique(final_test_pred)) == {0, 1}:
    label_map = {0: 'Extrovert', 1: 'Introvert'}
    final_test_pred = pd.Series(final_test_pred).map(label_map).values

submission = sample_submission.copy()
target_col = submission.columns[1]
submission[target_col] = final_test_pred

# Automatyczne nadawanie nazwy pliku
output_dir = '../outputs'
existing = [f for f in os.listdir(output_dir) if f.startswith('submission') and f.endswith('.csv')]
if 'submission.csv' in existing:
    # Szukamy submissionN.csv
    nums = [int(f.replace('submission', '').replace('.csv', '')) for f in existing if f != 'submission.csv' and f.replace('submission', '').replace('.csv', '').isdigit()]
    n = max(nums) if nums else 1
    new_name = f'submission{n+1}.csv'
else:
    new_name = 'submission.csv'

output_path = os.path.join(output_dir, new_name)
submission.to_csv(output_path, index=False)
print(f'Plik submission zapisany do {output_path}')
submission.head()

Plik submission zapisany do ../outputs\submission4.csv


Unnamed: 0,id,Personality
0,18524,Extrovert
1,18525,Introvert
2,18526,Extrovert
3,18527,Extrovert
4,18528,Introvert
