# Baseline — Playground Series S5E7
Pierwsze podejście: Baseline - LightGBM

In [2]:
import sys
import pandas as pd
import numpy as np

# Dodaj src do sys.path, by importować własne moduły
sys.path.append('../src')

from experiment_logger import log_experiment

In [3]:
TRAIN_PATH = '../../playground-series-s5e7/train.csv'
TEST_PATH = '../../playground-series-s5e7/test.csv'

train_data = pd.read_csv(TRAIN_PATH)
test_data = pd.read_csv(TEST_PATH)

print('Train shape:', train_data.shape)
print('Test shape:', test_data.shape)
train_data.head()

Train shape: (18524, 9)
Test shape: (6175, 8)


Unnamed: 0,id,Time_spent_Alone,Stage_fear,Social_event_attendance,Going_outside,Drained_after_socializing,Friends_circle_size,Post_frequency,Personality
0,0,0.0,No,6.0,4.0,No,15.0,5.0,Extrovert
1,1,1.0,No,7.0,3.0,No,10.0,8.0,Extrovert
2,2,6.0,Yes,1.0,0.0,,3.0,0.0,Introvert
3,3,3.0,No,7.0,3.0,No,11.0,5.0,Extrovert
4,4,1.0,No,4.0,4.0,No,13.0,,Extrovert


In [4]:
train_data.isna().sum()

id                              0
Time_spent_Alone             1190
Stage_fear                   1893
Social_event_attendance      1180
Going_outside                1466
Drained_after_socializing    1149
Friends_circle_size          1054
Post_frequency               1264
Personality                     0
dtype: int64

In [5]:
test_data.isna().sum()

id                             0
Time_spent_Alone             425
Stage_fear                   598
Social_event_attendance      397
Going_outside                466
Drained_after_socializing    432
Friends_circle_size          350
Post_frequency               408
dtype: int64

In [6]:
train_data.info()
train_data.describe()
train_data['Personality'].value_counts(normalize=True)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 18524 entries, 0 to 18523
Data columns (total 9 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         18524 non-null  int64  
 1   Time_spent_Alone           17334 non-null  float64
 2   Stage_fear                 16631 non-null  object 
 3   Social_event_attendance    17344 non-null  float64
 4   Going_outside              17058 non-null  float64
 5   Drained_after_socializing  17375 non-null  object 
 6   Friends_circle_size        17470 non-null  float64
 7   Post_frequency             17260 non-null  float64
 8   Personality                18524 non-null  object 
dtypes: float64(5), int64(1), object(3)
memory usage: 1.3+ MB


Personality
Extrovert    0.739527
Introvert    0.260473
Name: proportion, dtype: float64

In [7]:
train_data.drop(columns=['id'], inplace=True)
test_data.drop(columns=['id'], inplace=True)

In [8]:
# Załóżmy, że usunąłeś już kolumnę 'id' z train i test
from data_utils import split_numerical_categorical
from sklearn.experimental import enable_iterative_imputer  # noqa
from sklearn.impute import IterativeImputer

# Zakładam, że X_train to Twój DataFrame z cechami (bez targetu i id)
target = train_data['Personality']
train_data.drop(columns=['Personality'], inplace=True)
numerical_cols, categorical_cols = split_numerical_categorical(train_data)

print("Zmienne numeryczne:", numerical_cols)
print("Zmienne kategoryczne:", categorical_cols)

# Łączymy train i test, by mieć spójny encoding i imputację
full = pd.concat([train_data, test_data], axis=0, ignore_index=True)

# Imputacja numeryczna
imputer = IterativeImputer(random_state=42)
full[numerical_cols] = imputer.fit_transform(full[numerical_cols])

print("Kolumny w full:", full.columns.tolist())
print("Kolumny kategorialne:", categorical_cols)

# One-hot encoding
full_encoded = pd.get_dummies(full, columns=categorical_cols)

# Rozdzielamy z powrotem
X_train = full_encoded.iloc[:len(train_data)]
X_test = full_encoded.iloc[len(train_data):]
y_train = target

Zmienne numeryczne: ['Time_spent_Alone', 'Social_event_attendance', 'Going_outside', 'Friends_circle_size', 'Post_frequency']
Zmienne kategoryczne: ['Stage_fear', 'Drained_after_socializing']
Kolumny w full: ['Time_spent_Alone', 'Stage_fear', 'Social_event_attendance', 'Going_outside', 'Drained_after_socializing', 'Friends_circle_size', 'Post_frequency']
Kolumny kategorialne: ['Stage_fear', 'Drained_after_socializing']


In [9]:
X_train.head()

Unnamed: 0,Time_spent_Alone,Social_event_attendance,Going_outside,Friends_circle_size,Post_frequency,Stage_fear_No,Stage_fear_Yes,Drained_after_socializing_No,Drained_after_socializing_Yes
0,0.0,6.0,4.0,15.0,5.0,True,False,True,False
1,1.0,7.0,3.0,10.0,8.0,True,False,True,False
2,6.0,1.0,0.0,3.0,0.0,False,True,False,False
3,3.0,7.0,3.0,11.0,5.0,True,False,True,False
4,1.0,4.0,4.0,13.0,,True,False,True,False


In [10]:
print(X_train.shape)
print(X_test.shape)

(18524, 9)
(6175, 9)


In [11]:
print(X_test.dtypes)
print(X_train.dtypes)
print(X_test.head())

Time_spent_Alone                 float64
Social_event_attendance          float64
Going_outside                    float64
Friends_circle_size              float64
Post_frequency                   float64
Stage_fear_No                       bool
Stage_fear_Yes                      bool
Drained_after_socializing_No        bool
Drained_after_socializing_Yes       bool
dtype: object
Time_spent_Alone                 float64
Social_event_attendance          float64
Going_outside                    float64
Friends_circle_size              float64
Post_frequency                   float64
Stage_fear_No                       bool
Stage_fear_Yes                      bool
Drained_after_socializing_No        bool
Drained_after_socializing_Yes       bool
dtype: object
       Time_spent_Alone  Social_event_attendance  Going_outside  \
18524               3.0                      7.0            4.0   
18525               NaN                      0.0            0.0   
18526               3.0          

In [12]:
# Zakoduj target na liczby
y_train = y_train.map({'Extrovert': 0, 'Introvert': 1})

# Sprawdź, czy wszystko jest OK
print("Unikalne wartości y_train:", y_train.unique())
print("Typ y_train:", y_train.dtype)

Unikalne wartości y_train: [0 1]
Typ y_train: int64


In [13]:
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score
import xgboost as xgb
import numpy as np
from scipy.stats import mode
from xgboost.callback import EarlyStopping

skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)
scores = []
test_preds = np.zeros((len(X_test), 10))

for fold, (train_idx, val_idx) in enumerate(skf.split(X_train, y_train)):
    X_tr, X_val = X_train.iloc[train_idx], X_train.iloc[val_idx]
    y_tr, y_val = y_train.iloc[train_idx], y_train.iloc[val_idx]
    
    model = xgb.XGBClassifier(
        n_estimators=1000,
        learning_rate=0.05,
        random_state=42,
        n_jobs=-1,
        use_label_encoder=False,
        eval_metric='logloss'
    )

    model.fit(
        X_tr, y_tr,
        eval_set=[(X_val, y_val)],
    )
    
    val_pred = model.predict(X_val)
    score = accuracy_score(y_val, val_pred)
    scores.append(score)
    
    # Predykcje na test dla każdego folda (do uśrednienia)
    test_preds[:, fold] = model.predict_proba(X_test)[:, 1]

# Uśrednienie predykcji po foldach
mean_preds = test_preds.mean(axis=1)
final_test_pred = (mean_preds > 0.5).astype(int)
print(f'Fold accuracy: {scores}')
print(f'Mean CV accuracy: {np.mean(scores):.4f}')

[0]	validation_0-logloss:0.53233
[1]	validation_0-logloss:0.49684
[2]	validation_0-logloss:0.46587
[3]	validation_0-logloss:0.43849
[4]	validation_0-logloss:0.41413
[5]	validation_0-logloss:0.39222
[6]	validation_0-logloss:0.37235
[7]	validation_0-logloss:0.35429
[8]	validation_0-logloss:0.33776
[9]	validation_0-logloss:0.32272
[10]	validation_0-logloss:0.30900
[11]	validation_0-logloss:0.29626
[12]	validation_0-logloss:0.28461
[13]	validation_0-logloss:0.27377
[14]	validation_0-logloss:0.26374
[15]	validation_0-logloss:0.25443
[16]	validation_0-logloss:0.24587
[17]	validation_0-logloss:0.23782
[18]	validation_0-logloss:0.23041
[19]	validation_0-logloss:0.22350
[20]	validation_0-logloss:0.21700
[21]	validation_0-logloss:0.21099
[22]	validation_0-logloss:0.20545
[23]	validation_0-logloss:0.20028
[24]	validation_0-logloss:0.19539
[25]	validation_0-logloss:0.19088


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[26]	validation_0-logloss:0.18659
[27]	validation_0-logloss:0.18264
[28]	validation_0-logloss:0.17897
[29]	validation_0-logloss:0.17548
[30]	validation_0-logloss:0.17227
[31]	validation_0-logloss:0.16931
[32]	validation_0-logloss:0.16646
[33]	validation_0-logloss:0.16382
[34]	validation_0-logloss:0.16134
[35]	validation_0-logloss:0.15904
[36]	validation_0-logloss:0.15689
[37]	validation_0-logloss:0.15490
[38]	validation_0-logloss:0.15304
[39]	validation_0-logloss:0.15132
[40]	validation_0-logloss:0.14970
[41]	validation_0-logloss:0.14822
[42]	validation_0-logloss:0.14684
[43]	validation_0-logloss:0.14559
[44]	validation_0-logloss:0.14436
[45]	validation_0-logloss:0.14326
[46]	validation_0-logloss:0.14223
[47]	validation_0-logloss:0.14128
[48]	validation_0-logloss:0.14040
[49]	validation_0-logloss:0.13961
[50]	validation_0-logloss:0.13880
[51]	validation_0-logloss:0.13813
[52]	validation_0-logloss:0.13745
[53]	validation_0-logloss:0.13686
[54]	validation_0-logloss:0.13635
[55]	validatio

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)



[50]	validation_0-logloss:0.13269
[51]	validation_0-logloss:0.13198
[52]	validation_0-logloss:0.13134
[53]	validation_0-logloss:0.13069
[54]	validation_0-logloss:0.13021
[55]	validation_0-logloss:0.12966
[56]	validation_0-logloss:0.12916
[57]	validation_0-logloss:0.12884
[58]	validation_0-logloss:0.12843
[59]	validation_0-logloss:0.12816
[60]	validation_0-logloss:0.12775
[61]	validation_0-logloss:0.12734
[62]	validation_0-logloss:0.12702
[63]	validation_0-logloss:0.12669
[64]	validation_0-logloss:0.12643
[65]	validation_0-logloss:0.12616
[66]	validation_0-logloss:0.12591
[67]	validation_0-logloss:0.12566
[68]	validation_0-logloss:0.12539
[69]	validation_0-logloss:0.12517
[70]	validation_0-logloss:0.12499
[71]	validation_0-logloss:0.12483
[72]	validation_0-logloss:0.12467
[73]	validation_0-logloss:0.12453
[74]	validation_0-logloss:0.12446
[75]	validation_0-logloss:0.12443
[76]	validation_0-logloss:0.12434
[77]	validation_0-logloss:0.12418
[78]	validation_0-logloss:0.12406
[79]	validati

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)



[37]	validation_0-logloss:0.15110
[38]	validation_0-logloss:0.14920
[39]	validation_0-logloss:0.14739
[40]	validation_0-logloss:0.14569
[41]	validation_0-logloss:0.14407
[42]	validation_0-logloss:0.14261
[43]	validation_0-logloss:0.14121
[44]	validation_0-logloss:0.13988
[45]	validation_0-logloss:0.13869
[46]	validation_0-logloss:0.13760
[47]	validation_0-logloss:0.13657
[48]	validation_0-logloss:0.13561
[49]	validation_0-logloss:0.13466
[50]	validation_0-logloss:0.13375
[51]	validation_0-logloss:0.13295
[52]	validation_0-logloss:0.13218
[53]	validation_0-logloss:0.13147
[54]	validation_0-logloss:0.13080
[55]	validation_0-logloss:0.13023
[56]	validation_0-logloss:0.12968
[57]	validation_0-logloss:0.12926
[58]	validation_0-logloss:0.12873
[59]	validation_0-logloss:0.12832
[60]	validation_0-logloss:0.12800
[61]	validation_0-logloss:0.12760
[62]	validation_0-logloss:0.12722
[63]	validation_0-logloss:0.12688
[64]	validation_0-logloss:0.12664
[65]	validation_0-logloss:0.12642
[66]	validati

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[44]	validation_0-logloss:0.15767
[45]	validation_0-logloss:0.15669
[46]	validation_0-logloss:0.15580
[47]	validation_0-logloss:0.15503
[48]	validation_0-logloss:0.15431
[49]	validation_0-logloss:0.15367
[50]	validation_0-logloss:0.15304
[51]	validation_0-logloss:0.15243
[52]	validation_0-logloss:0.15188
[53]	validation_0-logloss:0.15143
[54]	validation_0-logloss:0.15104
[55]	validation_0-logloss:0.15057
[56]	validation_0-logloss:0.15016
[57]	validation_0-logloss:0.14981
[58]	validation_0-logloss:0.14950
[59]	validation_0-logloss:0.14919
[60]	validation_0-logloss:0.14884
[61]	validation_0-logloss:0.14857
[62]	validation_0-logloss:0.14844
[63]	validation_0-logloss:0.14822
[64]	validation_0-logloss:0.14804
[65]	validation_0-logloss:0.14789
[66]	validation_0-logloss:0.14779
[67]	validation_0-logloss:0.14768
[68]	validation_0-logloss:0.14760
[69]	validation_0-logloss:0.14756
[70]	validation_0-logloss:0.14752
[71]	validation_0-logloss:0.14746
[72]	validation_0-logloss:0.14742
[73]	validatio

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[49]	validation_0-logloss:0.14745
[50]	validation_0-logloss:0.14678
[51]	validation_0-logloss:0.14615
[52]	validation_0-logloss:0.14555
[53]	validation_0-logloss:0.14502
[54]	validation_0-logloss:0.14450
[55]	validation_0-logloss:0.14401
[56]	validation_0-logloss:0.14359
[57]	validation_0-logloss:0.14319
[58]	validation_0-logloss:0.14279
[59]	validation_0-logloss:0.14241
[60]	validation_0-logloss:0.14208
[61]	validation_0-logloss:0.14188
[62]	validation_0-logloss:0.14162
[63]	validation_0-logloss:0.14134
[64]	validation_0-logloss:0.14115
[65]	validation_0-logloss:0.14094
[66]	validation_0-logloss:0.14078
[67]	validation_0-logloss:0.14048
[68]	validation_0-logloss:0.14034
[69]	validation_0-logloss:0.14028
[70]	validation_0-logloss:0.14013
[71]	validation_0-logloss:0.13998
[72]	validation_0-logloss:0.13986
[73]	validation_0-logloss:0.13970
[74]	validation_0-logloss:0.13964
[75]	validation_0-logloss:0.13953
[76]	validation_0-logloss:0.13939
[77]	validation_0-logloss:0.13934
[78]	validatio

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[4]	validation_0-logloss:0.41576
[5]	validation_0-logloss:0.39408
[6]	validation_0-logloss:0.37455
[7]	validation_0-logloss:0.35668
[8]	validation_0-logloss:0.34041
[9]	validation_0-logloss:0.32552
[10]	validation_0-logloss:0.31188
[11]	validation_0-logloss:0.29930
[12]	validation_0-logloss:0.28779
[13]	validation_0-logloss:0.27706
[14]	validation_0-logloss:0.26718
[15]	validation_0-logloss:0.25803
[16]	validation_0-logloss:0.24955
[17]	validation_0-logloss:0.24166
[18]	validation_0-logloss:0.23435
[19]	validation_0-logloss:0.22754
[20]	validation_0-logloss:0.22123
[21]	validation_0-logloss:0.21532
[22]	validation_0-logloss:0.20986
[23]	validation_0-logloss:0.20481
[24]	validation_0-logloss:0.20005
[25]	validation_0-logloss:0.19559
[26]	validation_0-logloss:0.19150
[27]	validation_0-logloss:0.18764
[28]	validation_0-logloss:0.18405
[29]	validation_0-logloss:0.18070
[30]	validation_0-logloss:0.17761
[31]	validation_0-logloss:0.17472
[32]	validation_0-logloss:0.17200
[33]	validation_0-lo

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[36]	validation_0-logloss:0.15203
[37]	validation_0-logloss:0.14999
[38]	validation_0-logloss:0.14808
[39]	validation_0-logloss:0.14627
[40]	validation_0-logloss:0.14460
[41]	validation_0-logloss:0.14308
[42]	validation_0-logloss:0.14163
[43]	validation_0-logloss:0.14026
[44]	validation_0-logloss:0.13910
[45]	validation_0-logloss:0.13787
[46]	validation_0-logloss:0.13680
[47]	validation_0-logloss:0.13572
[48]	validation_0-logloss:0.13484
[49]	validation_0-logloss:0.13393
[50]	validation_0-logloss:0.13302
[51]	validation_0-logloss:0.13236
[52]	validation_0-logloss:0.13162
[53]	validation_0-logloss:0.13101
[54]	validation_0-logloss:0.13031
[55]	validation_0-logloss:0.12978
[56]	validation_0-logloss:0.12928
[57]	validation_0-logloss:0.12875
[58]	validation_0-logloss:0.12828
[59]	validation_0-logloss:0.12787
[60]	validation_0-logloss:0.12744
[61]	validation_0-logloss:0.12705
[62]	validation_0-logloss:0.12667
[63]	validation_0-logloss:0.12640
[64]	validation_0-logloss:0.12613
[65]	validatio

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)



[49]	validation_0-logloss:0.14256
[50]	validation_0-logloss:0.14173
[51]	validation_0-logloss:0.14105
[52]	validation_0-logloss:0.14042
[53]	validation_0-logloss:0.13978
[54]	validation_0-logloss:0.13925
[55]	validation_0-logloss:0.13871
[56]	validation_0-logloss:0.13831
[57]	validation_0-logloss:0.13787
[58]	validation_0-logloss:0.13746
[59]	validation_0-logloss:0.13713
[60]	validation_0-logloss:0.13667
[61]	validation_0-logloss:0.13637
[62]	validation_0-logloss:0.13603
[63]	validation_0-logloss:0.13568
[64]	validation_0-logloss:0.13547
[65]	validation_0-logloss:0.13526
[66]	validation_0-logloss:0.13504
[67]	validation_0-logloss:0.13483
[68]	validation_0-logloss:0.13470
[69]	validation_0-logloss:0.13464
[70]	validation_0-logloss:0.13455
[71]	validation_0-logloss:0.13450
[72]	validation_0-logloss:0.13445
[73]	validation_0-logloss:0.13443
[74]	validation_0-logloss:0.13435
[75]	validation_0-logloss:0.13438
[76]	validation_0-logloss:0.13421
[77]	validation_0-logloss:0.13404
[78]	validati

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[49]	validation_0-logloss:0.13132
[50]	validation_0-logloss:0.13044
[51]	validation_0-logloss:0.12973
[52]	validation_0-logloss:0.12902
[53]	validation_0-logloss:0.12827
[54]	validation_0-logloss:0.12764
[55]	validation_0-logloss:0.12703
[56]	validation_0-logloss:0.12654
[57]	validation_0-logloss:0.12604
[58]	validation_0-logloss:0.12558
[59]	validation_0-logloss:0.12510
[60]	validation_0-logloss:0.12470
[61]	validation_0-logloss:0.12437
[62]	validation_0-logloss:0.12400
[63]	validation_0-logloss:0.12366
[64]	validation_0-logloss:0.12330
[65]	validation_0-logloss:0.12308
[66]	validation_0-logloss:0.12276
[67]	validation_0-logloss:0.12258
[68]	validation_0-logloss:0.12236
[69]	validation_0-logloss:0.12217
[70]	validation_0-logloss:0.12199
[71]	validation_0-logloss:0.12177
[72]	validation_0-logloss:0.12157
[73]	validation_0-logloss:0.12145
[74]	validation_0-logloss:0.12136
[75]	validation_0-logloss:0.12123
[76]	validation_0-logloss:0.12114
[77]	validation_0-logloss:0.12104
[78]	validatio

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)



[33]	validation_0-logloss:0.15769
[34]	validation_0-logloss:0.15518
[35]	validation_0-logloss:0.15282
[36]	validation_0-logloss:0.15057
[37]	validation_0-logloss:0.14854
[38]	validation_0-logloss:0.14662
[39]	validation_0-logloss:0.14483
[40]	validation_0-logloss:0.14323
[41]	validation_0-logloss:0.14163
[42]	validation_0-logloss:0.14021
[43]	validation_0-logloss:0.13891
[44]	validation_0-logloss:0.13759
[45]	validation_0-logloss:0.13640
[46]	validation_0-logloss:0.13537
[47]	validation_0-logloss:0.13439
[48]	validation_0-logloss:0.13337
[49]	validation_0-logloss:0.13251
[50]	validation_0-logloss:0.13170
[51]	validation_0-logloss:0.13089
[52]	validation_0-logloss:0.13017
[53]	validation_0-logloss:0.12956
[54]	validation_0-logloss:0.12896
[55]	validation_0-logloss:0.12836
[56]	validation_0-logloss:0.12793
[57]	validation_0-logloss:0.12753
[58]	validation_0-logloss:0.12704
[59]	validation_0-logloss:0.12664
[60]	validation_0-logloss:0.12628
[61]	validation_0-logloss:0.12574
[62]	validati

In [14]:
import sys
sys.path.append('../src')

from experiment_logger import log_experiment

params = {
    'model': 'XGBClassifier',
    'encoding': 'one-hot',
    'n_splits': 10,
    'learning_rate': 0.05,
    'n_estimators': 1000,
    'random_state': 42
}

log_experiment(
    experiment_name='xgb_ohe_10fold',
    model_name='XGBClassifier',
    params=params,
    cv_score=np.mean(scores),
    comment='XGBoost, one-hot encoding, 10-fold CV - mean probability (uśredniona predykcja po foldach), Personality zakodowany na liczby, numeryczna imputacja (IterativeImputer) przed one-hot encodingiem'
)
print('Eksperyment został zalogowany!')

Eksperyment został zalogowany!


In [15]:
import os
# Wczytaj sample_submission, aby pobrać wymagane kolumny i kolejność
sample_submission = pd.read_csv('../../playground-series-s5e7/sample_submission.csv')

# Zakładam, że predykcje są w zmiennej final_test_pred (np. jako liczby lub kategorie)
if set(np.unique(final_test_pred)) == {0, 1}:
    label_map = {0: 'Extrovert', 1: 'Introvert'}
    final_test_pred = pd.Series(final_test_pred).map(label_map).values

submission = sample_submission.copy()
target_col = submission.columns[1]
submission[target_col] = final_test_pred

# Automatyczne nadawanie nazwy pliku
output_dir = '../outputs'
existing = [f for f in os.listdir(output_dir) if f.startswith('submission') and f.endswith('.csv')]
if 'submission.csv' in existing:
    # Szukamy submissionN.csv
    nums = [int(f.replace('submission', '').replace('.csv', '')) for f in existing if f != 'submission.csv' and f.replace('submission', '').replace('.csv', '').isdigit()]
    n = max(nums) if nums else 1
    new_name = f'submission{n+1}.csv'
else:
    new_name = 'submission.csv'

output_path = os.path.join(output_dir, new_name)
submission.to_csv(output_path, index=False)
print(f'Plik submission zapisany do {output_path}')
submission.head()

Plik submission zapisany do ../outputs\submission3.csv


Unnamed: 0,id,Personality
0,18524,Extrovert
1,18525,Introvert
2,18526,Extrovert
3,18527,Extrovert
4,18528,Introvert
