# **Imports**

In [None]:
%%time

!pip install -qq scikit-learn==1.6.1

CPU times: user 14.8 ms, sys: 1.86 ms, total: 16.6 ms
Wall time: 2.51 s


In [None]:
!nvidia-smi


Sat Apr 26 17:36:02 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA A100-SXM4-40GB          Off |   00000000:00:04.0 Off |                    0 |
| N/A   31C    P0             45W /  400W |       0MiB /  40960MiB |      0%      Default |
|                                         |                        |             Disabled |
+-----------------------------------------+------------------------+----------------------+
                                                

In [None]:
import pandas as pd
import numpy as np
import optuna
import time
from catboost import CatBoostRegressor, Pool
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from xgboost.callback import EarlyStopping

In [None]:
%%time

from tqdm import tqdm
from itertools import combinations
from xgboost import XGBRegressor


import numpy as np
import pandas as pd
import polars as pl

import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import KFold
from sklearn.preprocessing import TargetEncoder

import lightgbm as lgb

from cuml.preprocessing import TargetEncoder
import cudf
from sklearn.metrics import mean_squared_error

import warnings
warnings.simplefilter('ignore')

CPU times: user 7.55 s, sys: 2.24 s, total: 9.79 s
Wall time: 14.8 s


# **Load + Preprocessing**

In [None]:
%%time

def feature_eng(df):
    """Derive ``Episode_Num`` from ``Episode_Title`` and drop the title column.

    Everything after the first 8 characters of the title is kept and stored
    with the ``category`` dtype.  The new column is written onto the frame
    that was passed in; the returned frame is the result of ``drop`` and no
    longer contains ``Episode_Title``.
    """
    episode_suffix = df['Episode_Title'].str.slice(8)
    df['Episode_Num'] = episode_suffix.astype('category')
    return df.drop(columns='Episode_Title')

CPU times: user 4 µs, sys: 1e+03 ns, total: 5 µs
Wall time: 6.2 µs


In [None]:
%%time

# Read the train and test CSVs (indexed by `id`) and run feature_eng on each,
# in the same order as before: train first, then test.
df_train, df_test = (
    feature_eng(pd.read_csv(csv_path, index_col='id'))
    for csv_path in ('/content/train.csv', '/content/test.csv')
)

# The sample submission is only read here; it is not passed through feature_eng.
df_subm = pd.read_csv('/content/sample_submission.csv', index_col='id')

# Columns that are stored with the pandas `category` dtype.
cat_c = ['Episode_Num', 'Publication_Day', 'Publication_Time', 'Episode_Sentiment','Podcast_Name','Genre']

def update(df):
    """Cast every column listed in ``cat_c`` to ``category`` on ``df`` itself and return it."""
    as_category = df[cat_c].astype('category')
    for name in cat_c:
        df[name] = as_category[name]
    return df

# Apply the categorical casts to both frames (train first, then test).
df_train, df_test = update(df_train), update(df_test)

CPU times: user 1.78 s, sys: 285 ms, total: 2.06 s
Wall time: 2.08 s


In [None]:
%%time

def n_fe(df):
    """Add indicator and ratio features to ``df`` in place and return it.

    New columns (in insertion order):
      * ``Is_Weekend`` - 1 when ``Publication_Day`` is Saturday or Sunday.
      * ``Is_High_Host_Popularity`` / ``Is_High_Guest_Popularity`` - 1 when the
        corresponding popularity percentage is strictly greater than 70.
      * ``Host_Guest_Popularity_Gap`` - host minus guest popularity.
      * ``Ad_Density`` - ``Number_of_Ads / Episode_Length_minutes`` with
        +/-inf (division by a zero length) replaced by NaN.
      * ``Is_Long_Episode`` - 1 when the episode is longer than 60 minutes.

    Comparisons against NaN evaluate to False, so missing popularity or
    length values yield 0 in the indicator columns.
    """
    host = df['Host_Popularity_percentage']
    guest = df['Guest_Popularity_percentage']
    length = df['Episode_Length_minutes']

    df['Is_Weekend'] = df['Publication_Day'].isin(['Saturday', 'Sunday']).astype(int)
    df['Is_High_Host_Popularity'] = (host > 70).astype(int)
    df['Is_High_Guest_Popularity'] = (guest > 70).astype(int)
    df['Host_Guest_Popularity_Gap'] = host - guest

    # Clean the ratio *before* assigning it.  Calling
    # df['Ad_Density'].replace(..., inplace=True) mutates a temporary Series:
    # pandas 2.x warns about it and under copy-on-write it silently does
    # nothing, which would leave inf values in the feature.
    ad_density = df['Number_of_Ads'] / length
    df['Ad_Density'] = ad_density.replace([np.inf, -np.inf], np.nan)

    df['Is_Long_Episode'] = (length > 60).astype(int)

    return df

# Add the engineered numeric features to both frames (train first, then test).
df_train, df_test = n_fe(df_train), n_fe(df_test)

CPU times: user 51.8 ms, sys: 1.79 ms, total: 53.6 ms
Wall time: 55.9 ms


In [None]:
# Print the current training columns as a plain Python list.
print(list(df_train.columns))


['Podcast_Name', 'Episode_Length_minutes', 'Genre', 'Host_Popularity_percentage', 'Publication_Day', 'Publication_Time', 'Guest_Popularity_percentage', 'Number_of_Ads', 'Episode_Sentiment', 'Listening_Time_minutes', 'Episode_Num', 'Is_Weekend', 'Is_High_Host_Popularity', 'Is_High_Guest_Popularity', 'Host_Guest_Popularity_Gap', 'Ad_Density', 'Is_Long_Episode']


In [None]:
def add_stat_features(df_train, df_test):
    """Add train-derived group-mean features to both frames (in place).

    For each (new column, grouping key, value column) triple the mean of the
    value column is computed per group on ``df_train`` only and then mapped
    onto ``df_train`` and ``df_test`` through the grouping key:

      * ``mean_listen_time_per_podcast`` - mean listening time per podcast
      * ``mean_listen_time_per_genre``   - mean listening time per genre
      * ``mean_ads_per_day``             - mean number of ads per publication day

    Keys that do not occur in ``df_train`` map to NaN.

    NOTE(review): the two listening-time means are computed from the target
    over the full training frame and written back onto those same rows, so
    any validation split made afterwards has already seen its own targets
    through these columns - consider out-of-fold statistics.

    Returns:
        The same ``(df_train, df_test)`` objects, with the new columns added.
    """
    stat_specs = [
        ('mean_listen_time_per_podcast', 'Podcast_Name', 'Listening_Time_minutes'),
        ('mean_listen_time_per_genre', 'Genre', 'Listening_Time_minutes'),
        ('mean_ads_per_day', 'Publication_Day', 'Number_of_Ads'),
    ]

    for new_col, key, value in stat_specs:
        # observed=True: only groups that actually occur get a mean; unseen
        # categories then simply fail the lookup below and become NaN.
        group_mean = df_train.groupby(key, observed=True)[value].mean()
        for frame in (df_train, df_test):
            # Mapping a categorical key can return a *categorical* column of
            # floats (when the mapping is one-to-one).  Force a numeric dtype
            # so downstream code treats the statistic as a number and not as
            # one more categorical feature.
            frame[new_col] = frame[key].map(group_mean).astype('float64')

    return df_train, df_test
# Attach the train-derived group means to both frames.
df_train, df_test = add_stat_features(df_train, df_test)

In [None]:
%%time

# Base columns whose values are combined into higher-order categorical features.
encode_columns = ['Episode_Length_minutes', 'Episode_Num', 'Host_Popularity_percentage', 'Number_of_Ads', 'Episode_Sentiment', 'Publication_Day', 'Publication_Time']
# Sizes of the column combinations to build (pairs up to 5-tuples).
pair_size = [2, 3, 4, 5]

# Convert each base column to strings exactly once per frame; every
# combination below reuses these instead of re-casting the same columns.
train_str = {col: df_train[col].astype(str) for col in encode_columns}
test_str = {col: df_test[col].astype(str) for col in encode_columns}


def _concat_columns(str_cols, cols):
    """Join the string columns named in ``cols`` with '_' and return a categorical Series."""
    joined = str_cols[cols[0]]
    for col in cols[1:]:
        # Vectorised element-wise concatenation; replaces the row-by-row
        # agg('_'.join, axis=1) that dominated this cell's runtime.
        joined = joined + '_' + str_cols[col]
    return joined.astype('category')


for r in pair_size:
    for cols in tqdm(list(combinations(encode_columns, r))):
        new_col_name = '_'.join(cols)

        df_train[new_col_name] = _concat_columns(train_str, cols)
        df_test[new_col_name] = _concat_columns(test_str, cols)

100%|██████████| 21/21 [01:42<00:00,  4.90s/it]
100%|██████████| 35/35 [03:30<00:00,  6.01s/it]
100%|██████████| 35/35 [04:10<00:00,  7.16s/it]
100%|██████████| 21/21 [02:48<00:00,  8.00s/it]

CPU times: user 12min 1s, sys: 12.3 s, total: 12min 13s
Wall time: 12min 11s





# **Train**

In [None]:
%%time

# Target vector, and the feature matrix made of every other column.
y = df_train['Listening_Time_minutes']
X = df_train.drop(columns='Listening_Time_minutes')

CPU times: user 77.7 ms, sys: 137 ms, total: 215 ms
Wall time: 215 ms


In [None]:
pip install -U xgboost


Collecting xgboost
  Downloading xgboost-3.0.0-py3-none-manylinux_2_28_x86_64.whl.metadata (2.1 kB)
Downloading xgboost-3.0.0-py3-none-manylinux_2_28_x86_64.whl (253.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m253.9/253.9 MB[0m [31m8.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: xgboost
  Attempting uninstall: xgboost
    Found existing installation: xgboost 2.1.4
    Uninstalling xgboost-2.1.4:
      Successfully uninstalled xgboost-2.1.4
Successfully installed xgboost-3.0.0


In [None]:
# NOTE(review): xgboost is already imported by earlier cells, so this prints the
# version that was loaded back then; a package upgraded mid-session presumably
# only shows up here after a kernel restart - confirm before relying on it.
import xgboost
print(xgboost.__version__)  # should be >= 2.0.0

2.1.4


In [None]:
# Sanity check: show which class the name XGBRegressor is currently bound to.
print(XGBRegressor)

<class 'xgboost.sklearn.XGBRegressor'>


In [None]:
pip install optuna

Collecting optuna
  Downloading optuna-4.3.0-py3-none-any.whl.metadata (17 kB)
Collecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.15.2-py3-none-any.whl.metadata (7.3 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.9.0-py3-none-any.whl.metadata (10 kB)
Downloading optuna-4.3.0-py3-none-any.whl (386 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m386.6/386.6 kB[0m [31m8.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading alembic-1.15.2-py3-none-any.whl (231 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m231.9/231.9 kB[0m [31m20.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading colorlog-6.9.0-py3-none-any.whl (11 kB)
Installing collected packages: colorlog, alembic, optuna
Successfully installed alembic-1.15.2 colorlog-6.9.0 optuna-4.3.0


In [None]:
import optuna
import xgboost as xgb
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from cuml.preprocessing import TargetEncoder
import numpy as np
import time

def objective(trial):
    """Optuna objective: hold-out RMSE of a GPU XGBoost regressor.

    Uses the notebook-level ``X`` / ``y``.  Each call samples one set of
    hyperparameters, makes a fixed 80/20 split (``random_state=42``),
    target-encodes every ``category`` column (fitted on the training part
    only), trains the model and returns the RMSE on the validation part.

    Args:
        trial: the ``optuna`` trial used to sample hyperparameters.

    Returns:
        Validation RMSE as a float (the study minimises it).
    """
    params = {
        'learning_rate': trial.suggest_float('learning_rate', 0.002, 0.05, log=True),
        'n_estimators': trial.suggest_int('n_estimators', 2000, 5000),
        'max_depth': trial.suggest_int('max_depth', 3, 12),
        'subsample': trial.suggest_float('subsample', 0.7, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),
        'gamma': trial.suggest_float('gamma', 0.0, 1.5),
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 10),
        'reg_alpha': trial.suggest_float('reg_alpha', 0.0, 3.0),
        'reg_lambda': trial.suggest_float('reg_lambda', 0.0, 10.0),
        'enable_categorical': True,
        # 'gpu_hist' is deprecated since XGBoost 2.0: GPU training is now
        # requested with the 'hist' tree method plus device='cuda'.
        'tree_method': 'hist',
        'device': 'cuda',
        'eval_metric': 'rmse',
    }

    # Hold-out split (copies, because the encoded columns are overwritten below)
    X_tr, X_val, y_tr, y_val = train_test_split(X, y, test_size=0.2, random_state=42)
    X_tr = X_tr.copy()
    X_val = X_val.copy()

    # Target-encode every categorical column; the encoder is fitted on the
    # training part only and then applied to the validation part.
    encoded_columns = [c for c in X.columns if X[c].dtype.name == 'category']
    for c in encoded_columns:
        encoder = TargetEncoder(
            n_folds=5,
            smooth=0,
            split_method='random',
            stat='mean'
        )
        X_tr[c] = encoder.fit_transform(X_tr[[c]], y_tr)
        X_val[c] = encoder.transform(X_val[[c]])

    # Training
    model = xgb.XGBRegressor(**params, random_state=42, verbosity=0)

    start = time.time()

    model.fit(
        X_tr, y_tr,
        eval_set=[(X_val, y_val)],
        verbose=100
    )

    y_pred = model.predict(X_val)
    rmse = np.sqrt(mean_squared_error(y_val, y_pred))

    print(f"[Trial {trial.number}] RMSE: {rmse:.5f} | Time: {time.time() - start:.1f}s")
    return rmse

# Run the Optuna study: 50 trials minimising the RMSE returned by `objective`.
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=50, show_progress_bar=True)

print('🏆 Best RMSE:', study.best_value)
print('🎯 Best hyperparameters:', study.best_params)


[I 2025-04-26 18:17:56,493] A new study created in memory with name: no-name-adbdff96-d7c1-4d59-ab60-a3f32ee4fa1d


  0%|          | 0/50 [00:01<?, ?it/s]

[0]	validation_0-rmse:27.07401
[100]	validation_0-rmse:22.59873
[200]	validation_0-rmse:19.36298
[300]	validation_0-rmse:17.07886
[400]	validation_0-rmse:15.50803
[500]	validation_0-rmse:14.44996
[600]	validation_0-rmse:13.74956
[700]	validation_0-rmse:13.28926
[800]	validation_0-rmse:12.98723
[900]	validation_0-rmse:12.78774
[1000]	validation_0-rmse:12.65456
[1100]	validation_0-rmse:12.56353
[1200]	validation_0-rmse:12.50059
[1300]	validation_0-rmse:12.45613
[1400]	validation_0-rmse:12.42380
[1500]	validation_0-rmse:12.40039
[1600]	validation_0-rmse:12.38315
[1700]	validation_0-rmse:12.36885
[1800]	validation_0-rmse:12.35796
[1900]	validation_0-rmse:12.34848
[2000]	validation_0-rmse:12.34120
[2100]	validation_0-rmse:12.33563
[2200]	validation_0-rmse:12.33062
[2300]	validation_0-rmse:12.32614
[2400]	validation_0-rmse:12.32191
[2500]	validation_0-rmse:12.31797
[2600]	validation_0-rmse:12.31436
[2700]	validation_0-rmse:12.31095
[2800]	validation_0-rmse:12.30749
[2900]	validation_0-rmse:1

In [None]:
# Display the test-set index (the `id` column that was set as index when test.csv was read).
df_test.index

Index([750000, 750001, 750002, 750003, 750004, 750005, 750006, 750007, 750008,
       750009,
       ...
       999990, 999991, 999992, 999993, 999994, 999995, 999996, 999997, 999998,
       999999],
      dtype='int64', name='id', length=250000)

In [None]:
!pip install catboost


Collecting catboost
  Downloading catboost-1.2.8-cp311-cp311-manylinux2014_x86_64.whl.metadata (1.2 kB)
Downloading catboost-1.2.8-cp311-cp311-manylinux2014_x86_64.whl (99.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m99.2/99.2 MB[0m [31m24.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: catboost
Successfully installed catboost-1.2.8


In [None]:
def optimize_catboost(X, y, n_trials=50):
    """Tune a GPU CatBoost regressor with Optuna on a fixed 80/20 hold-out.

    Args:
        X: feature frame; every non-numeric column is passed to CatBoost as a
            categorical feature (after conversion to strings).
        y: target aligned with ``X``.
        n_trials: number of Optuna trials to run.

    Returns:
        The finished ``optuna`` study (minimising validation RMSE).
    """
    # The split, the string conversion and the CatBoost pools are identical
    # for every trial, so they are built once here instead of once per trial.
    X_tr, X_val, y_tr, y_val = train_test_split(X, y, test_size=0.2, random_state=42)
    X_tr = X_tr.copy()
    X_val = X_val.copy()

    # Categorical values are handed over as strings (missing values become
    # the literal 'nan').
    for col in X_tr.columns:
        if not pd.api.types.is_numeric_dtype(X_tr[col]):
            X_tr[col] = X_tr[col].astype(str).astype('category')
            X_val[col] = X_val[col].astype(str).astype('category')

    cat_features = [i for i, col in enumerate(X_tr.columns) if str(X_tr[col].dtype) == 'category']

    train_pool = Pool(X_tr, y_tr, cat_features=cat_features)
    val_pool = Pool(X_val, y_val, cat_features=cat_features)

    def objective(trial):
        """Train one CatBoost configuration and return its validation RMSE."""
        params = {
            'iterations': trial.suggest_int('iterations', 3000, 6000),
            'learning_rate': trial.suggest_float('learning_rate', 0.002, 0.05, log=True),
            'depth': trial.suggest_int('depth', 4, 12),
            'l2_leaf_reg': trial.suggest_float('l2_leaf_reg', 1, 10),
            'bootstrap_type': trial.suggest_categorical('bootstrap_type', ['Bayesian', 'Bernoulli']),
            'random_strength': trial.suggest_float('random_strength', 0.5, 2.0),
            'min_data_in_leaf': trial.suggest_int('min_data_in_leaf', 1, 20),
            'loss_function': 'RMSE',
            'task_type': 'GPU',
            'early_stopping_rounds': 100,
            'verbose': False,
        }
        # 'colsample_bylevel' (rsm) is intentionally not tuned: CatBoost only
        # supports it on GPU for pairwise losses and rejects it for RMSE.

        # 'subsample' is rejected by CatBoost together with Bayesian
        # bootstrap, so it is only sampled for the Bernoulli variant.
        if params['bootstrap_type'] == 'Bernoulli':
            params['subsample'] = trial.suggest_float('subsample', 0.6, 1.0)

        model = CatBoostRegressor(**params)

        start = time.time()
        model.fit(train_pool, eval_set=val_pool)

        y_pred = model.predict(val_pool)
        # mean_squared_error(..., squared=False) no longer exists in
        # scikit-learn >= 1.6, so the root is taken explicitly.
        rmse = mean_squared_error(y_val, y_pred) ** 0.5

        print(f"[Trial {trial.number}] RMSE: {rmse:.5f} | Time: {time.time() - start:.1f}s")
        return rmse

    study = optuna.create_study(direction='minimize')
    study.optimize(objective, n_trials=n_trials, show_progress_bar=True)

    print('🏆 Best CatBoost RMSE:', study.best_value)
    print('🎯 Best CatBoost hyperparameters:', study.best_params)

    return study

# Run the CatBoost hyperparameter search on the full feature matrix (50 trials).
study_catboost = optimize_catboost(X, y, n_trials=50)



[I 2025-04-26 18:08:18,972] A new study created in memory with name: no-name-bdaebd4e-72b0-41be-bc58-2a46f94271eb


  0%|          | 0/50 [00:00<?, ?it/s]

[W 2025-04-26 18:09:56,286] Trial 0 failed with parameters: {'iterations': 5158, 'learning_rate': 0.03744976930582472, 'depth': 9, 'l2_leaf_reg': 7.076460622567708, 'subsample': 0.7921589559224818, 'colsample_bylevel': 0.6703497737071957, 'bootstrap_type': 'Bayesian', 'random_strength': 1.7503209980164598, 'min_data_in_leaf': 18} because of the following error: KeyboardInterrupt().
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/optuna/study/_optimize.py", line 197, in _run_trial
    value_or_values = func(trial)
                      ^^^^^^^^^^^
  File "<ipython-input-22-32f9737f5163>", line 35, in objective
    X_tr[col] = X_tr[col].astype(str).astype('category')
                ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/pandas/core/generic.py", line 6643, in astype
    new_data = self._mgr.astype(dtype=dtype, copy=copy, errors=errors)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

KeyboardInterrupt: 

In [None]:
%%time
import xgboost as xgb

from cuml.preprocessing import TargetEncoder
import numpy as np
import time

# Hyperparameters for the final fit on the full training set.
# NOTE(review): presumably taken from a tuning run and then adjusted by hand - confirm.
best_params = {
    'learning_rate': 0.0020497845359063525,
    'n_estimators': 7000,
    'max_depth': 12,
    'subsample': 0.9655694188482005,
    'colsample_bytree': 0.6870837735159103,
    'gamma': 0.7721097649862025,
    'min_child_weight': 6,
    'reg_alpha': 0.6278037775591339,
    'reg_lambda': 0.5018385676893943,
    'enable_categorical': True,
    # 'gpu_hist' is deprecated since XGBoost 2.0: GPU training is requested
    # with the 'hist' tree method plus device='cuda'.
    'tree_method': 'hist',
    'device': 'cuda',
    'eval_metric': 'rmse',
}

# Work on copies so the target-encoded values do not overwrite X / df_test.
X_train_final = X.copy()
X_test_final = df_test[X.columns].copy()

# Target-encode every categorical column: fitted on the full training data,
# then applied to the test data.
encoded_columns = [c for c in X.columns if X[c].dtype.name == 'category']

for c in encoded_columns:
    encoder = TargetEncoder(
        n_folds=5,
        smooth=0,
        split_method='random',
        stat='mean'
    )
    X_train_final[c] = encoder.fit_transform(X_train_final[[c]], y)
    X_test_final[c] = encoder.transform(X_test_final[[c]])

final_model = xgb.XGBRegressor(**best_params, random_state=42)

final_model.fit(X_train_final, y)

y_test_pred = final_model.predict(X_test_final)

# One row per test id with its predicted listening time.
predictions_df = pd.DataFrame({
    'id': X_test_final.index,
    'Listening_Time_minutes': y_test_pred
})

predictions_df.to_csv('predictions.csv', index=False)



In [None]:

# Second XGBoost configuration, trained on the same target-encoded matrices.
# NOTE(review): this cell used to reference X_transformed, y_train,
# X_test_transformed and X_test, none of which exist at notebook level, so it
# failed with NameError on a fresh kernel.  It now uses X_train_final / y /
# X_test_final, the encoded train and test matrices built in the preceding
# cell - confirm that this is the intended input.
model = xgb.XGBRegressor(
    learning_rate=0.039862590946263626,
    n_estimators=1740,
    max_depth=10,
    subsample=0.9120336481616528,
    colsample_bytree=0.8460012797592912,
    gamma=0.0860618058362767,
    min_child_weight=8,
    alpha=0.8837088997885456,
    max_delta_step=9,
    reg_lambda=0.43291251953731313,
    random_state=42
)

model.fit(X_train_final, y)

y_test_pred = model.predict(X_test_final)

predictions_df = pd.DataFrame({
    'id': X_test_final.index,  # ids of the test rows
    'Listening_Time_minutes': y_test_pred
})

# NOTE(review): same file name as the preceding cell, so that cell's output is overwritten.
predictions_df.to_csv('predictions.csv', index=False)



In [None]:
def TRAIN(params, encoded_column_start_index=11, n_splits=7):
    """Cross-validate a GPU XGBoost regressor with per-fold target encoding.

    Works on the notebook-level ``X``, ``y``, ``df_test`` and ``df_subm``.
    In every fold the columns ``X.columns[encoded_column_start_index:]`` are
    target-encoded with an encoder fitted on that fold's training rows only;
    the fitted model predicts the fold's validation rows (out-of-fold
    predictions) and the test set.

    Args:
        params: keyword arguments for ``XGBRegressor``.  ``tree_method`` and
            ``device`` default to GPU histogram training and may be
            overridden through this dict.
        encoded_column_start_index: position of the first column to
            target-encode; all columns from there to the end are encoded.
            NOTE(review): positional, so it depends on the column order of
            ``X`` - verify that it matches the current feature set.
        n_splits: number of K-fold splits.

    Returns:
        ``(y_pred, oof, overall_rmse)``: test predictions averaged over the
        folds, out-of-fold predictions for the training rows, and the RMSE of
        the out-of-fold predictions against ``y``.
    """
    cv = KFold(n_splits=n_splits, random_state=42, shuffle=True)
    y_pred = np.zeros(len(df_subm))
    oof = np.zeros(len(X))
    rmse_scores = []

    # 'gpu_hist' is deprecated since XGBoost 2.0: GPU training is requested
    # with the 'hist' tree method plus device='cuda'.  Caller-supplied values
    # win, so passing tree_method in `params` no longer raises a
    # duplicate-keyword TypeError.
    model_params = {'tree_method': 'hist', 'device': 'cuda', **params}

    for fold, (idx_train, idx_valid) in enumerate(cv.split(X, y)):
        print(f"\n📦 Fold {fold + 1}")

        # Move the fold data to the GPU (cuDF) for target encoding
        X_train = cudf.from_pandas(X.iloc[idx_train].copy())
        X_valid = cudf.from_pandas(X.iloc[idx_valid].copy())
        X_test = cudf.from_pandas(df_test[X.columns].copy())
        y_train = cudf.Series(y.iloc[idx_train].copy())
        y_valid = y.iloc[idx_valid].copy()

        encoded_columns = X.columns[encoded_column_start_index:]
        print("🎯 Target encoding: ", end="")
        for c in tqdm(encoded_columns, desc="Encoding columns"):
            encoder = TargetEncoder(
                n_folds=5,
                smooth=0,
                split_method='random',
                stat='mean'
            )
            # Fit on the fold's training rows only, then apply to validation and test.
            X_train[c] = encoder.fit_transform(X_train[[c]], y_train)
            X_valid[c] = encoder.transform(X_valid[[c]])
            X_test[c] = encoder.transform(X_test[[c]])

        # Back to pandas for XGBoost's scikit-learn interface
        X_train = X_train.to_pandas()
        X_valid = X_valid.to_pandas()
        X_test = X_test.to_pandas()
        y_train = y_train.to_pandas()

        model = XGBRegressor(**model_params)

        model.fit(
            X_train, y_train,
            eval_set=[(X_valid, y_valid)],
            verbose=100
        )

        oof[idx_valid] = model.predict(X_valid)
        y_pred += model.predict(X_test)

        fold_rmse = mean_squared_error(y_valid, oof[idx_valid]) ** 0.5
        rmse_scores.append(fold_rmse)
        print(f"✅ Fold {fold + 1} RMSE: {fold_rmse:.5f}")

    overall_rmse = mean_squared_error(y, oof) ** 0.5
    print(f"\n🎯 Overall CV RMSE: {overall_rmse:.5f}")
    # Test predictions were summed over the folds; turn the sum into a mean.
    y_pred /= n_splits

    return y_pred, oof, overall_rmse


In [None]:
%%time
# Hyperparameters for the cross-validated XGBoost run; passed as-is to TRAIN,
# which returns the averaged test predictions, the out-of-fold predictions
# and the overall CV RMSE.
ParamsXGB = {
    'max_depth': 10,
    'learning_rate': 0.00462847749422193,
    'min_child_weight': 4,
    'subsample': 0.8244361720956633,
    'colsample_bytree': 0.5586626138810886,
    'gamma': 1.1614500954011453,
    'reg_alpha': 0.3548920754067436,
    'reg_lambda': 3.9465129148897287,
    "n_estimators": 5000,
    'enable_categorical': True,
    'eval_metric': 'rmse'  # REQUIRED for XGBoost ≥ 2.0.0
}
y_pred_xgb, oof, overall_rmse = TRAIN(ParamsXGB)



📦 Fold 1
🎯 Target encoding: 

Encoding columns: 100%|██████████| 96/96 [00:58<00:00,  1.63it/s]


[0]	validation_0-rmse:26.98380
[100]	validation_0-rmse:19.62549
[200]	validation_0-rmse:15.71179
[300]	validation_0-rmse:13.79342
[400]	validation_0-rmse:12.92136
[500]	validation_0-rmse:12.53109
[600]	validation_0-rmse:12.35658
[700]	validation_0-rmse:12.27358
[800]	validation_0-rmse:12.23075
[900]	validation_0-rmse:12.20764
[1000]	validation_0-rmse:12.19551
[1100]	validation_0-rmse:12.18589
[1200]	validation_0-rmse:12.17853
[1300]	validation_0-rmse:12.17379
[1400]	validation_0-rmse:12.16999
[1500]	validation_0-rmse:12.16738
[1600]	validation_0-rmse:12.16524
[1700]	validation_0-rmse:12.16286
[1800]	validation_0-rmse:12.16057
[1900]	validation_0-rmse:12.15885
[2000]	validation_0-rmse:12.15781
[2100]	validation_0-rmse:12.15627
[2200]	validation_0-rmse:12.15502
[2300]	validation_0-rmse:12.15462
[2400]	validation_0-rmse:12.15357
[2500]	validation_0-rmse:12.15334
[2600]	validation_0-rmse:12.15231
[2700]	validation_0-rmse:12.15144
[2800]	validation_0-rmse:12.15069
[2900]	validation_0-rmse:1

Encoding columns: 100%|██████████| 96/96 [00:59<00:00,  1.62it/s]


[0]	validation_0-rmse:27.04774
[100]	validation_0-rmse:19.66726
[200]	validation_0-rmse:15.73952
[300]	validation_0-rmse:13.81340
[400]	validation_0-rmse:12.93483


In [None]:
%%time

# Put the fold-averaged XGBoost predictions into the sample-submission frame,
# save it (the `id` index is written as the first column) and preview the result.
df_subm['Listening_Time_minutes'] = y_pred_xgb
df_subm.to_csv('submission_XGB_1.csv')
df_subm.head()

CPU times: user 411 ms, sys: 5.99 ms, total: 417 ms
Wall time: 430 ms


Unnamed: 0_level_0,Listening_Time_minutes
id,Unnamed: 1_level_1
750000,54.003857
750001,23.540814
750002,49.385903
750003,79.698399
750004,48.883317
