In [1]:
!pip install -qq scikit-learn==1.6.1
!pip install pytorch_tabnet tabpfn

[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m13.5/13.5 MB[0m [31m107.2 MB/s[0m eta [36m0:00:00[0m
[?25h[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
category-encoders 2.7.0 requires scikit-learn<1.6.0,>=1.0.0, but you have scikit-learn 1.6.1 which is incompatible.
bigframes 1.36.0 requires rich<14,>=12.4.4, but you have rich 14.0.0 which is incompatible.[0m[31m
[0mCollecting pytorch_tabnet
  Downloading pytorch_tabnet-4.1.0-py3-none-any.whl.metadata (15 kB)
Collecting tabpfn
  Downloading tabpfn-2.0.8-py3-none-any.whl.metadata (25 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.3->pytorch_tabnet)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (fr

In [2]:
from dataclasses import dataclass
from pathlib import Path


@dataclass
class CFG:
    train_path: Path = Path("/kaggle/input/playground-series-s5e4/train.csv")
    test_path: Path = Path("/kaggle/input/playground-series-s5e4/test.csv")
    sub_path: Path = Path("/kaggle/input/playground-series-s5e4/sample_submission.csv")
    pltpd_path: Path = Path("/kaggle/input/podcast-listening-time-prediction-dataset/podcast_dataset.csv")
    
    num_fold: int = 5
    dev_mode: bool = False

    # Model parameters
    n_iter: int = 10000
    max_depth: int = -1
    num_leaves: int = 1024
    colsample_bytree: float = 0.7
    learning_rate: float = 0.04

    objective: str = "l2"
    metric: str = "rmse"
    verbosity: int = -1

    random_state: int = 42
    shuffle: bool = True
    encoded_columns_start: int = -91
    log_eval: int = 100
    early_stopping: int = 200

    # debug = True


cfg = CFG()


In [3]:
import gc
from itertools import combinations

import numpy as np
import polars as pl
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from tqdm import tqdm

def calc_rmse(y_true, y_pred):
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    return rmse


re_dict = {}
re_dict["podc_dict"] = {
    "Mystery Matters": 0,
    "Joke Junction": 1,
    "Study Sessions": 2,
    "Digital Digest": 3,
    "Mind & Body": 4,
    "Fitness First": 5,
    "Criminal Minds": 6,
    "News Roundup": 7,
    "Daily Digest": 8,
    "Music Matters": 9,
    "Sports Central": 10,
    "Melody Mix": 11,
    "Game Day": 12,
    "Gadget Geek": 13,
    "Global News": 14,
    "Tech Talks": 15,
    "Sport Spot": 16,
    "Funny Folks": 17,
    "Sports Weekly": 18,
    "Business Briefs": 19,
    "Tech Trends": 20,
    "Innovators": 21,
    "Health Hour": 22,
    "Comedy Corner": 23,
    "Sound Waves": 24,
    "Brain Boost": 25,
    "Athlete's Arena": 26,
    "Wellness Wave": 27,
    "Style Guide": 28,
    "World Watch": 29,
    "Humor Hub": 30,
    "Money Matters": 31,
    "Healthy Living": 32,
    "Home & Living": 33,
    "Educational Nuggets": 34,
    "Market Masters": 35,
    "Learning Lab": 36,
    "Lifestyle Lounge": 37,
    "Crime Chronicles": 38,
    "Detective Diaries": 39,
    "Life Lessons": 40,
    "Current Affairs": 41,
    "Finance Focus": 42,
    "Laugh Line": 43,
    "True Crime Stories": 44,
    "Business Insights": 45,
    "Fashion Forward": 46,
    "Tune Time": 47,
}
re_dict["genr_dict"] = {
    "True Crime": 0,
    "Comedy": 1,
    "Education": 2,
    "Technology": 3,
    "Health": 4,
    "News": 5,
    "Music": 6,
    "Sports": 7,
    "Business": 8,
    "Lifestyle": 9,
}
re_dict["week_dict"] = {
    "Monday": 0,
    "Tuesday": 1,
    "Wednesday": 2,
    "Thursday": 3,
    "Friday": 4,
    "Saturday": 5,
    "Sunday": 6,
}
re_dict["time_dict"] = {"Morning": 10, "Afternoon": 14, "Evening": 17, "Night": 21}
re_dict["sent_dict"] = {"Negative": 0, "Neutral": 1, "Positive": 2}


pl_i_type = pl.Int32
pl_f_type = pl.Float32


def cast_numeric_dtypes(df):
    float_cols = [col for col in df.columns if df.schema[col] == pl.Float64 or df.schema[col] == pl.Float32]
    int_cols = [col for col in df.columns if df.schema[col] == pl.Int64 or df.schema[col] == pl.Int32]

    if float_cols:
        df = df.with_columns([pl.col(col).cast(pl_f_type) for col in float_cols])
    if int_cols:
        df = df.with_columns([pl.col(col).cast(pl_i_type) for col in int_cols])

    return df


def preprocess(df, df_train=None):
    df = cast_numeric_dtypes(df)
    df = df.with_columns(pl.col("Episode_Title").str.slice(8).cast(pl.Int32).alias("Episode_Num")).drop("Episode_Title")

    # Convert categorical variables using mapping
    for col, mapping in [
        ("Genre", re_dict["genr_dict"]),
        ("Podcast_Name", re_dict["podc_dict"]),
        ("Publication_Day", re_dict["week_dict"]),
        ("Publication_Time", re_dict["time_dict"]),
        ("Episode_Sentiment", re_dict["sent_dict"]),
    ]:
        df = df.with_columns(pl.col(col).replace(mapping).alias(col))

    # Cap extreme values
    df = df.with_columns(
        pl.when(pl.col("Episode_Length_minutes") > 121.0).then(121.0).otherwise(pl.col("Episode_Length_minutes")).alias("Episode_Length_minutes"),
        pl.when(pl.col("Number_of_Ads") > 103.91).then(103.91).otherwise(pl.col("Number_of_Ads")).alias("Number_of_Ads"),
    )

    # Create NaN indicator columns
    df = df.with_columns(
        pl.col("Episode_Length_minutes").is_null().cast(pl_i_type).cast(pl.Utf8).cast(pl.Categorical).alias("Episode_Length_minutes_NaN"),
        pl.col("Guest_Popularity_percentage").is_null().cast(pl_i_type).cast(pl.Utf8).cast(pl.Categorical).alias("Guest_Popularity_percentage_NaN"),
    )

    # Fill NA values with median
    if df_train is None:
        df_train = df.clone()

    e_median = df_train.select(pl.col("Episode_Length_minutes").median()).item()
    g_median = df_train.select(pl.col("Guest_Popularity_percentage").median()).item()
    n_median = df_train.select(pl.col("Number_of_Ads").median()).item()

    df = df.with_columns(
        pl.col("Episode_Length_minutes").fill_null(e_median),
        pl.col("Guest_Popularity_percentage").fill_null(g_median),
        pl.col("Number_of_Ads").fill_null(n_median),
    )

    return df


def feature_eng(df, df_train):
    # Cyclical features for day and time
    df = df.with_columns(
        # Day features
        pl.col("Publication_Day").cast(pl_f_type).mul(2 * np.pi / 7).sin().alias("Day_sin"),
        pl.col("Publication_Day").cast(pl_f_type).mul(2 * np.pi / 7).cos().alias("Day_cos"),
        pl.col("Publication_Day").cast(pl_f_type).mul(4 * np.pi / 7).sin().alias("Day_sin2"),
        pl.col("Publication_Day").cast(pl_f_type).mul(4 * np.pi / 7).cos().alias("Day_cos2"),
        # Time features
        pl.col("Publication_Time").cast(pl_f_type).mul(2 * np.pi / 4).sin().alias("Time_sin"),
        pl.col("Publication_Time").cast(pl_f_type).mul(2 * np.pi / 4).cos().alias("Time_cos"),
        pl.col("Publication_Time").cast(pl_f_type).mul(4 * np.pi / 24).sin().alias("Time_sin2"),
        pl.col("Publication_Time").cast(pl_f_type).mul(4 * np.pi / 24).cos().alias("Time_cos2"),
        # Ratio features
        (pl.col("Episode_Length_minutes") / (pl.col("Number_of_Ads") + 1)).fill_null(0).alias("Length_per_Ads"),
        (pl.col("Episode_Length_minutes") / (pl.col("Host_Popularity_percentage") + 1)).fill_null(0).alias("Length_per_Host"),
        (pl.col("Episode_Length_minutes") / (pl.col("Guest_Popularity_percentage") + 1)).fill_null(0).alias("Length_per_Guest"),
        # Episode length features
        pl.col("Episode_Length_minutes").floor().alias("ELen_Int"),
        (pl.col("Episode_Length_minutes") - pl.col("Episode_Length_minutes").floor()).alias("ELen_Dec"),
        pl.col("Host_Popularity_percentage").floor().alias("HPperc_Int"),
        (pl.col("Host_Popularity_percentage") - pl.col("Host_Popularity_percentage").floor()).alias("HPperc_Dec"),
        # Sentiment features
        (pl.col("Episode_Sentiment") == "2").cast(pl.Int8).alias("Is_Positive_Sentiment"),
        pl.when(pl.col("Episode_Sentiment") == "2").then(0.75).otherwise(0.717).cast(pl_f_type).alias("Sentiment_Multiplier"),
        # Squared features
        (pl.col("Episode_Length_minutes") ** 2).alias("Episode_Length_squared"),
        (pl.col("Episode_Length_minutes") ** 3).alias("Episode_Length_squared2"),
    )

    # Add expected listening time based on sentiment
    df = df.with_columns((pl.col("Episode_Length_minutes") * pl.col("Sentiment_Multiplier")).alias("Expected_Listening_Time_Sentiment"))

    # Convert columns to categorical
    for col in ["Podcast_Name", "Genre", "Publication_Day", "Publication_Time", "Episode_Sentiment", "Episode_Num"]:
        df = df.with_columns(pl.col(col).cast(pl.Utf8).cast(pl.Categorical))

    return df



def get_combinations(df, columns_to_encode, pair_sizes):
    df_length = len(df)

    # Create target ratios with different step sizes for different ranges using np.arange
    target_ratios = []
    target_ratios.extend(np.arange(0.001, 0.999, 0.05).tolist())

    # # Round to avoid floating point precision issues
    # target_ratios = [round(ratio, 2) for ratio in target_ratios]

    all_combinations = []
    for r in pair_sizes:
        for cols in combinations(columns_to_encode, r):
            group_counts = len(df.group_by(cols).count())
            ratio = group_counts / df_length
            all_combinations.append((cols, ratio))

    unique_combinations = set()
    for target in target_ratios:
        closest_combination = min(all_combinations, key=lambda x: abs(x[1] - target))
        unique_combinations.add(closest_combination[0])

    return list(unique_combinations)


def cols_encode(df, combinations_list):
    batch_size = 20
    for i in range(0, len(combinations_list), batch_size):
        batch = combinations_list[i : i + batch_size]

        for cols in tqdm(batch):
            new_col_name = "colen_" + "_".join(cols)
            concat_expr = pl.col(cols[0]).cast(pl.Utf8)

            for col_name in cols[1:]:
                concat_expr = concat_expr + "_" + pl.col(col_name).cast(pl.Utf8)

            df = df.with_columns(concat_expr.alias(new_col_name).cast(pl.Categorical))

        gc.collect()

        mem_usage = sum(df.estimated_size() for col in df.columns) / (1024 * 1024)
        print(f"Memory usage: {mem_usage:.2f} MB")
        print(f"Total number of columns: {len(df.columns)}")

    return df



In [4]:
import os
os.environ['TORCH_USE_CUDA_DSA'] = '1'

import gc
import numpy as np
import polars as pl
import torch
from sklearn.model_selection import KFold
from sklearn.preprocessing import TargetEncoder, LabelEncoder
from pytorch_tabnet.callbacks import Callback
from pytorch_tabnet.tab_model import TabNetRegressor
import wandb
from kaggle_secrets import UserSecretsClient
from dataclasses import asdict

import warnings
warnings.filterwarnings('ignore')
warnings.simplefilter('ignore')

torch.cuda.empty_cache()
torch.cuda.set_device(0)

# Custom wandb callback
class WandbCallback(Callback):
    def __init__(self):
        self.trainer = None

    def set_trainer(self, trainer):
        self.trainer = trainer

    def on_epoch_end(self, epoch, logs=None):
        logs = logs or {}
        for metric_name, metric_value in logs.items():
            wandb.log({metric_name: metric_value})
        return False


def target_encode_polars(X_train, y_train, X_valid, X_test, columns, random_state=42):

    # Initialize the encoder
    encoder = TargetEncoder(random_state=random_state)
    
    encoded_columns = X_train.columns[before_encode_len:]
    
    # Fit and transform
    X_train_encoded = encoder.fit_transform(X_train[encoded_columns], y_train)
    X_valid_encoded = encoder.transform(X_valid[encoded_columns])
    X_test_encoded = encoder.transform(X_test[encoded_columns])
    
    # Create Polars DataFrames from the encoded results
    encoded_train_df = pl.DataFrame(
        {col: X_train_encoded[:, i] for i, col in enumerate(columns)}
    )
    encoded_valid_df = pl.DataFrame(
        {col: X_valid_encoded[:, i] for i, col in enumerate(columns)}
    )
    encoded_test_df = pl.DataFrame(
        {col: X_test_encoded[:, i] for i, col in enumerate(columns)}
    )
    
    # Drop the original columns and add the encoded ones
    X_train = X_train.drop(columns)
    X_valid = X_valid.drop(columns)
    X_test = X_test.drop(columns)
    
    # Horizontal stack with encoded columns
    X_train = pl.concat([X_train, encoded_train_df], how="horizontal")
    X_valid = pl.concat([X_valid, encoded_valid_df], how="horizontal")
    X_test = pl.concat([X_test, encoded_test_df], how="horizontal")
    
    return X_train, X_valid, X_test, encoder

def apply_target_encoding(X_test, encoder, columns):
    X_test_numpy = X_test.select(columns).to_numpy()
    X_test_encoded = encoder.transform(X_test_numpy)
    
    encoded_test_df = pl.DataFrame(
        {col: X_test_encoded[:, i] for i, col in enumerate(columns)}
    )
    X_test = X_test.drop(columns)
    X_test = pl.concat([X_test, encoded_test_df], how="horizontal")
    
    return X_test
    

# Initialize wandb
user_secrets = UserSecretsClient()
WANDB_API_KEY = user_secrets.get_secret("wandb_api")
wandb.login(key=WANDB_API_KEY)
wandb_run = wandb.init(project="playground-series-s5e4", config=asdict(cfg))

# Read CSV files
df_train = pl.read_csv(cfg.train_path)
df_train = df_train.filter(pl.col("Number_of_Ads").is_not_null())
df_test = pl.read_csv(cfg.test_path)

test_ids = df_test["id"]

df_train = df_train.drop("id")
df_test = df_test.drop("id")

target_col = "Listening_Time_minutes"

# Load and prepare podcast data
df_pltpd = pl.read_csv(cfg.pltpd_path)
df_pltpd = df_pltpd.filter(pl.col("Listening_Time_minutes").is_not_null())
y_pltpd = df_pltpd["Listening_Time_minutes"]
df_pltpd = df_pltpd.drop("Listening_Time_minutes")
df_pltpd = df_pltpd.with_columns(pl.col("Number_of_Ads").cast(pl.Float64))

# Create KFold
kf = KFold(n_splits=cfg.num_fold, random_state=42, shuffle=True)

# Get all row indices
all_indices = np.arange(len(df_train))

# Initialize predictions array for test data
test_preds = np.zeros(len(df_test))

# Train models with cross-validation
for fold, (idx_train, idx_valid) in enumerate(kf.split(all_indices)):
    print(f"Training fold {fold+1}/{cfg.num_fold}")

    # Use direct indexing to select rows from the Polars DataFrame
    X_train = df_train.drop(target_col)[idx_train.tolist()]
    y_train = df_train.select(target_col)[idx_train.tolist()]
    X_valid = df_train.drop(target_col)[idx_valid.tolist()]
    y_valid = df_train.select(target_col)[idx_valid.tolist()]
    
    # Concatenate with podcast data
    X_train = pl.concat([X_train, df_pltpd], how="vertical")
    y_train = pl.concat([y_train, pl.DataFrame({target_col: y_pltpd})], how="vertical")

    # Only use this line for debugging with a small sample
    if hasattr(cfg, 'debug') and cfg.debug:
        X_train = X_train.sample(100)
        y_train = y_train.sample(100)

    # Preprocess
    X_train = preprocess(X_train)
    X_valid = preprocess(X_valid, X_train)
    X_test = preprocess(df_test, X_train)
    
    # Feature engineering
    df_train_with_target = X_train.with_columns(y_train.select(pl.col(target_col)).rename({target_col: target_col}))
    X_train = feature_eng(X_train, df_train_with_target)
    X_valid = feature_eng(X_valid, df_train_with_target)
    X_test = feature_eng(X_test, df_train_with_target)
    
    # # Encoding
    before_encode_len = len(X_train.columns)
    columns_to_encode = [
        "Host_Popularity_percentage",
        "Guest_Popularity_percentage",
        "Episode_Length_minutes",
        "Episode_Num",
        "Podcast_Name",
        "Publication_Day",
        "Publication_Time",
        "Episode_Sentiment",
        "Genre",
        "Number_of_Ads",
        "Episode_Length_minutes_NaN",
        "Guest_Popularity_percentage_NaN",
        "HPperc_Int",
        "HPperc_Dec",
        "ELen_Int",
        "ELen_Dec",
        "Length_per_Ads",
    ]
    pair_size = [2, 3, 4]
    combinations_list = get_combinations(X_train, columns_to_encode, pair_size)
    print("Combinations list length:", len(combinations_list))
    print("Combinations list:", combinations_list)

    X_train = cols_encode(X_train, combinations_list)
    X_valid = cols_encode(X_valid, combinations_list)
    X_test = cols_encode(X_test, combinations_list)
    
    # Target encoding - we need to apply this with Polars
    encoded_columns = X_train.columns[before_encode_len:]
    
    # For target encoding, we'll use a custom function that works with polars
    X_train, X_valid, X_test, encoder = target_encode_polars(
        X_train, y_train, X_valid, X_test,
        columns=encoded_columns,
        random_state=cfg.random_state
    )
    
    # Train model
    cat_cols = [col for col in X_train.columns if pl.Categorical in X_train[col].dtype.base_type().__mro__ or X_train[col].dtype == pl.Utf8]
    cat_cols_idx = [i for i, col in enumerate(X_train.columns) if col in cat_cols]
    category_mappings = {}
    cat_dims = []
    
    combined_data = pl.concat([X_train, X_valid, X_test], how="vertical")
    for col in cat_cols:
        cat_dims.append(combined_data[col].unique().count())
        
        unique_values = combined_data[col].unique().sort()
        mapping = {val: idx for idx, val in enumerate(unique_values)}
        category_mappings[col] = mapping

        X_train = X_train.with_columns(
            pl.col(col).map_elements(lambda x: mapping.get(x, None)).alias(col)
        )
        X_valid = X_valid.with_columns(
            pl.col(col).map_elements(lambda x: mapping.get(x, None)).alias(col)
        )
        X_test = X_test.with_columns(
            pl.col(col).map_elements(lambda x: mapping.get(x, None)).alias(col)
        )
    
    print(cat_cols_idx, cat_dims, X_train)

    # Convert Polars DataFrames to numpy arrays
    X_train = X_train.to_numpy()
    X_valid = X_valid.to_numpy()
    X_test = X_test.to_numpy()
    
    # Reshape target variables
    y_train = y_train.to_numpy().reshape(-1, 1) if hasattr(y_train, "to_numpy") else np.array(y_train).reshape(-1, 1)
    y_valid = y_valid.to_numpy().reshape(-1, 1) if hasattr(y_valid, "to_numpy") else np.array(y_valid).reshape(-1, 1)

    print(X_train.shape, y_train.shape, X_valid.shape, y_valid.shape)
    
    # Set up model with proper categorical indices and dimensions
    model = TabNetRegressor(
        n_d=64,
        n_a=64,
        n_steps=5,
        gamma=1.5,
        cat_idxs=cat_cols_idx,
        cat_dims=cat_dims,
        optimizer_fn=torch.optim.AdamW,
        optimizer_params={"lr": 2e-2},
        scheduler_fn=torch.optim.lr_scheduler.StepLR,
        scheduler_params={"step_size": 10, "gamma": 0.9},
        mask_type="sparsemax",
        lambda_sparse=1e-3,
        # seed=42,
        verbose=1,
        device_name = "cuda:0"
        # device_name = "cpu"
    )

    # Train the model with our proper callback class
    model.fit(
        X_train=X_train,
        y_train=y_train,
        eval_set=[(X_valid, y_valid)],
        eval_name=["valid"],
        eval_metric=["rmse"],
        max_epochs=200,
        patience=10,
        batch_size=1024,
        virtual_batch_size=128,
        callbacks=[WandbCallback()],
    )
    
    # Add predictions
    fold_pred = model.predict(X_test)
    test_preds += fold_pred.flatten()

    # Cleanup
    gc.collect()

    if hasattr(cfg, 'debug') and cfg.debug and fold > 1:
        break 

# Log final results
wandb.log({"final_predictions": wandb.Histogram(test_preds)})
wandb.finish()
gc.collect()

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mmasaishi[0m to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Tracking run with wandb version 0.19.6
[34m[1mwandb[0m: Run data is saved locally in [35m[1m/kaggle/working/wandb/run-20250422_020521-hhzgooxg[0m
[34m[1mwandb[0m: Run [1m`wandb offline`[0m to turn off syncing.
[34m[1mwandb[0m: Syncing run [33mdainty-bird-442[0m
[34m[1mwandb[0m: ‚≠êÔ∏è View project at [34m[4mhttps://wandb.ai/masaishi/playground-series-s5e4[0m
[34m[1mwandb[0m: üöÄ View run at [34m[4mhttps://wandb.ai/masaishi/playground-series-s5e4/runs/hhzgooxg[0m


Training fold 1/5
Combinations list length: 20
Combinations list: [('Publication_Time', 'Episode_Length_minutes_NaN', 'HPperc_Int'), ('Host_Popularity_percentage', 'Publication_Day', 'Guest_Popularity_percentage_NaN'), ('Episode_Num', 'Episode_Sentiment', 'Genre', 'ELen_Int'), ('Publication_Time', 'Guest_Popularity_percentage_NaN', 'Length_per_Ads'), ('Host_Popularity_percentage', 'Episode_Num', 'Episode_Length_minutes_NaN'), ('Podcast_Name', 'Number_of_Ads', 'Episode_Length_minutes_NaN', 'HPperc_Dec'), ('Host_Popularity_percentage', 'Guest_Popularity_percentage', 'Publication_Day', 'Episode_Sentiment'), ('Host_Popularity_percentage', 'Publication_Time', 'Genre', 'Episode_Length_minutes_NaN'), ('Episode_Length_minutes', 'Publication_Day', 'Genre', 'Guest_Popularity_percentage_NaN'), ('Host_Popularity_percentage', 'Genre', 'Guest_Popularity_percentage_NaN'), ('Podcast_Name', 'Episode_Sentiment', 'Guest_Popularity_percentage_NaN', 'ELen_Int'), ('Guest_Popularity_percentage_NaN', 'HPperc_

100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 20/20 [00:05<00:00,  3.37it/s]


Memory usage: 11263.10 MB
Total number of columns: 52


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 20/20 [00:00<00:00, 27.46it/s]


Memory usage: 2988.48 MB
Total number of columns: 52


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 20/20 [00:01<00:00, 16.38it/s]


Memory usage: 4774.26 MB
Total number of columns: 52
[0, 2, 4, 5, 8, 9, 10, 11] [48, 10, 7, 4, 3, 100, 2, 2] shape: (647_104, 52)
‚îå‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚î¨‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚î¨‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚î¨‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚î¨‚îÄ‚îÄ‚îÄ‚î¨‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚î¨‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚î¨‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚î¨‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îê
‚îÇ Podcast_Na ‚îÜ Episode_Le ‚îÜ Genre ‚îÜ Host_Popul ‚îÜ ‚Ä¶ ‚îÜ colen_Epi ‚îÜ colen_Pod ‚îÜ colen_Epi ‚îÜ colen_Gue ‚îÇ
‚îÇ me         ‚îÜ ngth_minut ‚îÜ ---   ‚îÜ arity_perc ‚îÜ   ‚îÜ sode_Num_ ‚îÜ cast_Name ‚îÜ sode_Sent ‚îÜ st_Popula ‚îÇ
‚îÇ ---        ‚îÜ es         ‚îÜ i64   ‚îÜ entage     ‚îÜ   ‚îÜ Publicati ‚îÜ _Guest_Po ‚îÜ iment_Gen ‚îÜ rity_perc ‚îÇ
‚îÇ i64        ‚îÜ ---        ‚îÜ       ‚îÜ ---        ‚îÜ   ‚îÜ on_‚Ä¶      ‚îÜ pul‚Ä¶      ‚îÜ re_‚Ä¶      ‚îÜ ent‚Ä¶      ‚îÇ
‚îÇ            ‚îÜ f32        ‚îÜ       ‚îÜ f32        ‚îÜ   ‚îÜ ---       

100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 20/20 [00:05<00:00,  3.37it/s]


Memory usage: 11300.25 MB
Total number of columns: 52


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 20/20 [00:00<00:00, 27.79it/s]


Memory usage: 2997.65 MB
Total number of columns: 52


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 20/20 [00:01<00:00, 16.64it/s]


Memory usage: 4790.95 MB
Total number of columns: 52
[0, 2, 4, 5, 8, 9, 10, 11] [48, 10, 7, 4, 3, 100, 2, 2] shape: (647_104, 52)
‚îå‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚î¨‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚î¨‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚î¨‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚î¨‚îÄ‚îÄ‚îÄ‚î¨‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚î¨‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚î¨‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚î¨‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îê
‚îÇ Podcast_Na ‚îÜ Episode_Le ‚îÜ Genre ‚îÜ Host_Popul ‚îÜ ‚Ä¶ ‚îÜ colen_Gue ‚îÜ colen_Epi ‚îÜ colen_Pod ‚îÜ colen_Gue ‚îÇ
‚îÇ me         ‚îÜ ngth_minut ‚îÜ ---   ‚îÜ arity_perc ‚îÜ   ‚îÜ st_Popula ‚îÜ sode_Num_ ‚îÜ cast_Name ‚îÜ st_Popula ‚îÇ
‚îÇ ---        ‚îÜ es         ‚îÜ i64   ‚îÜ entage     ‚îÜ   ‚îÜ rity_perc ‚îÜ Publicati ‚îÜ _Guest_Po ‚îÜ rity_perc ‚îÇ
‚îÇ i64        ‚îÜ ---        ‚îÜ       ‚îÜ ---        ‚îÜ   ‚îÜ ent‚Ä¶      ‚îÜ on_‚Ä¶      ‚îÜ pul‚Ä¶      ‚îÜ ent‚Ä¶      ‚îÇ
‚îÇ            ‚îÜ f32        ‚îÜ       ‚îÜ f32        ‚îÜ   ‚îÜ ---       

100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 20/20 [00:05<00:00,  3.34it/s]


Memory usage: 11426.74 MB
Total number of columns: 52


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 20/20 [00:00<00:00, 27.62it/s]


Memory usage: 3032.70 MB
Total number of columns: 52


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 20/20 [00:01<00:00, 17.20it/s]


Memory usage: 4846.29 MB
Total number of columns: 52
[0, 2, 4, 5, 8, 9, 10, 11] [48, 10, 7, 4, 3, 100, 2, 2] shape: (647_104, 52)
‚îå‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚î¨‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚î¨‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚î¨‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚î¨‚îÄ‚îÄ‚îÄ‚î¨‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚î¨‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚î¨‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚î¨‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îê
‚îÇ Podcast_Na ‚îÜ Episode_Le ‚îÜ Genre ‚îÜ Host_Popul ‚îÜ ‚Ä¶ ‚îÜ colen_Pub ‚îÜ colen_Epi ‚îÜ colen_Epi ‚îÜ colen_Pod ‚îÇ
‚îÇ me         ‚îÜ ngth_minut ‚îÜ ---   ‚îÜ arity_perc ‚îÜ   ‚îÜ lication_ ‚îÜ sode_Leng ‚îÜ sode_Num_ ‚îÜ cast_Name ‚îÇ
‚îÇ ---        ‚îÜ es         ‚îÜ i64   ‚îÜ entage     ‚îÜ   ‚îÜ Time_Epis ‚îÜ th_minute ‚îÜ Publicati ‚îÜ _Guest_Po ‚îÇ
‚îÇ i64        ‚îÜ ---        ‚îÜ       ‚îÜ ---        ‚îÜ   ‚îÜ ode‚Ä¶      ‚îÜ s_P‚Ä¶      ‚îÜ on_‚Ä¶      ‚îÜ pul‚Ä¶      ‚îÇ
‚îÇ            ‚îÜ f32        ‚îÜ       ‚îÜ f32        ‚îÜ   ‚îÜ ---       

100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 20/20 [00:06<00:00,  3.33it/s]


Memory usage: 11273.98 MB
Total number of columns: 52


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 20/20 [00:00<00:00, 27.54it/s]


Memory usage: 2984.76 MB
Total number of columns: 52


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 20/20 [00:01<00:00, 16.69it/s]


Memory usage: 4772.46 MB
Total number of columns: 52
[0, 2, 4, 5, 8, 9, 10, 11] [48, 10, 7, 4, 3, 100, 2, 2] shape: (647_104, 52)
‚îå‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚î¨‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚î¨‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚î¨‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚î¨‚îÄ‚îÄ‚îÄ‚î¨‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚î¨‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚î¨‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚î¨‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îê
‚îÇ Podcast_Na ‚îÜ Episode_Le ‚îÜ Genre ‚îÜ Host_Popul ‚îÜ ‚Ä¶ ‚îÜ colen_Epi ‚îÜ colen_Epi ‚îÜ colen_Pod ‚îÜ colen_Gue ‚îÇ
‚îÇ me         ‚îÜ ngth_minut ‚îÜ ---   ‚îÜ arity_perc ‚îÜ   ‚îÜ sode_Num_ ‚îÜ sode_Num_ ‚îÜ cast_Name ‚îÜ st_Popula ‚îÇ
‚îÇ ---        ‚îÜ es         ‚îÜ i64   ‚îÜ entage     ‚îÜ   ‚îÜ Podcast_N ‚îÜ Publicati ‚îÜ _Guest_Po ‚îÜ rity_perc ‚îÇ
‚îÇ i64        ‚îÜ ---        ‚îÜ       ‚îÜ ---        ‚îÜ   ‚îÜ ame‚Ä¶      ‚îÜ on_‚Ä¶      ‚îÜ pul‚Ä¶      ‚îÜ ent‚Ä¶      ‚îÇ
‚îÇ            ‚îÜ f32        ‚îÜ       ‚îÜ f32        ‚îÜ   ‚îÜ ---       

100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 20/20 [00:06<00:00,  3.18it/s]


Memory usage: 11407.03 MB
Total number of columns: 52


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 20/20 [00:00<00:00, 26.11it/s]


Memory usage: 3041.98 MB
Total number of columns: 52


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 20/20 [00:01<00:00, 16.16it/s]


Memory usage: 4852.20 MB
Total number of columns: 52
[0, 2, 4, 5, 8, 9, 10, 11] [48, 10, 7, 4, 3, 100, 2, 2] shape: (647_105, 52)
‚îå‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚î¨‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚î¨‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚î¨‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚î¨‚îÄ‚îÄ‚îÄ‚î¨‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚î¨‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚î¨‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚î¨‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îê
‚îÇ Podcast_Na ‚îÜ Episode_Le ‚îÜ Genre ‚îÜ Host_Popul ‚îÜ ‚Ä¶ ‚îÜ colen_Epi ‚îÜ colen_Gue ‚îÜ colen_Epi ‚îÜ colen_Pod ‚îÇ
‚îÇ me         ‚îÜ ngth_minut ‚îÜ ---   ‚îÜ arity_perc ‚îÜ   ‚îÜ sode_Leng ‚îÜ st_Popula ‚îÜ sode_Num_ ‚îÜ cast_Name ‚îÇ
‚îÇ ---        ‚îÜ es         ‚îÜ i64   ‚îÜ entage     ‚îÜ   ‚îÜ th_minute ‚îÜ rity_perc ‚îÜ Publicati ‚îÜ _Guest_Po ‚îÇ
‚îÇ i64        ‚îÜ ---        ‚îÜ       ‚îÜ ---        ‚îÜ   ‚îÜ s_E‚Ä¶      ‚îÜ ent‚Ä¶      ‚îÜ on_‚Ä¶      ‚îÜ pul‚Ä¶      ‚îÇ
‚îÇ            ‚îÜ f32        ‚îÜ       ‚îÜ f32        ‚îÜ   ‚îÜ ---       

[34m[1mwandb[0m: uploading output.log; uploading wandb-summary.json; uploading config.yaml
[34m[1mwandb[0m:                                                                                
[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run history:
[34m[1mwandb[0m:       loss ‚ñÉ‚ñÇ‚ñÇ‚ñÅ‚ñÅ‚ñÅ‚ñà‚ñÇ‚ñÅ‚ñÅ‚ñÉ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÅ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÅ‚ñÇ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ
[34m[1mwandb[0m:         lr ‚ñÜ‚ñÜ‚ñÜ‚ñÜ‚ñÑ‚ñÑ‚ñÑ‚ñÇ‚ñà‚ñà‚ñà‚ñà‚ñÜ‚ñÜ‚ñÜ‚ñÜ‚ñÜ‚ñÑ‚ñÑ‚ñÑ‚ñà‚ñà‚ñà‚ñÜ‚ñÜ‚ñÜ‚ñÑ‚ñà‚ñà‚ñà‚ñÜ‚ñÑ‚ñÑ‚ñÑ‚ñÑ‚ñÇ‚ñÇ‚ñÅ‚ñÅ‚ñÅ
[34m[1mwandb[0m: valid_rmse ‚ñÉ‚ñÉ‚ñÇ‚ñÇ‚ñÉ‚ñÉ‚ñÉ‚ñÇ‚ñÉ‚ñÇ‚ñÖ‚ñÑ‚ñÉ‚ñà‚ñÑ‚ñÑ‚ñÖ‚ñÖ‚ñÑ‚ñÉ‚ñÑ‚ñÑ‚ñÉ‚ñÉ‚ñÉ‚ñÉ‚ñÉ‚ñÇ‚ñÜ‚ñÉ‚ñÇ‚ñÉ‚ñÇ‚ñÅ‚ñÅ‚ñÅ‚ñÇ‚ñÅ‚ñÅ‚ñÅ
[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run summary:
[34m[1mwandb[0m:       loss 153.24976
[34m[1mwandb[0m:         lr 0.01181
[34m[1mwandb[0m: valid_rmse 12.27736
[34m[1mwandb[0m: 
[34m[1mwandb[0m: üöÄ View run [33mdainty-bird-442[0m at: [34m[4mhttps://wandb.ai/masaishi/pla

0

In [5]:
from IPython.display import display
import pandas as pd

import warnings
warnings.filterwarnings('ignore')
warnings.simplefilter('ignore')

df_subm = pd.read_csv(cfg.sub_path)
df_subm["Listening_Time_minutes"] = test_preds / cfg.num_fold
df_subm.to_csv('raw_submission.csv', index=False)
df_subm

Unnamed: 0,id,Listening_Time_minutes
0,750000,55.038874
1,750001,22.756876
2,750002,49.560295
3,750003,81.269464
4,750004,52.077427
...,...,...
249995,999995,12.629090
249996,999996,57.767439
249997,999997,6.494316
249998,999998,74.771155


In [6]:
df_train = pd.read_csv('/kaggle/input/playground-series-s5e4/train.csv')
df_test = pd.read_csv('/kaggle/input/playground-series-s5e4/test.csv')

df_pltpd = pd.read_csv('/kaggle/input/podcast-listening-time-prediction-dataset/podcast_dataset.csv')
df_pltpd = df_pltpd.dropna(subset=["Listening_Time_minutes"])
df_pltpd = df_pltpd.reset_index(drop=True)
df_pltpd.index = df_pltpd.index + 1000000
df_pltpd['id'] = df_pltpd.index

df_train = pd.concat([df_train, df_pltpd], axis=0)

In [7]:
df_test_with_id = df_test.copy()

df_test_with_id.loc[df_test_with_id["Number_of_Ads"] > 103.91, "Number_of_Ads"] = 0.0
df_over_ads = df_test_with_id[df_test_with_id["Number_of_Ads"] > 3]
df_over_ads["Listening_Time_minutes"] = df_over_ads["Number_of_Ads"] * 0.993
display(df_over_ads)

display(df_subm.loc[df_subm['id'].isin(df_over_ads['id'].values)])
df_subm.loc[df_subm['id'].isin(df_over_ads['id'].values), 'Listening_Time_minutes'] = df_over_ads['Listening_Time_minutes'].values
display(df_subm.loc[df_subm['id'].isin(df_over_ads['id'].values)])

Unnamed: 0,id,Podcast_Name,Episode_Title,Episode_Length_minutes,Genre,Host_Popularity_percentage,Publication_Day,Publication_Time,Guest_Popularity_percentage,Number_of_Ads,Episode_Sentiment,Listening_Time_minutes
37939,787939,Life Lessons,Episode 94,89.84,Lifestyle,60.79,Sunday,Afternoon,62.13,89.12,Neutral,88.49616


Unnamed: 0,id,Listening_Time_minutes
37939,787939,70.12211


Unnamed: 0,id,Listening_Time_minutes
37939,787939,88.49616


In [8]:
cols_to_compare = ['Episode_Title', 'Host_Popularity_percentage', 'Guest_Popularity_percentage']

df_test_with_id = df_test.copy()
df_test_with_id = df_test_with_id.dropna(subset=['Guest_Popularity_percentage'])

leaked_rows = df_test_with_id.merge(
    df_train[cols_to_compare + ['Listening_Time_minutes']].drop_duplicates(),
    on=cols_to_compare,
    how='inner'
)

mean_values = leaked_rows.groupby('id')['Listening_Time_minutes'].mean().reset_index()

display(df_subm.loc[df_subm['id'].isin(mean_values['id'].values)])
df_subm.loc[df_subm['id'].isin(mean_values['id'].values), "Listening_Time_minutes"] = mean_values["Listening_Time_minutes"].values
display(df_subm.loc[df_subm['id'].isin(mean_values['id'].values)])

Unnamed: 0,id,Listening_Time_minutes
51,750051,20.075962
52,750052,55.785649
227,750227,67.357727
243,750243,27.228897
312,750312,33.769013
...,...,...
249802,999802,36.873717
249832,999832,87.606976
249841,999841,19.938288
249854,999854,73.303650


Unnamed: 0,id,Listening_Time_minutes
51,750051,18.060370
52,750052,65.789562
227,750227,58.393200
243,750243,23.830000
312,750312,26.825190
...,...,...
249802,999802,45.055790
249832,999832,89.819210
249841,999841,17.234691
249854,999854,67.816132


In [9]:
df_subm.to_csv('submission.csv', index=False)
pl.read_csv('submission.csv')

id,Listening_Time_minutes
i64,f64
750000,55.038874
750001,22.756876
750002,49.560295
750003,81.269464
750004,52.077427
…,…
999995,12.62909
999996,57.767439
999997,6.494316
999998,74.771155
