In [1]:
import numpy as np
import polars as pl
import catboost
import xgboost as xgb
from sklearn.ensemble import RandomForestRegressor
import seaborn as sns

### Меня зовут Вершинин Михаил

# Установка библиотек

In [2]:
# Функция для создания новых фичей на основе дат
def create_date_features(df):
    # Преобразование столбцов в тип даты
    df = df.with_columns([
        pl.col("date").str.strptime(pl.Date, "%Y-%m-%d").alias("date"),
        pl.col("next_date").str.strptime(pl.Date, "%Y-%m-%d").alias("next_date")
    ])
    
    # Разница между датами в днях
    df = df.with_columns([
        (pl.col("next_date") - pl.col("date")).alias("date_diff_days"),
    ])
    
    return df

# Вторая функция для создания новых фичей на основе двух дат и геоданных
def create_many_time_geo_features(df: pl.DataFrame, lat_col='lat', lon_col='lng') -> pl.DataFrame:
    df = df.rename({"date": "datetime", "next_date": "next_datetime"})
    
    # 1. Базовые признаки для обеих дат
    df = df.with_columns([
        pl.col("datetime").dt.year().alias("year"),
        pl.col("datetime").dt.month().alias("month"),
        pl.col("next_datetime").dt.year().alias("next_year"),
        pl.col("next_datetime").dt.month().alias("next_month")
    ])
    
    # 2. Циклические признаки для обеих дат
    df = df.with_columns([
        (2 * np.pi * pl.col("month") / 12).sin().alias("month_sin"),
        (2 * np.pi * pl.col("month") / 12).cos().alias("month_cos"),
        (2 * np.pi * pl.col("next_month") / 12).sin().alias("next_month_sin"),
        (2 * np.pi * pl.col("next_month") / 12).cos().alias("next_month_cos"),
    ])
    
    # 3. Географические признаки
    df = df.with_columns([
        pl.col(lat_col).radians().cos().alias("lat_cos"),
        pl.col(lat_col).radians().sin().alias("lat_sin"),
        pl.col(lon_col).radians().cos().alias("lon_cos"),
        pl.col(lon_col).radians().sin().alias("lon_sin"),
    ])
    
    # Сортировка по дате
    df = df.sort("datetime")
    
    return df


### Загрузка данных и предобработка

In [3]:
train = pl.read_csv(r'/kaggle/input/ioai-2025-preparation-class-lesson-4-homework/train.csv').sort('date')
sab = pl.read_csv(r'/kaggle/input/ioai-2025-preparation-class-lesson-4-homework/sample_submission.csv')
sab = sab.with_columns([
    pl.col("id").str.split("_").list.get(0).alias("date"),
    pl.col("id").str.split("_").list.get(1).cast(pl.Int64).alias("id_house"),
    pl.col("target").cast(pl.Float64).alias("target")  # <-- правильное приведение к float
]).drop(["id"])

In [4]:
train

apart_to_room,num_builds_live,mean_price,date,num_builds_series_live,room_three,id_house,med_price,room_four,room_one,mean_area,room_zero,number_total,room_two,vc_city_quadkey,healthcare_cnt,flats_cnt,beauty_cnt,shopping_cnt,build_year_median,lng,lat
f64,i64,f64,str,i64,f64,i64,f64,f64,f64,f64,f64,i64,f64,i64,f64,f64,f64,f64,f64,f64,f64
0.0,1,43500.0,"""2020-01-01""",1,1.0,6123,42857.142188,0.0,0.0,67.2,0.0,3,0.0,9,0.0,1.0,0.0,0.0,1981.5,59.107842,79.032814
0.0,3,53760.726562,"""2020-01-01""",1,0.0,4104,54460.228125,0.0,0.5,46.8,0.0,4,0.5,27,2.0,0.0,1.0,1.0,1981.0,59.371514,79.087761
0.0,4,50556.533203,"""2020-01-01""",1,0.0,1627,48444.445312,0.0,0.75,44.4,0.0,4,0.25,26,3.0,0.0,0.0,0.0,2011.0,59.371514,79.08166
0.0,1,34845.735938,"""2020-01-01""",1,1.0,151,34845.735938,0.0,0.0,66.119998,0.0,1,0.0,1,0.0,0.0,0.0,0.0,2013.0,58.492607,78.953307
0.0,2,62347.146875,"""2020-01-01""",2,0.666667,6820,60115.603125,0.0,0.0,62.68,0.0,3,0.333333,20,1.0,0.0,2.0,0.0,1984.0,58.448662,78.873635
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
0.0,1,23333.334375,"""2022-05-01""",1,0.0,5247,23333.334375,0.0,1.0,43.2,0.0,1,0.0,4,0.0,0.0,0.0,0.0,1954.0,62.359795,77.778694
0.0,21,25875.778668,"""2022-05-01""",1,0.092593,3991,25732.323047,0.0,0.185185,54.955556,0.0,54,0.722222,41,0.0,0.0,0.0,1.0,1963.0,62.359795,77.772386
0.0,1,30000.0,"""2022-05-01""",1,0.0,3430,30000.0,0.0,1.0,36.0,0.0,1,0.0,5,3.0,0.0,0.0,0.0,1957.0,62.370781,77.778694
0.0,7,25821.780762,"""2022-05-01""",1,0.166667,6599,24000.0,0.0,0.083333,56.75,0.0,12,0.75,20,0.0,0.0,0.0,0.0,1965.0,62.370781,77.772386


In [5]:
sab

target,date,id_house
f64,str,i64
0.0,"""2022-06-01""",6123
0.0,"""2022-07-01""",6123
0.0,"""2022-08-01""",6123
0.0,"""2022-06-01""",4104
0.0,"""2022-07-01""",4104
…,…,…
0.0,"""2022-07-01""",6599
0.0,"""2022-08-01""",6599
0.0,"""2022-06-01""",5668
0.0,"""2022-07-01""",5668


### Для предсказания будущего буду использовать последние данные о домах

In [6]:
# Получение последней известной информации о каждом доме
latest_info = train.sort("date").group_by("id_house").agg(pl.all().last())

In [7]:
latest_info

id_house,apart_to_room,num_builds_live,mean_price,date,num_builds_series_live,room_three,med_price,room_four,room_one,mean_area,room_zero,number_total,room_two,vc_city_quadkey,healthcare_cnt,flats_cnt,beauty_cnt,shopping_cnt,build_year_median,lng,lat
i64,f64,i64,f64,str,i64,f64,f64,f64,f64,f64,f64,i64,f64,i64,f64,f64,f64,f64,f64,f64,f64
3192,0.0,3,60319.75,"""2022-05-01""",1,0.25,57510.660937,0.0,0.166667,57.06,0.0,12,0.583333,3,2.0,50.0,0.0,0.0,1960.0,59.767021,78.836808
1659,0.0,1,47826.084375,"""2021-09-01""",1,0.0,47826.084375,0.0,1.0,49.680002,0.0,1,0.0,6,2.0,0.0,1.0,3.0,1972.0,60.634941,78.756895
5949,0.0,6,87664.86529,"""2022-05-01""",1,0.285714,86169.24375,0.0,0.071429,64.225714,0.0,14,0.642857,19,7.0,0.0,3.0,4.0,1967.5,59.810967,78.211592
3832,0.0,1,76442.310937,"""2021-07-01""",1,0.0,76442.310937,0.0,0.0,49.919998,0.0,1,1.0,3,0.0,0.0,0.0,1.0,1972.0,59.3825,78.608929
5705,0.0,1,165714.28125,"""2021-05-01""",1,0.0,165714.28125,0.0,1.0,25.2,1.0,1,0.0,2,1.0,0.0,0.0,3.0,2014.0,60.25042,78.261479
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
5356,0.0,16,107577.715248,"""2022-05-01""",1,0.293103,109545.454687,0.025862,0.362069,68.728965,0.017241,116,0.318966,30,16.0,0.0,24.0,16.0,1998.0,59.558281,78.030222
6011,0.0,1,26470.589062,"""2020-01-01""",1,1.0,26470.589062,0.0,0.0,81.6,0.0,1,0.0,2,0.0,1.0,0.0,0.0,2006.0,58.767266,78.689147
839,0.0,4,361848.614063,"""2022-05-01""",3,0.0,295569.890625,0.0,0.5,52.23,0.25,4,0.5,7,1.0,0.0,0.0,1.0,1961.0,60.360283,78.410763
5704,0.0,7,161306.919886,"""2022-05-01""",2,0.272727,165000.0,0.0,0.272727,80.432728,0.0,11,0.454545,49,0.0,0.0,2.0,1.0,2013.5,60.45916,78.167889


### Второй принцип предсказание - предсказание разниц между ценами на ближайших промежутках времени

In [8]:
df = train.sort(["id_house", "date"])

# Вычисление разницы цен (target)
df = df.with_columns([
    (pl.col("mean_price").shift(-1).over("id_house") - pl.col("mean_price")).alias("price_diff"),
    pl.col("date").shift(-1).over("id_house").alias("next_date")
])

df = df.drop_nulls(["price_diff", "next_date"])

In [9]:
df.filter(df['id_house'] == 12)

apart_to_room,num_builds_live,mean_price,date,num_builds_series_live,room_three,id_house,med_price,room_four,room_one,mean_area,room_zero,number_total,room_two,vc_city_quadkey,healthcare_cnt,flats_cnt,beauty_cnt,shopping_cnt,build_year_median,lng,lat,price_diff,next_date
f64,i64,f64,str,i64,f64,i64,f64,f64,f64,f64,f64,i64,f64,i64,f64,f64,f64,f64,f64,f64,f64,f64,str
0.0,1,86956.51875,"""2020-10-01""",1,0.0,12,86956.51875,0.0,1.0,41.4,0.0,1,0.0,6,0.0,0.0,4.0,2.0,1989.0,60.865654,78.205352,0.0,"""2020-11-01"""
0.0,1,86956.51875,"""2020-11-01""",1,0.0,12,86956.51875,0.0,1.0,41.4,0.0,1,0.0,6,0.0,0.0,4.0,2.0,1989.0,60.865654,78.205352,0.0,"""2020-12-01"""
0.0,1,86956.51875,"""2020-12-01""",1,0.0,12,86956.51875,0.0,1.0,41.4,0.0,1,0.0,6,0.0,0.0,4.0,2.0,1989.0,60.865654,78.205352,0.0,"""2021-01-01"""
0.0,1,86956.51875,"""2021-01-01""",1,0.0,12,86956.51875,0.0,1.0,41.4,0.0,1,0.0,6,0.0,0.0,4.0,2.0,1989.0,60.865654,78.205352,1989.792187,"""2021-02-01"""
0.0,2,88946.310937,"""2021-02-01""",1,0.5,12,88946.310937,0.0,0.5,61.080002,0.0,2,0.0,6,0.0,0.0,4.0,2.0,1989.0,60.865654,78.205352,0.0,"""2021-03-01"""
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
0.0,1,95487.014062,"""2021-10-01""",1,0.0,12,95487.014062,0.0,1.0,39.959999,0.0,2,0.0,6,0.0,0.0,4.0,2.0,1989.0,60.865654,78.205352,0.0,"""2021-11-01"""
0.0,1,95487.014062,"""2021-11-01""",1,0.0,12,95487.014062,0.0,1.0,39.959999,0.0,2,0.0,6,0.0,0.0,4.0,2.0,1989.0,60.865654,78.205352,0.0,"""2021-12-01"""
0.0,1,95487.014062,"""2021-12-01""",1,0.0,12,95487.014062,0.0,1.0,39.959999,0.0,2,0.0,6,0.0,0.0,4.0,2.0,1989.0,60.865654,78.205352,-941.564062,"""2022-01-01"""
0.0,1,94545.45,"""2022-01-01""",1,0.0,12,94545.45,0.0,1.0,39.6,0.0,1,0.0,6,0.0,0.0,4.0,2.0,1989.0,60.865654,78.205352,0.0,"""2022-02-01"""


### Добавляем все колонки

In [10]:
new_train = df.rename({'price_diff': 'target'}).sort('date').drop(['med_price'])
new_train = create_many_time_geo_features(create_date_features(new_train), 'lat', 'lng').fill_null(0)

In [11]:
new_train

apart_to_room,num_builds_live,mean_price,datetime,num_builds_series_live,room_three,id_house,room_four,room_one,mean_area,room_zero,number_total,room_two,vc_city_quadkey,healthcare_cnt,flats_cnt,beauty_cnt,shopping_cnt,build_year_median,lng,lat,target,next_datetime,date_diff_days,year,month,next_year,next_month,month_sin,month_cos,next_month_sin,next_month_cos,lat_cos,lat_sin,lon_cos,lon_sin
f64,i64,f64,date,i64,f64,i64,f64,f64,f64,f64,i64,f64,i64,f64,f64,f64,f64,f64,f64,f64,f64,date,duration[ms],i32,i8,i32,i8,f64,f64,f64,f64,f64,f64,f64,f64
0.0,8,113026.606534,2020-01-01,2,0.272727,1,0.181818,0.272727,83.76,0.0,11,0.272727,17,0.0,11.0,0.0,1.0,2013.0,60.118584,78.142894,-4499.154034,2020-02-01,31d,2020,1,2020,2,0.5,0.866025,0.866025,0.5,0.205472,0.978663,0.498207,0.867058
0.0,3,43859.692188,2020-01-01,1,0.0,2,0.0,0.666667,42.4,0.0,3,0.333333,17,0.0,0.0,0.0,0.0,1959.0,61.854424,78.03649,0.0,2020-02-01,31d,2020,1,2020,2,0.5,0.866025,0.866025,0.5,0.207289,0.97828,0.471713,0.881752
0.0,5,168524.488125,2020-01-01,5,0.2,3,0.0,0.6,52.624,0.0,15,0.2,11,108.0,95.0,1.0,0.0,1973.0,60.645928,78.373495,-1531.164167,2020-02-01,31d,2020,1,2020,2,0.5,0.866025,0.866025,0.5,0.201531,0.979482,0.490205,0.871607
0.0,1,60732.984375,2020-01-01,1,1.0,4,0.0,0.0,68.759999,0.0,1,0.0,1,0.0,0.0,0.0,0.0,0.0,59.459404,78.236544,0.0,2020-02-01,31d,2020,1,2020,2,0.5,0.866025,0.866025,0.5,0.203872,0.978998,0.508149,0.861269
0.0,5,109145.205469,2020-01-01,1,0.0,5,0.0,0.166667,52.1,0.0,12,0.833333,13,1.0,0.0,0.0,5.0,1960.5,60.590996,78.571849,262.860156,2020-02-01,31d,2020,1,2020,2,0.5,0.866025,0.866025,0.5,0.198139,0.980174,0.491041,0.871137
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
0.0,5,39952.650879,2022-04-01,1,0.0,7091,0.0,0.125,46.8,0.0,8,0.875,15,4.0,0.0,2.0,15.0,1959.0,58.997979,77.986319,0.0,2022-05-01,30d,2022,4,2022,5,0.866025,-0.5,0.5,-0.866025,0.208145,0.978098,0.515068,0.857149
0.0,2,61343.623828,2022-04-01,1,0.25,7092,0.0,0.5,57.33,0.0,4,0.25,8,0.0,9.0,0.0,0.0,1978.0,60.338311,79.075557,-2668.724766,2022-05-01,30d,2022,4,2022,5,0.866025,-0.5,0.5,-0.866025,0.189514,0.981878,0.494878,0.868963
0.0,10,170176.188715,2022-04-01,6,0.111111,7093,0.0,0.666667,47.879111,0.203704,54,0.148148,22,4.0,0.0,8.0,16.0,1994.0,59.964775,78.615105,-5748.605241,2022-05-01,30d,2022,4,2022,5,0.866025,-0.5,0.5,-0.866025,0.197399,0.980323,0.500532,0.865718
0.0,4,101027.404687,2022-04-01,1,0.111111,7094,0.055556,0.277778,78.313333,0.0,18,0.555556,12,3.0,1.0,3.0,4.0,2013.0,61.030449,78.211592,823.31057,2022-05-01,30d,2022,4,2022,5,0.866025,-0.5,0.5,-0.866025,0.204298,0.978909,0.484345,0.874877


In [12]:
from sklearn.model_selection import train_test_split

### Поскольку нельзя разделить в равных пропорциях на train и val дома, которые появились всего 1 раз, я их просто добавлю в train, чтобы в val встречались все из train

In [13]:
ftrain = new_train.filter(new_train['id_house'].is_in(sab['id_house']))
count_dates = new_train.group_by('id_house').agg(pl.count('datetime').alias('date_count'))

result = count_dates.filter(count_dates['date_count'] < 2)['id_house'].to_list()
sub_train = new_train.filter(new_train['id_house'].is_in(result))
new_new_train = new_train.filter(~new_train['id_house'].is_in(result))

In [14]:
trainset, valset = train_test_split(new_new_train, test_size=0.3, stratify=new_new_train['id_house'], random_state=42)
trainset = pl.concat([trainset, sub_train])

 ### Тут pandas чисто для проверки на корреляцию, больше он нигде не использован

In [15]:
cat_feats = ['room_zero', 'vc_city_quadkey', 'room_three', 'room_four', 'room_two', 'healthcare_cnt', 'flats_cnt', 'shopping_cnt', 'apart_to_room', 'mean_price', 'num_builds_live', 'build_year_median', 'number_total', 'room_one', 'mean_area', 'num_builds_series_live', 'id_house', 'beauty_cnt', 'lon_sin', 'lat_sin']
xgb_feats = ['room_zero', 'vc_city_quadkey', 'room_three', 'room_four', 'room_two', 'healthcare_cnt', 'flats_cnt', 'shopping_cnt', 'apart_to_room', 'mean_price', 'num_builds_live', 'build_year_median', 'number_total', 'room_one', 'mean_area', 'num_builds_series_live', 'id_house', 'beauty_cnt', 'lat_sin', 'lon_sin']
rfc_feats = ['date_diff_days', 'year', 'month', 'next_year', 'next_month', 'month_sin', 'month_cos', 'next_month_sin', 'next_month_cos', 'lat', 'lng', 'lat_cos', 'lon_cos']

In [16]:
((trainset.to_pandas()[cat_feats]).corr() > 0.95).sum()

room_zero                 1
vc_city_quadkey           1
room_three                1
room_four                 1
room_two                  1
healthcare_cnt            1
flats_cnt                 1
shopping_cnt              1
apart_to_room             1
mean_price                1
num_builds_live           1
build_year_median         1
number_total              1
room_one                  1
mean_area                 1
num_builds_series_live    1
id_house                  1
beauty_cnt                1
lon_sin                   1
lat_sin                   1
dtype: int64

In [17]:
((trainset.to_pandas()[xgb_feats]).corr() > 0.95).sum()

room_zero                 1
vc_city_quadkey           1
room_three                1
room_four                 1
room_two                  1
healthcare_cnt            1
flats_cnt                 1
shopping_cnt              1
apart_to_room             1
mean_price                1
num_builds_live           1
build_year_median         1
number_total              1
room_one                  1
mean_area                 1
num_builds_series_live    1
id_house                  1
beauty_cnt                1
lat_sin                   1
lon_sin                   1
dtype: int64

In [18]:
((trainset.to_pandas()[rfc_feats]).corr() > 0.95).sum()

date_diff_days    1
year              1
month             1
next_year         1
next_month        1
month_sin         1
month_cos         1
next_month_sin    1
next_month_cos    1
lat               1
lng               1
lat_cos           1
lon_cos           1
dtype: int64

In [19]:
len(cat_feats), len(xgb_feats), len(rfc_feats)

(20, 20, 13)

## Выше приведены доказательства о 2 критериях: о корреляции и о количестве фич для каждой модели

In [20]:
X_train_cat = trainset[cat_feats].to_numpy()
X_test_cat = valset[cat_feats].to_numpy()

X_train_xgb = trainset[xgb_feats].to_numpy()
X_test_xgb = valset[xgb_feats].to_numpy()

X_train_rfc = trainset[rfc_feats].to_numpy()
X_test_rfc = valset[rfc_feats].to_numpy()

y_train = trainset['target'].to_numpy()
y_test = valset['target'].to_numpy()

### Обучаем модели

In [21]:
# 1. CatBoost Model
catboost_model = catboost.CatBoostRegressor(
    iterations=4000,   
    learning_rate=0.05,
    depth=6,          
    l2_leaf_reg=3,        
    loss_function='MAE',
    cat_features=[],    
    random_seed=42,    
    verbose=100,    
)


# 2. XGBoost Model
xgboost_model = xgb.XGBRegressor(
    n_estimators=4000,  
    learning_rate=0.05,  
    max_depth=6,       
    subsample=0.8,      
    colsample_bytree=0.8, 
    gamma=0.1,           
    random_state=42      
)

# 3. Random Forest Model
rf_model = RandomForestRegressor(
    n_estimators=1000,  
    max_depth=6,        
    min_samples_split=10,
    min_samples_leaf=5,
    random_state=42
)

In [22]:
catboost_model.fit(X_train_cat, y_train, eval_set=(X_test_cat, y_test), early_stopping_rounds=100)

0:	learn: 2704.3326789	test: 2685.6980990	best: 2685.6980990 (0)	total: 80.7ms	remaining: 5m 22s
100:	learn: 2690.6835498	test: 2676.3704353	best: 2676.3671983 (95)	total: 1.78s	remaining: 1m 8s
Stopped by overfitting detector  (100 iterations wait)

bestTest = 2676.367198
bestIteration = 95

Shrink model to first 96 iterations.


<catboost.core.CatBoostRegressor at 0x7f5884bf7a00>

In [23]:
xgboost_model.fit(X_train_xgb, y_train, eval_set=[(X_test_xgb, y_test)], early_stopping_rounds=50, verbose=100)

[0]	validation_0-rmse:10398.92451




[51]	validation_0-rmse:10426.52009


In [24]:
rf_model.fit(X_train_rfc, y_train)

In [25]:
from sklearn.metrics import mean_absolute_error

### Смотрим метрику

In [26]:
print(mean_absolute_error(y_test, catboost_model.predict(X_test_cat)))
print(mean_absolute_error(y_test, xgboost_model.predict(X_test_xgb)))
print(mean_absolute_error(y_test, rf_model.predict(X_test_rfc)))

2676.3672004839286
2740.7970774956093
2772.16387182337


In [27]:
from tqdm import tqdm

### Формируем предсказание, и каждый шаг обновляем текущий mean_price на предсказанный diff

In [28]:
import numpy as np
from tqdm import tqdm

ans_list = []

for p in tqdm(sab.iter_rows()):
    temp = latest_info.filter(latest_info['id_house'] == p[2])
    
    # Обновление столбца "date" на основе условия
    if p[1] == "2022_07_01":
        temp = temp.with_columns(pl.lit("2022_06_01").alias("date"))
    elif p[1] == "2022_08_01":
        temp = temp.with_columns(pl.lit("2022_07_01").alias("date"))
    
    # Присваиваем "next_date" значение p[1]
    temp = temp.with_columns(pl.lit(p[1]).alias("next_date"))
    
    # Получаем предсказания от моделей
    temp = create_many_time_geo_features(create_date_features(temp)).fill_null(0).drop(['datetime', 'next_datetime', 'med_price'])
    
    # Переводим temp в формат numpy для модели
    temp_np = temp.to_numpy()

    temp_np_cat = temp[cat_feats].to_numpy()
    temp_np_xgb = temp[xgb_feats].to_numpy()
    temp_np_rfc = temp[rfc_feats].to_numpy()

    # Предсказания от моделей
    pred = np.mean([
        catboost_model.predict(temp_np_cat),
        xgboost_model.predict(temp_np_xgb),
        rf_model.predict(temp_np_rfc)
    ])

    # Здесь diff — это разница, предсказанная моделями
    diff = pred

    # Теперь обновляем столбец mean_price в latest_info для конкретного дома
    # Обновляем только те строки, где id_house совпадает с текущим p[2]
    latest_info = latest_info.with_columns(
        pl.when(pl.col("id_house") == p[2])
        .then(pl.col("mean_price") + diff)  # Прибавляем diff к mean_price
        .otherwise(pl.col("mean_price"))
        .alias("mean_price")
    )
    
    # Добавляем предсказание в список для дальнейшей обработки
    ans_list.append(pred + sum(temp['mean_price']))

# После завершения цикла ans_list будет содержать все нужные результаты.



15597it [10:09, 25.57it/s]


In [29]:
sab

target,date,id_house
f64,str,i64
0.0,"""2022-06-01""",6123
0.0,"""2022-07-01""",6123
0.0,"""2022-08-01""",6123
0.0,"""2022-06-01""",4104
0.0,"""2022-07-01""",4104
…,…,…
0.0,"""2022-07-01""",6599
0.0,"""2022-08-01""",6599
0.0,"""2022-06-01""",5668
0.0,"""2022-07-01""",5668


In [30]:
ans_list

[45829.449917386446,
 46735.99590158523,
 47438.3267473794,
 48476.50043645072,
 49391.207261500596,
 50107.761406398386,
 47781.941694895366,
 48710.883163935796,
 49437.81209132969,
 41721.80043664515,
 42631.88132191477,
 43337.7470687786,
 68388.7197672947,
 69298.40881507684,
 70003.88272445319,
 67927.92199966215,
 68837.61093606176,
 69543.08473405556,
 54936.44554851839,
 55839.390192045066,
 38707.07899111059,
 39614.68622448627,
 40318.07831945616,
 68885.83696356614,
 69798.73613531423,
 70507.42016865654,
 58069.651637381954,
 58983.76777066166,
 59693.66876553796,
 58260.56645271619,
 38492.916553401665,
 39398.54870636699,
 40099.96572092653,
 35133.9700689665,
 36032.85781764249,
 36727.530427912694,
 38820.138947505846,
 39720.34478242595,
 40416.33547894026,
 45843.60719186522,
 46743.75165072416,
 47439.68097117847,
 28857.088857874423,
 29742.020049305167,
 30422.736102330116,
 37710.397155648934,
 38594.019107384476,
 39273.425920714224,
 24767.03766653922,
 56077.2

In [31]:
len(ans_list)

15597

### Сохраняем предсказание и сабмитим

In [32]:
sab1 = pl.read_csv(r'/kaggle/input/ioai-2025-preparation-class-lesson-4-homework/sample_submission.csv')

In [33]:
sab1 = sab1.with_columns([
    pl.Series('target', ans_list)
])

In [34]:
sab1.write_csv('submission.csv')