In [12]:
import pandas as pd
from pycaret.regression import setup, compare_models, save_model

# 1. Load and prepare the data
# The file is read as semicolon-separated text in ISO-8859-2 encoding.
df = pd.read_csv('Ceny mieszkań.csv', sep=';', encoding='iso-8859-2')

# Wide -> long: the first column identifies the row (renamed to 'Miasto'),
# every other column header becomes an 'Okres' label with its value in 'Cena'.
df_long = df.melt(id_vars=df.columns[0], var_name='Okres', value_name='Cena')
df_long.columns = ['Miasto', 'Okres', 'Cena']

# Pull the 4-digit year and the Roman-numeral quarter out of the period label.
# Labels that do not match the pattern yield NaN and are dropped further down.
df_long[['Rok', 'Kwartał']] = df_long['Okres'].str.extract(r'(\d{4})\s+([IVX]+)\s+kwarta')
df_long = df_long.drop(columns=['Okres'])

# Roman numeral -> quarter number; anything outside I..IV maps to NaN.
map_kwartał = {'I': 1, 'II': 2, 'III': 3, 'IV': 4}
df_long['Kwartał'] = df_long['Kwartał'].map(map_kwartał)

if pd.api.types.is_numeric_dtype(df_long['Cena']):
    # Already parsed as numbers by read_csv: the text clean-up below would
    # strip the decimal point ('5432.0' -> '54320'), so only cast here.
    df_long['Cena'] = df_long['Cena'].astype(float)
else:
    # Text prices: keep only digits and the decimal comma, then switch the
    # comma to a dot so the value can be parsed as a float.
    cena_text = (
        df_long['Cena'].astype(str)
        .str.replace(r'[^\d,]', '', regex=True)
        .str.replace(',', '.', regex=False)
    )
    # errors='coerce' turns empty/unparseable cells into NaN. The previous
    # `.replace('', pd.NA).astype(float)` fails on such cells because pd.NA
    # in an object column cannot be cast to float.
    df_long['Cena'] = pd.to_numeric(cena_text, errors='coerce')

# Discard rows with a missing city, price, year or quarter.
df_long = df_long.dropna()

# With the NaNs gone, year and quarter can be stored as plain integers.
df_long = df_long.astype({'Rok': int, 'Kwartał': int})

# 2. PyCaret configuration
# The experiment is built on the long-format frame with 'Cena' as the target.
# 'Miasto', 'Rok' and 'Kwartał' are all declared categorical, and the session
# id is pinned to 123.
setup_options = {
    'data': df_long,
    'target': 'Cena',
    'categorical_features': ['Miasto', 'Rok', 'Kwartał'],
    'session_id': 123,
    'verbose': True,
}
exp = setup(**setup_options)

# 3. Compare the candidate models and keep the best one
best_model = compare_models()

# 4. Save the model to a file
save_model(best_model, 'model_ceny_mieszkan')

print("Model wytrenowany i zapisany jako 'model_ceny_mieszkan.pkl'")


Unnamed: 0,Description,Value
0,Session id,123
1,Target,Cena
2,Target type,Regression
3,Original data shape,"(114, 4)"
4,Transformed data shape,"(114, 16)"
5,Transformed train set shape,"(79, 16)"
6,Transformed test set shape,"(35, 16)"
7,Categorical features,3
8,Preprocess,True
9,Imputation type,simple


Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE,TT (Sec)
gbr,Gradient Boosting Regressor,280.7405,163709.7015,363.2195,0.9741,0.0372,0.0303,0.015
lr,Linear Regression,319.8729,162994.5257,382.4463,0.966,0.0423,0.0364,0.592
br,Bayesian Ridge,322.9989,163611.5928,383.8827,0.966,0.0429,0.0368,0.01
lasso,Lasso Regression,323.2158,163936.9477,384.3523,0.9659,0.0429,0.0368,0.345
llar,Lasso Least Angle Regression,324.6178,166405.4764,388.8814,0.9657,0.0439,0.0373,0.009
lar,Least Angle Regression,342.7125,179554.5369,405.012,0.9638,0.0476,0.0401,0.01
ridge,Ridge Regression,379.7876,256535.8356,462.9669,0.961,0.0464,0.0404,0.01
par,Passive Aggressive Regressor,348.008,239988.2499,453.0485,0.9607,0.0434,0.0354,0.011
huber,Huber Regressor,351.047,205489.0572,432.5831,0.9593,0.0454,0.0384,0.01
et,Extra Trees Regressor,370.4757,261525.0574,478.1262,0.9586,0.0491,0.0392,0.026


Processing:   0%|          | 0/77 [00:00<?, ?it/s]

Transformation Pipeline and Model Successfully Saved
Model wytrenowany i zapisany jako 'model_ceny_mieszkan.pkl'
