In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.optimizers import Nadam
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.metrics import r2_score
from datetime import datetime
from sklearn.preprocessing import StandardScaler

In [2]:
df1 = pd.read_csv("Updated_Houses.csv", encoding = "utf-8")
df1.head()

Unnamed: 0,district,city,floor,price,rooms,sq,year,price_per_sq,update_date,offer_url
0,Mokotów,Warszawa,2,1925000.0,2,71.0,2008,27113.0,2024-07-04,https://www.otodom.pl/pl/oferta/gotowy-do-wpro...
1,Grunwald,Poznań,3,459000.0,2,43.39,1900,10578.0,2024-07-04,https://www.otodom.pl/pl/oferta/m-z-pieknym-wy...
2,Grunwald,Poznań,2,459000.0,2,43.38,1900,10581.0,2024-07-04,https://www.otodom.pl/pl/oferta/piekne-m-z-bal...
3,Grunwald,Poznań,3,880000.0,3,91.88,1900,9578.0,2024-07-04,https://www.otodom.pl/pl/oferta/twoje-m-w-kami...
4,Grunwald,Poznań,3,880000.0,3,91.88,1900,9578.0,2024-07-04,https://www.otodom.pl/pl/oferta/3pak-uzyskuj-z...


In [3]:
def remove_outliers(df):
    df_out = pd.DataFrame()
    for key, subdf in df.groupby('city'):
        m = np.mean(subdf.price_per_sq)
        st = np.std(subdf.price_per_sq)
        reduced_df = subdf[(subdf.price_per_sq>(m-st)) & (subdf.price_per_sq <= (m+st))]
        df_out = pd.concat([df_out, reduced_df], ignore_index = True)
    return df_out

df2 = remove_outliers(df1)

In [4]:
df2.shape

(29885, 10)

In [5]:
df2.isnull().sum()

district            0
city                0
floor               0
price               0
rooms               0
sq                  0
year                0
price_per_sq        0
update_date         0
offer_url       16681
dtype: int64

In [6]:
print(f'Krk:{len(df2[df2.city == "Kraków"].district.unique())}')
print(f'WWa:{len(df2[df2.city == "Warszawa"].district.unique())}')
print(f'Pzn:{len(df2[df2.city == "Poznań"].district.unique())}')

Krk:18
WWa:18
Pzn:5


In [7]:
districts = np.sort(df2[(df2.city == "Kraków")].district.unique())
districts

array(['Bieńczyce', 'Bieżanów-Prokocim', 'Bronowice', 'Czyżyny',
       'Dębniki', 'Grzegórzki', 'Krowodrza', 'Mistrzejowice', 'Nowa Huta',
       'Podgórze', 'Podgórze Duchackie', 'Prądnik Biały',
       'Prądnik Czerwony', 'Stare Miasto', 'Swoszowice',
       'Wzgórza Krzesławickie', 'Zwierzyniec', 'Łagiewniki-Borek Fałęcki'],
      dtype=object)

In [8]:
df2.head(1)

Unnamed: 0,district,city,floor,price,rooms,sq,year,price_per_sq,update_date,offer_url
0,Prądnik Biały,Kraków,4,749000.0,2,42.01,2023,17829.0,2024-07-04,https://www.otodom.pl/pl/oferta/2-pokoje-widok...


In [9]:
dummies = pd.get_dummies(df2.city)
df3 = pd.concat([df2,dummies],axis='columns')
df3.head(4)

Unnamed: 0,district,city,floor,price,rooms,sq,year,price_per_sq,update_date,offer_url,Kraków,Poznań,Warszawa
0,Prądnik Biały,Kraków,4,749000.0,2,42.01,2023,17829.0,2024-07-04,https://www.otodom.pl/pl/oferta/2-pokoje-widok...,True,False,False
1,Podgórze Duchackie,Kraków,2,649000.0,2,42.99,2023,15097.0,2024-07-04,https://www.otodom.pl/pl/oferta/wyjatkowe-mies...,True,False,False
2,Podgórze Duchackie,Kraków,2,649000.0,2,42.99,2023,15097.0,2024-07-04,https://www.otodom.pl/pl/oferta/nowe-mieszkani...,True,False,False
3,Podgórze Duchackie,Kraków,8,655000.0,2,36.5,2023,17945.0,2024-07-04,https://www.otodom.pl/pl/oferta/gotowe-dwustro...,True,False,False


In [10]:
dummies = pd.get_dummies(df2.district)
df3 = pd.concat([df3,dummies],axis='columns')
df3.head(4)

Unnamed: 0,district,city,floor,price,rooms,sq,year,price_per_sq,update_date,offer_url,...,Wesoła,Wilanów,Wilda,Wola,Wzgórza Krzesławickie,Włochy,Zwierzyniec,Łagiewniki-Borek Fałęcki,Śródmieście,Żoliborz
0,Prądnik Biały,Kraków,4,749000.0,2,42.01,2023,17829.0,2024-07-04,https://www.otodom.pl/pl/oferta/2-pokoje-widok...,...,False,False,False,False,False,False,False,False,False,False
1,Podgórze Duchackie,Kraków,2,649000.0,2,42.99,2023,15097.0,2024-07-04,https://www.otodom.pl/pl/oferta/wyjatkowe-mies...,...,False,False,False,False,False,False,False,False,False,False
2,Podgórze Duchackie,Kraków,2,649000.0,2,42.99,2023,15097.0,2024-07-04,https://www.otodom.pl/pl/oferta/nowe-mieszkani...,...,False,False,False,False,False,False,False,False,False,False
3,Podgórze Duchackie,Kraków,8,655000.0,2,36.5,2023,17945.0,2024-07-04,https://www.otodom.pl/pl/oferta/gotowe-dwustro...,...,False,False,False,False,False,False,False,False,False,False


In [11]:
X = df3.drop(['price',"district", "update_date", "city", "offer_url", "price_per_sq"], axis='columns')
X_columns = X.columns
y = df3.price
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2,random_state = 10)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

margin = 0.1 * y_train
y_train_lower = y_train - margin
y_train_upper = y_train + margin

margin_test = 0.1 * y_test
y_test_lower = y_test - margin_test
y_test_upper = y_test + margin_test

In [12]:
df3.head()

Unnamed: 0,district,city,floor,price,rooms,sq,year,price_per_sq,update_date,offer_url,...,Wesoła,Wilanów,Wilda,Wola,Wzgórza Krzesławickie,Włochy,Zwierzyniec,Łagiewniki-Borek Fałęcki,Śródmieście,Żoliborz
0,Prądnik Biały,Kraków,4,749000.0,2,42.01,2023,17829.0,2024-07-04,https://www.otodom.pl/pl/oferta/2-pokoje-widok...,...,False,False,False,False,False,False,False,False,False,False
1,Podgórze Duchackie,Kraków,2,649000.0,2,42.99,2023,15097.0,2024-07-04,https://www.otodom.pl/pl/oferta/wyjatkowe-mies...,...,False,False,False,False,False,False,False,False,False,False
2,Podgórze Duchackie,Kraków,2,649000.0,2,42.99,2023,15097.0,2024-07-04,https://www.otodom.pl/pl/oferta/nowe-mieszkani...,...,False,False,False,False,False,False,False,False,False,False
3,Podgórze Duchackie,Kraków,8,655000.0,2,36.5,2023,17945.0,2024-07-04,https://www.otodom.pl/pl/oferta/gotowe-dwustro...,...,False,False,False,False,False,False,False,False,False,False
4,Bieżanów-Prokocim,Kraków,1,813000.0,3,66.7,2023,12189.0,2024-07-04,https://www.otodom.pl/pl/oferta/oddane-do-uzyt...,...,False,False,False,False,False,False,False,False,False,False


In [31]:
from sklearn.preprocessing import StandardScaler
import pandas as pd

# Załóżmy, że df3 jest Twoją oryginalną ramką danych
data_to_corr = df3.drop(["district", "update_date", "city", "offer_url", "price_per_sq"], axis='columns')

# Normalizacja wszystkich kolumn
scaler = StandardScaler()
X_scaled = scaler.fit_transform(data_to_corr)

# Przekształcenie z powrotem do DataFrame
scaled_df = pd.DataFrame(X_scaled, columns=data_to_corr.columns)

# Obliczanie macierzy korelacji
corr_matrix3 = scaled_df.corr()
print(corr_matrix3['price'].sort_values(ascending=False))

price                       1.000000
sq                          0.907759
sq_rooms                    0.847123
rooms                       0.631219
sq_per_room                 0.363777
Warszawa                    0.236638
Wilanów                     0.194347
Mokotów                     0.148627
Zwierzyniec                 0.118125
Śródmieście                 0.110108
Ursynów                     0.082958
Ochota                      0.063253
Żoliborz                    0.050677
Wola                        0.050019
floor                       0.035898
Bemowo                      0.033040
Grzegórzki                  0.028846
year                        0.014687
Krowodrza                   0.013940
Wawer                       0.013909
Łagiewniki-Borek Fałęcki    0.007083
Włochy                      0.004709
Praga-Południe              0.003946
Praga-Północ                0.002066
Bielany                     0.000155
Wesoła                     -0.001687
Czyżyny                    -0.005196
B

In [37]:
X_train.columns

Index(['floor', 'rooms', 'sq', 'year', 'Kraków', 'Poznań', 'Warszawa',
       'Bemowo', 'Białołęka', 'Bielany', 'Bieńczyce', 'Bieżanów-Prokocim',
       'Bronowice', 'Czyżyny', 'Dębniki', 'Grunwald', 'Grzegórzki', 'Jeżyce',
       'Krowodrza', 'Mistrzejowice', 'Mokotów', 'Nowa Huta', 'Nowe Miasto',
       'Ochota', 'Podgórze', 'Podgórze Duchackie', 'Praga-Południe',
       'Praga-Północ', 'Prądnik Biały', 'Prądnik Czerwony', 'Rembertów',
       'Stare Miasto', 'Swoszowice', 'Targówek', 'Ursus', 'Ursynów', 'Wawer',
       'Wesoła', 'Wilanów', 'Wilda', 'Wola', 'Wzgórza Krzesławickie', 'Włochy',
       'Zwierzyniec', 'Łagiewniki-Borek Fałęcki', 'Śródmieście', 'Żoliborz',
       'sq_rooms', 'sq_squared', 'rooms_squared', 'sq_per_room',
       'rooms_per_sq'],
      dtype='object')

In [38]:
correlation = data_to_corr.corr()['price'].abs().sort_values(ascending=False)

# Przypisanie wag na podstawie korelacji z 'price'
weights = [correlation.get(col, 1) for col in X_train.columns]

# Przeliczenie wag na tensor
current_weights = np.array(weights)
current_weights

array([3.58976326e-02, 6.31218911e-01, 9.07758766e-01, 1.46870786e-02,
       7.69761555e-02, 2.23044896e-01, 2.36637938e-01, 3.30395351e-02,
       5.28132642e-02, 1.55232039e-04, 5.71939460e-02, 5.64101492e-02,
       8.64028778e-03, 5.19641361e-03, 1.42034098e-02, 9.90400022e-02,
       2.88456995e-02, 6.98932929e-02, 1.39396873e-02, 5.17363793e-02,
       1.48626607e-01, 4.81728758e-02, 1.18774844e-01, 6.32525436e-02,
       2.84329851e-02, 4.58592513e-02, 3.94611074e-03, 2.06550635e-03,
       5.20898130e-02, 1.34064474e-02, 2.15857729e-02, 7.06321344e-02,
       2.34787588e-02, 3.11380617e-02, 3.02728083e-02, 8.29575371e-02,
       1.39088209e-02, 1.68650524e-03, 1.94347453e-01, 4.23480210e-02,
       5.00194838e-02, 2.06334737e-02, 4.70881934e-03, 1.18124663e-01,
       7.08259491e-03, 1.10108168e-01, 5.06772667e-02, 8.47123072e-01,
       1.00000000e+00, 1.00000000e+00, 3.63776693e-01, 3.01812220e-01])

In [41]:
data_to_corr = df3.drop(["district", "update_date", "city", "offer_url", "price_per_sq"], axis='columns')

X = df3.drop(['price', "district", "update_date", "city", "offer_url", "price_per_sq"], axis='columns')
X_columns = X.columns
y = df3.price
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=10)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

margin = 0.1 * y_train
y_train_lower = y_train - margin
y_train_upper = y_train + margin

margin_test = 0.1 * y_test
y_test_lower = y_test - margin_test
y_test_upper = y_test + margin_test

# Wyznaczanie korelacji dla kolumn obecnych w `X_train`
# Wyznaczanie korelacji dla kolumn obecnych w `X_train`
correlation = data_to_corr.corr()['price'].abs()

# Normalizacja cech w zbiorze treningowym
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Dodanie znormalizowanych ważonych cech
def add_weighted_features(X, correlation, X_columns):
    # Przypisanie wag na podstawie korelacji tylko dla kolumn w `X_train`
    weights = np.array([correlation.get(col, 1) for col in X_columns])
    weighted_features = X * weights
    # Łączenie oryginalnych cech z nowymi, ważonymi cechami
    return np.concatenate((X, weighted_features), axis=1)

# Tworzenie rozszerzonych zbiorów danych
X_train_extended = add_weighted_features(X_train_scaled, correlation, X_train.columns)
X_test_extended = add_weighted_features(X_test_scaled, correlation, X_train.columns)

# Budowa modelu z nowymi wejściami
input_layer = Input(shape=(X_train_extended.shape[1],))
dense1 = Dense(128, activation='relu')(input_layer)
dense2 = Dense(64, activation='relu')(dense1)
dense3 = Dense(32, activation='relu')(dense2)

# Wyjścia modelu
output_lower = Dense(1, name='lower_output')(dense3)
output_upper = Dense(1, name='upper_output')(dense3)

# Kompilacja modelu
weighted_model = Model(inputs=input_layer, outputs=[output_lower, output_upper])
weighted_model.compile(optimizer='nadam', loss='mean_squared_error', metrics=['mse', 'mae'])

# Trening modelu
history = weighted_model.fit(X_train_extended, [y_train_lower, y_train_upper],
                             epochs=100, batch_size=32, validation_split=0.2)

# Ewaluacja modelu
test_loss, test_mse, test_mae = weighted_model.evaluate(
    X_test_extended, [y_test_lower, y_test_upper]
)

# Sprawdzanie wyników modelu
y_pred_lower, y_pred_upper = weighted_model.predict(X_test_extended)

# R² dla dolnych i górnych granic
r2_lower = r2_score(y_test_lower, y_pred_lower)
r2_upper = r2_score(y_test_upper, y_pred_upper)

print(f"R² dla dolnych granic (weighted_model): {r2_lower}")
print(f"R² dla górnych granic (weighted_model): {r2_upper}")


Epoch 1/100
[1m598/598[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 5ms/step - loss: 1925065080832.0000 - lower_output_mse: 769071775744.0000 - upper_output_mae: 952051.5000 - val_loss: 314787725312.0000 - val_lower_output_mse: 110174691328.0000 - val_upper_output_mae: 412322.5625
Epoch 2/100
[1m598/598[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - loss: 172228902912.0000 - lower_output_mse: 63038689280.0000 - upper_output_mae: 270031.0938 - val_loss: 50206638080.0000 - val_lower_output_mse: 20318066688.0000 - val_upper_output_mae: 115787.6406
Epoch 3/100
[1m598/598[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - loss: 47694954496.0000 - lower_output_mse: 19152726016.0000 - upper_output_mae: 118351.1562 - val_loss: 44923015168.0000 - val_lower_output_mse: 18020874240.0000 - val_upper_output_mae: 111193.5234
Epoch 4/100
[1m598/598[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - loss: 42955137024.0000 - lower_output_mse: 1

In [42]:
from keras.models import Model
from keras.layers import Input, Dense
from sklearn.preprocessing import StandardScaler
import numpy as np
import tensorflow as tf

# Wyznaczanie korelacji z ceną
correlation = data_to_corr.corr()['price'].abs()

# Przypisanie wag na podstawie korelacji z 'price'
weights = [correlation.get(col, 1) for col in X.columns]

# Mnożenie cech przez wagi
X_weighted_train = X_train * weights
X_weighted_test = X_test * weights

# Skalowanie nowych cech ważonych
scaler_weighted = StandardScaler()
X_weighted_train_scaled = scaler_weighted.fit_transform(X_weighted_train)
X_weighted_test_scaled = scaler_weighted.transform(X_weighted_test)

# Budowanie modelu
input_layer = Input(shape=(X_weighted_train_scaled.shape[1],))
dense1 = Dense(64, activation='relu')(input_layer)
dense2 = Dense(32, activation='relu')(dense1)
output_lower = Dense(1, name='lower_output')(dense2)
output_upper = Dense(1, name='upper_output')(dense2)

# Kompilacja modelu
weighted_only_model = Model(inputs=input_layer, outputs=[output_lower, output_upper])
weighted_only_model.compile(optimizer='nadam', loss='mean_squared_error', metrics=['mse', 'mae'])

# Trening modelu
history_weighted_only = weighted_only_model.fit(
    X_weighted_train_scaled, [y_train_lower, y_train_upper],
    epochs=100, batch_size=32, validation_split=0.2
)

# Ewaluacja modelu
test_loss, test_mse, test_mae = weighted_only_model.evaluate(
    X_weighted_test_scaled, [y_test_lower, y_test_upper]
)

# Predykcje
y_pred_lower, y_pred_upper = weighted_only_model.predict(X_weighted_test_scaled)

# R² dla dolnych i górnych granic
r2_lower = r2_score(y_test_lower, y_pred_lower)
r2_upper = r2_score(y_test_upper, y_pred_upper)

print(f"R² dla dolnych granic (weighted_only_model): {r2_lower}")
print(f"R² dla górnych granic (weighted_only_model): {r2_upper}")


Epoch 1/100
[1m598/598[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - loss: 2032617914368.0000 - lower_output_mse: 815016181760.0000 - upper_output_mae: 975462.8125 - val_loss: 1938583584768.0000 - val_lower_output_mse: 776467447808.0000 - val_upper_output_mae: 964463.8750
Epoch 2/100
[1m598/598[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - loss: 1947818524672.0000 - lower_output_mse: 779082268672.0000 - upper_output_mae: 960607.7500 - val_loss: 1805113098240.0000 - val_lower_output_mse: 717166608384.0000 - val_upper_output_mae: 934822.6250
Epoch 3/100
[1m598/598[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - loss: 1781136752640.0000 - lower_output_mse: 704885751808.0000 - upper_output_mae: 925648.9375 - val_loss: 1519292514304.0000 - val_lower_output_mse: 591214018560.0000 - val_upper_output_mae: 867053.4375
Epoch 4/100
[1m598/598[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - loss: 1445039177728.0000 - lower

In [13]:
data_to_corr = df3.drop(["district", "update_date", "city", "offer_url", "price_per_sq"], axis='columns')
corr_matrix = data_to_corr.corr()

In [14]:
corr_matrix = data_to_corr.corr()
# print(corr_matrix['sq'].sort_values(ascending=False))

In [15]:
df3['sq_rooms'] = df3['sq'] * df3['rooms']
df3['sq_per_room'] = df3['sq'] / df3['rooms']
df3['rooms_per_sq'] = df3['rooms'] / df3['sq']

In [62]:
data_to_corr2 = df3.drop(["district", "update_date", "city", "offer_url", "price_per_sq"], axis='columns')
corr_matrix2 = data_to_corr2.corr()
#print(corr_matrix2['price'].sort_values(ascending=False))

In [52]:
X = df3.drop(['price', "district", "update_date", "city", "offer_url", "price_per_sq"], axis='columns')
X_columns = X.columns
y = df3.price

# Dzielimy dane na zestaw treningowy i testowy
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=10)

# Normalizacja
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Ustalamy dolne i górne granice
margin = 0.1 * y_train
y_train_lower = y_train - margin
y_train_upper = y_train + margin

# Definiowanie modelu
input_layer = Input(shape=(X_train.shape[1],))
dense1 = Dense(64, activation='relu')(input_layer)
dense2 = Dense(32, activation='relu')(dense1)

output_lower = Dense(1, name='lower_output')(dense2)
output_upper = Dense(1, name='upper_output')(dense2)

model = Model(inputs=input_layer, outputs=[output_lower, output_upper])
model.compile(optimizer='nadam', loss='mean_squared_error', metrics=['mse', 'mae'])

# Trenowanie modelu
history = model.fit(X_train_scaled, [y_train_lower, y_train_upper], 
                    epochs=100, batch_size=32, validation_split=0.2)

# Ewaluacja modelu
test_loss, test_mse, test_mae = model.evaluate(X_test_scaled, [y_test_lower, y_test_upper])

# Przewidywanie
y_pred_lower, y_pred_upper = model.predict(X_test_scaled)

# R^2 dla dolnych i górnych granic
r2_lower = r2_score(y_test_lower, y_pred_lower)
r2_upper = r2_score(y_test_upper, y_pred_upper)
print(f"R^2 dla dolnych granic: {r2_lower}")
print(f"R^2 dla górnych granic: {r2_upper}")

Epoch 1/100
[1m598/598[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - loss: 2018635546624.0000 - lower_output_mse: 809364553728.0000 - upper_output_mae: 977070.0625 - val_loss: 1934364770304.0000 - val_lower_output_mse: 774337462272.0000 - val_upper_output_mae: 963610.1250
Epoch 2/100
[1m598/598[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 2026024599552.0000 - lower_output_mse: 809662480384.0000 - upper_output_mae: 964487.3125 - val_loss: 1771845058560.0000 - val_lower_output_mse: 701380362240.0000 - val_upper_output_mae: 927225.8750
Epoch 3/100
[1m598/598[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 1732942626816.0000 - lower_output_mse: 682079289344.0000 - upper_output_mae: 916000.8750 - val_loss: 1423015149568.0000 - val_lower_output_mse: 546978562048.0000 - val_upper_output_mae: 842468.0000
Epoch 4/100
[1m598/598[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 1375337578496.0000 - lower

In [60]:
from keras.models import Model
from keras.layers import Input, Dense

weights = [1 for i in range(len(X_train.columns))]

current_weights = np.array(weights) 
sample_weights = np.dot(X_train, current_weights)
sample_weights_tensor = tf.convert_to_tensor(sample_weights, dtype=tf.float32)

input_layer_split = Input(shape=(X_train.shape[1],))
dense1_split = Dense(64, activation='relu')(input_layer_split)
dense2_split = Dense(32, activation='relu')(dense1_split)

# Wyjście dla dolnej granicy przedziału
output_lower_split = Dense(1, name='lower_output_split')(dense2_split)

# Wyjście dla górnej granicy przedziału
output_upper_split = Dense(1, name='upper_output_split')(dense2_split)

# Kompilacja modelu z dwoma wyjściami
split_fork_model = Model(inputs=input_layer_split, outputs=[output_lower_split, output_upper_split])
split_fork_model.compile(optimizer='nadam', loss='mean_squared_error', metrics=['mse', 'mae'])

# Trenowanie modelu
history_split = split_fork_model.fit(X_train_scaled, [y_train_lower, y_train_upper], epochs=100, batch_size=32, validation_split=0.2, sample_weight=sample_weights_tensor)

# Ewaluacja modelu
test_loss_split, test_mse_split, test_mae_split = split_fork_model.evaluate(X_test_scaled, [y_test_lower, y_test_upper])

# Sprawdzanie wyniku modelu
y_pred_lower_split, y_pred_upper_split = split_fork_model.predict(X_test_scaled)

# R^2 dla dolnych i górnych granic
r2_lower_split = r2_score(y_test_lower, y_pred_lower_split)
r2_upper_split = r2_score(y_test_upper, y_pred_upper_split)
print(f"R^2 dla dolnych granic (split_fork_model): {r2_lower_split}")
print(f"R^2 dla górnych granic (split_fork_model): {r2_upper_split}")


Epoch 1/100
[1m598/598[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - loss: 4177263448817664.0000 - lower_output_split_mse: 797974790144.0000 - upper_output_split_mae: 974032.6250 - val_loss: 4070188840386560.0000 - val_lower_output_split_mse: 777581035520.0000 - val_upper_output_split_mae: 964802.3125
Epoch 2/100
[1m598/598[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 4136076591497216.0000 - lower_output_split_mse: 786990497792.0000 - upper_output_split_mae: 964443.1875 - val_loss: 3824298741465088.0000 - val_lower_output_split_mse: 724502904832.0000 - val_upper_output_split_mae: 935821.4375
Epoch 3/100
[1m598/598[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 3751587897933824.0000 - lower_output_split_mse: 707041755136.0000 - upper_output_split_mae: 924829.3750 - val_loss: 3293557016231936.0000 - val_lower_output_split_mse: 611129491456.0000 - val_upper_output_split_mae: 869479.5625
Epoch 4/100
[1m598/598[0m [3

In [64]:
from keras.layers import Concatenate, Input, Dense
from keras.models import Model
from sklearn.metrics import mean_absolute_error, r2_score

# Definiowanie wejść
input_layer = Input(shape=(X_train.shape[1],))

# Podział na cechy sq i rooms
sq_input = Input(shape=(1,), name='sq_input')
rooms_input = Input(shape=(1,), name='rooms_input')

# Przeszłość dla każdej cechy
sq_dense = Dense(64, activation='relu')(sq_input)
rooms_dense = Dense(64, activation='relu')(rooms_input)

# Połączenie cech
concatenated = Concatenate()([sq_dense, rooms_dense, input_layer])

dense1 = Dense(64, activation='relu')(concatenated)
dense2 = Dense(32, activation='relu')(dense1)

output_lower = Dense(1, name='lower_output')(dense2)
output_upper = Dense(1, name='upper_output')(dense2)

# Kompilacja modelu
model = Model(inputs=[sq_input, rooms_input, input_layer], outputs=[output_lower, output_upper])
model.compile(optimizer='nadam', loss='mean_squared_error', metrics=['mse', 'mae'])

# Trenowanie modelu - zmień na .values aby uzyskać tablice NumPy
history = model.fit([X_train['sq'].values, X_train['rooms'].values, X_train_scaled], 
                    [y_train_lower, y_train_upper], 
                    epochs=100, batch_size=32, validation_split=0.2, 
                    sample_weight=sample_weights_tensor)

# Ewaluacja modelu
test_loss, test_mse, test_mae = model.evaluate(
    [X_test['sq'].values, X_test['rooms'].values, X_test_scaled], 
    [y_test_lower, y_test_upper]
)

# Predykcja
y_pred_lower, y_pred_upper = model.predict([X_test['sq'].values, X_test['rooms'].values, X_test_scaled])

# Obliczanie R²
r2_lower = r2_score(y_test_lower, y_pred_lower)
r2_upper = r2_score(y_test_upper, y_pred_upper)

print(f"R² dla dolnych granic: {r2_lower:.4f}")
print(f"R² dla górnych granic: {r2_upper:.4f}")


Epoch 1/100
[1m598/598[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - loss: 3783335759314944.0000 - lower_output_mse: 704465600512.0000 - upper_output_mae: 909181.0000 - val_loss: 203475585597440.0000 - val_lower_output_mse: 34138126336.0000 - val_upper_output_mae: 171839.6875
Epoch 2/100
[1m598/598[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 186084138942464.0000 - lower_output_mse: 34091397120.0000 - upper_output_mae: 161412.0625 - val_loss: 152802991013888.0000 - val_lower_output_mse: 29247387648.0000 - val_upper_output_mae: 155959.6094
Epoch 3/100
[1m598/598[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - loss: 162675359219712.0000 - lower_output_mse: 30948630528.0000 - upper_output_mae: 157955.5781 - val_loss: 149490732367872.0000 - val_lower_output_mse: 28493090816.0000 - val_upper_output_mae: 152744.2812
Epoch 4/100
[1m598/598[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 161181029367808.00

In [73]:
from keras.models import Model
from keras.layers import Input, Dense

current_weights = np.array([9 if col == 'sq' else 6 if col == 'rooms' else 1 for col in X_train.columns])

sample_weights = np.dot(X_train, current_weights)
sample_weights_tensor = tf.convert_to_tensor(sample_weights, dtype=tf.float32)

input_layer_split = Input(shape=(X_train.shape[1],))
dense1_split = Dense(64, activation='relu')(input_layer_split)
dense2_split = Dense(32, activation='relu')(dense1_split)

# Wyjście dla dolnej granicy przedziału
output_lower_split = Dense(1, name='lower_output_split')(dense2_split)

# Wyjście dla górnej granicy przedziału
output_upper_split = Dense(1, name='upper_output_split')(dense2_split)

# Kompilacja modelu z dwoma wyjściami
split_fork_model = Model(inputs=input_layer_split, outputs=[output_lower_split, output_upper_split])
split_fork_model.compile(optimizer='nadam', loss='mean_squared_error', metrics=['mse', 'mae'])

# Trenowanie modelu
history_split = split_fork_model.fit(X_train_scaled, [y_train_lower, y_train_upper], epochs=100, batch_size=32, validation_split=0.2, sample_weight=sample_weights_tensor)

# Ewaluacja modelu
test_loss_split, test_mse_split, test_mae_split = split_fork_model.evaluate(X_test_scaled, [y_test_lower, y_test_upper])

# Sprawdzanie wyniku modelu
y_pred_lower_split, y_pred_upper_split = split_fork_model.predict(X_test_scaled)

# R^2 dla dolnych i górnych granic
r2_lower_split = r2_score(y_test_lower, y_pred_lower_split)
r2_upper_split = r2_score(y_test_upper, y_pred_upper_split)
print(f"R^2 dla dolnych granic (split_fork_model): {r2_lower_split}")
print(f"R^2 dla górnych granic (split_fork_model): {r2_upper_split}")


Epoch 1/100
[1m598/598[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - loss: 6660108612272128.0000 - lower_output_split_mse: 842508795904.0000 - upper_output_split_mae: 980592.2500 - val_loss: 5484722629115904.0000 - val_lower_output_split_mse: 776070627328.0000 - val_upper_output_split_mae: 964302.9375
Epoch 2/100
[1m598/598[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 6479762398642176.0000 - lower_output_split_mse: 818295472128.0000 - upper_output_split_mae: 967242.9375 - val_loss: 5163699593543680.0000 - val_lower_output_split_mse: 722897534976.0000 - val_upper_output_split_mae: 936145.6250
Epoch 3/100
[1m598/598[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 5317569481277440.0000 - lower_output_split_mse: 732605251584.0000 - upper_output_split_mae: 934185.3125 - val_loss: 4390048745127936.0000 - val_lower_output_split_mse: 597617344512.0000 - val_upper_output_split_mae: 864882.3125
Epoch 4/100
[1m598/598[0m [3

In [69]:
from keras.layers import Dropout, Concatenate, Input, Dense
from keras.models import Model
from sklearn.metrics import mean_absolute_error, r2_score

# Definiowanie wejść
input_layer = Input(shape=(X_train.shape[1],))

# Podział na cechy sq i rooms
sq_input = Input(shape=(1,), name='sq_input')
rooms_input = Input(shape=(1,), name='rooms_input')

# Przeszłość dla każdej cechy
sq_dense = Dense(128, activation='relu', kernel_regularizer=l2(0.01))(sq_input)
rooms_dense = Dense(128, activation='relu', kernel_regularizer=l2(0.01))(rooms_input)

# Połączenie cech
concatenated = Concatenate()([sq_dense, rooms_dense, input_layer])

# Dodatkowe warstwy
dense1 = Dense(128, activation='relu', kernel_regularizer=l2(0.01))(concatenated)
dropout1 = Dropout(0.2)(dense1)
dense2 = Dense(64, activation='relu', kernel_regularizer=l2(0.01))(dropout1)

output_lower = Dense(1, name='lower_output')(dense2)
output_upper = Dense(1, name='upper_output')(dense2)

# Kompilacja modelu
model = Model(inputs=[sq_input, rooms_input, input_layer], outputs=[output_lower, output_upper])
model.compile(optimizer='nadam', loss='mean_squared_error', metrics=['mse', 'mae'])

# Trenowanie modelu
history = model.fit(
    [X_train['sq'].values, X_train['rooms'].values, X_train_scaled],
    [y_train_lower, y_train_upper],
    epochs=100,
    batch_size=32,
    validation_split=0.2,
    sample_weight=sample_weights_tensor
)

# Ewaluacja modelu
test_loss, test_mse, test_mae = model.evaluate(
    [X_test['sq'].values, X_test['rooms'].values, X_test_scaled],
    [y_test_lower, y_test_upper]
)

# Predykcja
y_pred_lower, y_pred_upper = model.predict([X_test['sq'].values, X_test['rooms'].values, X_test_scaled])


# Obliczanie R²
r2_lower = r2_score(y_test_lower, y_pred_lower)
r2_upper = r2_score(y_test_upper, y_pred_upper)

print(f"R² dla dolnych granic: {r2_lower:.4f}")
print(f"R² dla górnych granic: {r2_upper:.4f}")


Epoch 1/100
[1m598/598[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - loss: 3816742283378688.0000 - lower_output_mse: 565052375040.0000 - upper_output_mae: 759818.5625 - val_loss: 198161721196544.0000 - val_lower_output_mse: 29168273408.0000 - val_upper_output_mae: 153736.7969
Epoch 2/100
[1m598/598[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - loss: 236654023409664.0000 - lower_output_mse: 34453938176.0000 - upper_output_mae: 165522.6250 - val_loss: 192636212215808.0000 - val_lower_output_mse: 28590514176.0000 - val_upper_output_mae: 154612.5781
Epoch 3/100
[1m598/598[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - loss: 224617058795520.0000 - lower_output_mse: 33230850048.0000 - upper_output_mae: 162372.6094 - val_loss: 187835864842240.0000 - val_lower_output_mse: 27805317120.0000 - val_upper_output_mae: 151927.6406
Epoch 4/100
[1m598/598[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - loss: 237787609563136.00

In [72]:
from keras.layers import Dropout, Concatenate, Input, Dense
from keras.models import Model
from keras.regularizers import l2
from sklearn.metrics import mean_absolute_error, r2_score
import tensorflow as tf

# Definiowanie funkcji R²
def r_squared(y_true, y_pred):
    ss_res = tf.reduce_sum(tf.square(y_true - y_pred))  # Suma reszt
    ss_tot = tf.reduce_sum(tf.square(y_true - tf.reduce_mean(y_true)))  # Całkowita suma kwadratów
    return 1 - ss_res / (ss_tot + tf.keras.backend.epsilon())  # Współczynnik R²

# Zmodyfikowany model z regularizacją L2
input_layer = Input(shape=(X_train.shape[1],))

sq_input = Input(shape=(1,), name='sq_input')
rooms_input = Input(shape=(1,), name='rooms_input')

sq_dense = Dense(128, activation='relu', kernel_regularizer=l2(0.01))(sq_input)  # Dodana regularizacja L2
rooms_dense = Dense(128, activation='relu', kernel_regularizer=l2(0.01))(rooms_input)

concatenated = Concatenate()([sq_dense, rooms_dense, input_layer])

dense1 = Dense(128, activation='relu', kernel_regularizer=l2(0.01))(concatenated)  # Dodana regularizacja L2
dropout1 = Dropout(0.2)(dense1)
dense2 = Dense(64, activation='relu', kernel_regularizer=l2(0.01))(dropout1)  # Dodana regularizacja L2

output_lower = Dense(1, name='lower_output')(dense2)
output_upper = Dense(1, name='upper_output')(dense2)

# Kompilacja modelu z R² jako dodatkową metryką dla każdego wyjścia
model = Model(inputs=[sq_input, rooms_input, input_layer], outputs=[output_lower, output_upper])
model.compile(optimizer='nadam', loss='mean_squared_error', 
              metrics=[[r_squared, 'mae'], [r_squared, 'mae']])  # Metryki dla obu wyjść

# Trenowanie modelu
history = model.fit(
    [X_train['sq'].values, X_train['rooms'].values, X_train_scaled],
    [y_train_lower, y_train_upper],
    epochs=100,
    batch_size=32,
    validation_split=0.2,
    sample_weight=sample_weights_tensor
)

# Ewaluacja modelu
test_loss, (r2_lower, test_mae_lower), (r2_upper, test_mae_upper) = model.evaluate(
    [X_test['sq'].values, X_test['rooms'].values, X_test_scaled],
    [y_test_lower, y_test_upper]
)

# Predykcja
y_pred_lower, y_pred_upper = model.predict([X_test['sq'].values, X_test['rooms'].values, X_test_scaled])

# Obliczanie dodatkowych metryk
mae_lower = mean_absolute_error(y_test_lower, y_pred_lower)
mae_upper = mean_absolute_error(y_test_upper, y_pred_upper)

# Obliczanie R²
r2_lower = r2_score(y_test_lower, y_pred_lower)
r2_upper = r2_score(y_test_upper, y_pred_upper)

# Wyświetlanie wyników
print(f"Test loss: {test_loss:.4f}")
print(f"R² dla dolnych granic: {r2_lower:.4f}")
print(f"Test MAE dla dolnych granic: {mae_lower:.4f}")
print(f"R² dla górnych granic: {r2_upper:.4f}")
print(f"Test MAE dla górnych granic: {mae_upper:.4f}")


Epoch 1/100
[1m598/598[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - loss: 4179097131417600.0000 - lower_output_mae: 639505.6875 - lower_output_r_squared: -4.0026 - upper_output_mae: 795470.8125 - upper_output_r_squared: -4.1208 - val_loss: 198806956146688.0000 - val_lower_output_mae: 129080.9062 - val_lower_output_r_squared: 0.7507 - val_upper_output_mae: 156690.8906 - val_upper_output_r_squared: 0.7539
Epoch 2/100
[1m598/598[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 5ms/step - loss: 240800562675712.0000 - lower_output_mae: 137011.1719 - lower_output_r_squared: 0.7042 - upper_output_mae: 166895.4531 - upper_output_r_squared: 0.7068 - val_loss: 193978272055296.0000 - val_lower_output_mae: 124987.4766 - val_lower_output_r_squared: 0.7646 - val_upper_output_mae: 152813.1562 - val_upper_output_r_squared: 0.7645
Epoch 3/100
[1m598/598[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - loss: 223262768365568.0000 - lower_output_mae: 133283.5000

ValueError: too many values to unpack (expected 3)

In [18]:
from keras.layers import Multiply, Input, Dense
from keras.models import Model
import numpy as np

# Definiowanie wag dla cech
weights_vector = np.array([9 if col == 'sq' else 6 if col == 'rooms' else 1 for col in X_train.columns])
weights_input = Input(shape=(X_train.shape[1],))
weighted_input = Multiply()([weights_input, weights_vector])  # Użycie wag bez przeskalowania danych

# Dalsze warstwy modelu
dense1 = Dense(64, activation='relu')(weighted_input)
dense2 = Dense(32, activation='relu')(dense1)
output_lower = Dense(1, name='lower_output')(dense2)
output_upper = Dense(1, name='upper_output')(dense2)

# Kompilacja i trenowanie
model = Model(inputs=weights_input, outputs=[output_lower, output_upper])
model.compile(optimizer='nadam', loss='mean_squared_error', metrics=['mse', 'mae'])
history = model.fit(X_train, [y_train_lower, y_train_upper], epochs=100, batch_size=32, validation_split=0.2)


Epoch 1/100
[1m598/598[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - loss: 23789716478361600.0000 - lower_output_split_mse: 718147551232.0000 - upper_output_split_mae: 903794.3125 - val_loss: 13247094332588032.0000 - val_lower_output_split_mse: 446158307328.0000 - val_upper_output_split_mae: 682299.9375
Epoch 2/100
[1m598/598[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - loss: 14797795496034304.0000 - lower_output_split_mse: 425955786752.0000 - upper_output_split_mae: 661381.9375 - val_loss: 9954831175254016.0000 - val_lower_output_split_mse: 300687687680.0000 - val_upper_output_split_mae: 549376.9375
Epoch 3/100
[1m598/598[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - loss: 13405193152495616.0000 - lower_output_split_mse: 301733904384.0000 - upper_output_split_mae: 542389.5625 - val_loss: 8467022291664896.0000 - val_lower_output_split_mse: 228906631168.0000 - val_upper_output_split_mae: 452298.1562
Epoch 4/100
[1m598/598[0m

In [29]:
from keras.layers import Input, Dense
from keras.models import Model
from sklearn.metrics import r2_score
import numpy as np
import tensorflow as tf

# Definiowanie wag dla cech
weights_vector = np.array([100 if col == 'sq' else 30 if col == 'rooms' else 1 for col in X_train.columns])

# Ręczne skalowanie danych treningowych przed podaniem do modelu
X_train_weighted = X_train * weights_vector
X_test_weighted = X_test * weights_vector

# Definiowanie wejścia i modelu
input_layer = Input(shape=(X_train.shape[1],))
dense1 = Dense(64, activation='relu')(input_layer)
dense2 = Dense(32, activation='relu')(dense1)
output_lower = Dense(1, name='lower_output')(dense2)
output_upper = Dense(1, name='upper_output')(dense2)

# Kompilacja i trenowanie modelu
model = Model(inputs=input_layer, outputs=[output_lower, output_upper])
model.compile(optimizer='nadam', loss='mean_squared_error', metrics=['mse', 'mae'])

# Trenowanie modelu na przeskalowanych danych
history = model.fit(X_train_weighted, [y_train_lower, y_train_upper], epochs=100, batch_size=32, validation_split=0.2)

# Ewaluacja modelu na przeskalowanych danych testowych
test_loss, test_mse, test_mae = model.evaluate(X_test_weighted, [y_test_lower, y_test_upper])

# Predykcja
y_pred_lower, y_pred_upper = model.predict(X_test_weighted)

# Obliczanie R²
r2_lower = r2_score(y_test_lower, y_pred_lower)
r2_upper = r2_score(y_test_upper, y_pred_upper)

print(f"R² dla dolnych granic (waga cech): {r2_lower:.4f}")
print(f"R² dla górnych granic (waga cech): {r2_upper:.4f}")


Epoch 1/100
[1m598/598[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 4ms/step - loss: 1487182102528.0000 - lower_output_mse: 597003075584.0000 - upper_output_mae: 777703.3125 - val_loss: 117751185408.0000 - val_lower_output_mse: 46837698560.0000 - val_upper_output_mae: 170486.2344
Epoch 2/100
[1m598/598[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - loss: 329149939712.0000 - lower_output_mse: 133481390080.0000 - upper_output_mae: 173295.7344 - val_loss: 99014582272.0000 - val_lower_output_mse: 39803805696.0000 - val_upper_output_mae: 153397.8281
Epoch 3/100
[1m598/598[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - loss: 194663989248.0000 - lower_output_mse: 78398111744.0000 - upper_output_mae: 158498.7969 - val_loss: 91167563776.0000 - val_lower_output_mse: 36534206464.0000 - val_upper_output_mae: 153755.2031
Epoch 4/100
[1m598/598[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - loss: 83348291584.0000 - lower_output_mse: 

In [27]:
from keras.layers import Input, Dense
from keras.models import Model
from sklearn.metrics import r2_score
import numpy as np
import tensorflow as tf

# Definiowanie wag dla cech
weights_vector = np.array([1 for col in X_train.columns])

# Ręczne skalowanie danych treningowych przed podaniem do modelu
X_train_weighted = X_train * weights_vector
X_test_weighted = X_test * weights_vector

# Definiowanie wejścia i modelu
input_layer = Input(shape=(X_train.shape[1],))
dense1 = Dense(64, activation='relu')(input_layer)
dense2 = Dense(32, activation='relu')(dense1)
output_lower = Dense(1, name='lower_output')(dense2)
output_upper = Dense(1, name='upper_output')(dense2)

# Kompilacja i trenowanie modelu
model = Model(inputs=input_layer, outputs=[output_lower, output_upper])
model.compile(optimizer='nadam', loss='mean_squared_error', metrics=['mse', 'mae'])

# Trenowanie modelu na przeskalowanych danych
history = model.fit(X_train_weighted, [y_train_lower, y_train_upper], epochs=100, batch_size=32, validation_split=0.2)

# Ewaluacja modelu na przeskalowanych danych testowych
test_loss, test_mse, test_mae = model.evaluate(X_test_weighted, [y_test_lower, y_test_upper])

# Predykcja
y_pred_lower, y_pred_upper = model.predict(X_test_weighted)

# Obliczanie R²
r2_lower = r2_score(y_test_lower, y_pred_lower)
r2_upper = r2_score(y_test_upper, y_pred_upper)

print(f"R² dla dolnych granic (waga cech): {r2_lower:.4f}")
print(f"R² dla górnych granic (waga cech): {r2_upper:.4f}")


Epoch 1/100
[1m598/598[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 4ms/step - loss: 1669248188416.0000 - lower_output_mse: 670943936512.0000 - upper_output_mae: 804869.0000 - val_loss: 236641042432.0000 - val_lower_output_mse: 95006007296.0000 - val_upper_output_mae: 215164.5000
Epoch 2/100
[1m598/598[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - loss: 188309602304.0000 - lower_output_mse: 75693785088.0000 - upper_output_mae: 189586.6250 - val_loss: 181059960832.0000 - val_lower_output_mse: 72353226752.0000 - val_upper_output_mae: 187479.2656
Epoch 3/100
[1m598/598[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - loss: 402635096064.0000 - lower_output_mse: 163263135744.0000 - upper_output_mae: 185316.7344 - val_loss: 144752967680.0000 - val_lower_output_mse: 58079375360.0000 - val_upper_output_mae: 170506.7344
Epoch 4/100
[1m598/598[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - loss: 118627196928.0000 - lower_output_ms

In [23]:
from keras.layers import Input, Dense
from keras.models import Model
from sklearn.metrics import r2_score

# Dodanie dodatkowych cech
X_train['sq_rooms'] = X_train['sq'] * X_train['rooms']
X_train['sq_per_room'] = X_train['sq'] / X_train['rooms']
X_train['rooms_per_sq'] = X_train['rooms'] / X_train['sq']

X_test['sq_rooms'] = X_test['sq'] * X_test['rooms']
X_test['sq_per_room'] = X_test['sq'] / X_test['rooms']
X_test['rooms_per_sq'] = X_test['rooms'] / X_test['sq']

# Użycie nowego modelu na tych danych
input_layer = Input(shape=(X_train.shape[1],))
dense1 = Dense(64, activation='relu')(input_layer)
dense2 = Dense(32, activation='relu')(dense1)
output_lower = Dense(1, name='lower_output')(dense2)
output_upper = Dense(1, name='upper_output')(dense2)

# Kompilacja i trenowanie
model = Model(inputs=input_layer, outputs=[output_lower, output_upper])
model.compile(optimizer='nadam', loss='mean_squared_error', metrics=['mse', 'mae'])
history = model.fit(X_train, [y_train_lower, y_train_upper], epochs=100, batch_size=32, validation_split=0.2)

# Ewaluacja modelu
test_loss, test_mse, test_mae = model.evaluate(X_test, [y_test_lower, y_test_upper])

# Predykcja
y_pred_lower, y_pred_upper = model.predict(X_test)

# Obliczanie R²
r2_lower = r2_score(y_test_lower, y_pred_lower)
r2_upper = r2_score(y_test_upper, y_pred_upper)

print(f"R² dla dolnych granic (dodatkowe cechy): {r2_lower:.4f}")
print(f"R² dla górnych granic (dodatkowe cechy): {r2_upper:.4f}")


Epoch 1/100
[1m598/598[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - loss: 1461701574656.0000 - lower_output_mse: 597722005504.0000 - upper_output_mae: 774381.6250 - val_loss: 232784232448.0000 - val_lower_output_mse: 95358910464.0000 - val_upper_output_mae: 238758.7344
Epoch 2/100
[1m598/598[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 263149158400.0000 - lower_output_mse: 105954172928.0000 - upper_output_mae: 201462.1094 - val_loss: 156548628480.0000 - val_lower_output_mse: 63459950592.0000 - val_upper_output_mae: 174839.7031
Epoch 3/100
[1m598/598[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 221919657984.0000 - lower_output_mse: 88231911424.0000 - upper_output_mae: 171459.6562 - val_loss: 134956007424.0000 - val_lower_output_mse: 54590083072.0000 - val_upper_output_mae: 171436.2656
Epoch 4/100
[1m598/598[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 340842643456.0000 - lower_output_ms

In [26]:
from keras.layers import Concatenate, Input, Dense
from keras.models import Model
from sklearn.metrics import r2_score

# Upewnienie się, że dane wejściowe są typu float
X_train_sq = X_train['sq'].values.astype(np.float32)
X_train_rooms = X_train['rooms'].values.astype(np.float32)
X_train_other = X_train.drop(['sq', 'rooms'], axis=1).values.astype(np.float32)

X_test_sq = X_test['sq'].values.astype(np.float32)
X_test_rooms = X_test['rooms'].values.astype(np.float32)
X_test_other = X_test.drop(['sq', 'rooms'], axis=1).values.astype(np.float32)

# Definiowanie oddzielnych wejść
sq_input = Input(shape=(1,), name='sq_input')
rooms_input = Input(shape=(1,), name='rooms_input')
other_features_input = Input(shape=(X_train_other.shape[1],), name='other_features_input')

# Warstwy przetwarzające sq i rooms
sq_dense = Dense(64, activation='relu')(sq_input)
rooms_dense = Dense(64, activation='relu')(rooms_input)

# Łączenie przetworzonych cech
concatenated = Concatenate()([sq_dense, rooms_dense, other_features_input])

# Dalsze warstwy modelu
dense1 = Dense(64, activation='relu')(concatenated)
dense2 = Dense(32, activation='relu')(dense1)
output_lower = Dense(1, name='lower_output')(dense2)
output_upper = Dense(1, name='upper_output')(dense2)

# Kompilacja i trenowanie
model = Model(inputs=[sq_input, rooms_input, other_features_input], outputs=[output_lower, output_upper])
model.compile(optimizer='nadam', loss='mean_squared_error', metrics=['mse', 'mae'])

history = model.fit(
    [X_train_sq, X_train_rooms, X_train_other],
    [y_train_lower, y_train_upper],
    epochs=100, batch_size=32, validation_split=0.2
)

# Ewaluacja modelu
test_loss, test_mse, test_mae = model.evaluate(
    [X_test_sq, X_test_rooms, X_test_other],
    [y_test_lower, y_test_upper]
)

# Predykcja
y_pred_lower, y_pred_upper = model.predict(
    [X_test_sq, X_test_rooms, X_test_other]
)

# Obliczanie R²
r2_lower = r2_score(y_test_lower, y_pred_lower)
r2_upper = r2_score(y_test_upper, y_pred_upper)

print(f"R² dla dolnych granic (oddzielne wejścia): {r2_lower:.4f}")
print(f"R² dla górnych granic (oddzielne wejścia): {r2_upper:.4f}")


Epoch 1/100
[1m598/598[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 1497099010048.0000 - lower_output_mse: 586933534720.0000 - upper_output_mae: 816264.8125 - val_loss: 154149797888.0000 - val_lower_output_mse: 61811671040.0000 - val_upper_output_mae: 158761.0938
Epoch 2/100
[1m598/598[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 306962661376.0000 - lower_output_mse: 123480317952.0000 - upper_output_mae: 173866.6719 - val_loss: 108910804992.0000 - val_lower_output_mse: 43661324288.0000 - val_upper_output_mae: 158349.7031
Epoch 3/100
[1m598/598[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 158911840256.0000 - lower_output_mse: 63754928128.0000 - upper_output_mae: 159490.6719 - val_loss: 99820470272.0000 - val_lower_output_mse: 40060424192.0000 - val_upper_output_mae: 160412.9062
Epoch 4/100
[1m598/598[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 175223029760.0000 - lower_output_mse

In [59]:
def predict_price_tf_fork(city, district, floor, rooms, sq, year, model):
    X = df3.drop(['price',"district", "update_date", "city", "offer_url", "price_per_sq"], axis='columns')
    try:
        district_index = np.where(X.columns==district)[0][0]
        city_index = np.where(X.columns==city)[0][0]
    except IndexError:
        district_index = -1
        city_index = -1

    x = np.zeros(len(X.columns))
    x[0] = floor
    x[1] = rooms
    x[2] = sq
    x[3] = year

    if district_index >= 0:
        x[district_index] = 1
    if city_index >= 0:
        x[city_index] = 1

    x = x.reshape(1, -1)

    preds = model.predict(x)
    lower_pred = preds[0][0]
    upper_pred = preds[1][0]

    return lower_pred, upper_pred

In [64]:
print(predict_price_tf_fork('Warszawa', 'Mokotów', 3, 2, 60, 2010, split_fork_model))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step
(array([866653.8], dtype=float32), array([1053340.1], dtype=float32))


In [57]:
def predict_price_tf_fork_actual(city, district, floor, rooms, sq, year, model, scaler, correlation, X_columns):
    # Stworzenie danych wejściowych z odpowiednimi kolumnami
    x = np.zeros(len(X_columns))
    if 'floor' in X_columns:
        x[np.where(X_columns == 'floor')[0][0]] = floor
    if 'rooms' in X_columns:
        x[np.where(X_columns == 'rooms')[0][0]] = rooms
    if 'sq' in X_columns:
        x[np.where(X_columns == 'sq')[0][0]] = sq
    if 'year' in X_columns:
        x[np.where(X_columns == 'year')[0][0]] = year

    # Obsługa miasta i dzielnicy
    if city in X_columns:
        city_index = np.where(X_columns == city)[0][0]
        x[city_index] = 1
    if district in X_columns:
        district_index = np.where(X_columns == district)[0][0]
        x[district_index] = 1

    # Przekształcanie danych wejściowych w ramkę danych
    x_df = pd.DataFrame([x], columns=X_columns)
    
    # Skalowanie cech
    x_scaled = scaler.transform(x_df)
    
    # Dodanie znormalizowanych ważonych cech
    weights = np.array([correlation.get(col, 1) for col in X_columns])
    weighted_features = x_scaled * weights
    x_extended = np.concatenate((x_scaled, weighted_features), axis=1)

    # Predykcja modelu
    preds = model.predict(x_extended)
    lower_pred = preds[0][0]
    upper_pred = preds[1][0]

    return lower_pred, upper_pred


In [58]:
lower, upper = predict_price_tf_fork_actual(city='Warszawa', district='Mokotów', floor=3, rooms=2, sq=60, year=2010, model=weighted_model, scaler=scaler, correlation=correlation, X_columns=X_train.columns)
print(f"Przewidywana dolna granica: {lower}, górna granica: {upper}")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
Przewidywana dolna granica: [1544721.1], górna granica: [1887321.6]
