In [19]:
import os
import pandas as pd
import json
import numpy as np
from django.conf import settings
from django.db import connection
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.models import Model, load_model
from keras.layers import Input, Dense
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
import pickle


def remove_outliers(df):
    df_out = pd.DataFrame()
    for key, subdf in df.groupby('city'):
        m = np.mean(subdf.price_per_sq)
        st = np.std(subdf.price_per_sq)
        reduced_df = subdf[(subdf.price_per_sq>(m-st)) & (subdf.price_per_sq <= (m+st))]
        df_out = pd.concat([df_out, reduced_df], ignore_index = True)
    return df_out


def add_weighted_features(X, correlation, X_columns):
    weights = np.array([correlation.get(col, 1) for col in X_columns])
    weighted_features = X * weights
    return np.concatenate((X, weighted_features), axis=1)


def train_model(data_period, status):
    
    data = pd.read_csv("Updated_Houses.csv", encoding = "utf-8")
    data = remove_outliers(data)

    dummies = pd.get_dummies(data.city)
    prepared_df = pd.concat([data,dummies],axis='columns')
    dummies = pd.get_dummies(data.district)
    prepared_df = pd.concat([prepared_df,dummies],axis='columns')

    prepared_df.columns = prepared_df.columns.str.lower()
    data_to_corr = prepared_df.drop(["district", "update_date", "city", "offer_url", "price_per_sq", "year"], axis='columns')
    X = prepared_df.drop(['price', "district", "update_date", "city", "offer_url", "price_per_sq", "year"], axis='columns')
    X_columns = X.columns
    y = prepared_df.price
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    correlation = data_to_corr.corr()['price'].abs()

    X_train_extended = add_weighted_features(X_train_scaled, correlation, X_train.columns)
    X_test_extended = add_weighted_features(X_test_scaled, correlation, X_train.columns)

    print("Rozpoczęcie treningu")

    input_layer = Input(shape=(X_train_extended.shape[1],))
    dense1 = Dense(128, activation='relu')(input_layer)
    dense2 = Dense(64, activation='relu')(dense1)
    dense3 = Dense(32, activation='relu')(dense2)

    output_lower = Dense(1, name='lower_output')(dense3)
    output_upper = Dense(1, name='upper_output')(dense3)

    weighted_model = Model(inputs=input_layer, outputs=[output_lower, output_upper])
    weighted_model.compile(optimizer='nadam', loss='mean_squared_error', metrics=['mse', 'mae'])

    early_stopping = EarlyStopping(
        monitor='val_loss', 
        patience=10, 
        restore_best_weights=True
    )

    best_model_path = f"m1.keras"

    model_checkpoint = ModelCheckpoint(
        best_model_path, 
        monitor='val_loss', 
        save_best_only=True, 
        save_weights_only=False
    )

    margin = 0.1 * y_train
    y_train_lower = y_train - margin
    y_train_upper = y_train + margin

    margin_test = 0.1 * y_test
    y_test_lower = y_test - margin_test
    y_test_upper = y_test + margin_test

    history = weighted_model.fit(
        X_train_extended, 
        [y_train_lower, y_train_upper],
        epochs=100, 
        batch_size=32, 
        validation_split=0.2,
        callbacks=[early_stopping, model_checkpoint]
    )

    best_weighted_model = load_model(best_model_path)

    evaluation_results = best_weighted_model.evaluate(
        X_test_extended, [y_test_lower, y_test_upper]
    )

    y_pred_lower, y_pred_upper = best_weighted_model.predict(X_test_extended)

    r2_lower = r2_score(y_test_lower, y_pred_lower)
    r2_upper = r2_score(y_test_upper, y_pred_upper)

    print(f"R² dla dolnych granic (best_weighted_model): {r2_lower}")
    print(f"R² dla górnych granic (best_weighted_model): {r2_upper}")






In [20]:
train_model(1,1)

Rozpoczęcie treningu
Epoch 1/100
[1m598/598[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 6ms/step - loss: 1946052460544.0000 - lower_output_loss: 779929845760.0000 - lower_output_mse: 779930763264.0000 - upper_output_loss: 1166120714240.0000 - upper_output_mae: 949624.6250 - val_loss: 272283598848.0000 - val_lower_output_loss: 98222112768.0000 - val_lower_output_mse: 98305212416.0000 - val_upper_output_loss: 173863550976.0000 - val_upper_output_mae: 334072.0938
Epoch 2/100
[1m598/598[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 5ms/step - loss: 136311537664.0000 - lower_output_loss: 50839941120.0000 - lower_output_mse: 50839969792.0000 - upper_output_loss: 85471477760.0000 - upper_output_mae: 217566.9219 - val_loss: 62205972480.0000 - val_lower_output_loss: 24945448960.0000 - val_lower_output_mse: 25005514752.0000 - val_upper_output_loss: 37108961280.0000 - val_upper_output_mae: 130597.5391
Epoch 3/100
[1m598/598[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0

In [2]:
import os
import pandas as pd
import json
import numpy as np
from django.conf import settings
from django.db import connection
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.models import Model, load_model
from keras.layers import Input, Dense
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
import pickle


def remove_outliers_mad(df):
    df_out = pd.DataFrame()
    for key, subdf in df.groupby('city'):
        median = np.median(subdf.price_per_sq)
        mad = 1.4826 * np.median(np.abs(subdf.price_per_sq - median))
        threshold = 2.5
        reduced_df = subdf[np.abs(subdf.price_per_sq - median) / mad <= threshold]
        df_out = pd.concat([df_out, reduced_df], ignore_index=True)
    return df_out


def add_weighted_features(X, correlation, X_columns):
    weights = np.array([correlation.get(col, 1) for col in X_columns])
    weighted_features = X * weights
    return np.concatenate((X, weighted_features), axis=1)


def train_model(data_period, status):
    
    data = pd.read_csv("Updated_Houses.csv", encoding = "utf-8")
    data = remove_outliers_mad(data)

    dummies = pd.get_dummies(data.city)
    prepared_df = pd.concat([data,dummies],axis='columns')
    dummies = pd.get_dummies(data.district)
    prepared_df = pd.concat([prepared_df,dummies],axis='columns')

    prepared_df.columns = prepared_df.columns.str.lower()
    data_to_corr = prepared_df.drop(["district", "update_date", "city", "offer_url", "price_per_sq", "year"], axis='columns')
    X = prepared_df.drop(['price', "district", "update_date", "city", "offer_url", "price_per_sq", "year"], axis='columns')
    X_columns = X.columns
    y = prepared_df.price
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    correlation = data_to_corr.corr()['price'].abs()

    X_train_extended = add_weighted_features(X_train_scaled, correlation, X_train.columns)
    X_test_extended = add_weighted_features(X_test_scaled, correlation, X_train.columns)

    print("Rozpoczęcie treningu")

    input_layer = Input(shape=(X_train_extended.shape[1],))
    dense1 = Dense(128, activation='relu')(input_layer)
    dense2 = Dense(64, activation='relu')(dense1)
    dense3 = Dense(32, activation='relu')(dense2)

    output_lower = Dense(1, name='lower_output')(dense3)
    output_upper = Dense(1, name='upper_output')(dense3)

    weighted_model = Model(inputs=input_layer, outputs=[output_lower, output_upper])
    weighted_model.compile(optimizer='nadam', loss='mean_squared_error', metrics=['mse', 'mae'])

    early_stopping = EarlyStopping(
        monitor='val_loss', 
        patience=10, 
        restore_best_weights=True
    )

    best_model_path = f"m1.keras"

    model_checkpoint = ModelCheckpoint(
        best_model_path, 
        monitor='val_loss', 
        save_best_only=True, 
        save_weights_only=False
    )

    margin = 0.1 * y_train
    y_train_lower = y_train - margin
    y_train_upper = y_train + margin

    margin_test = 0.1 * y_test
    y_test_lower = y_test - margin_test
    y_test_upper = y_test + margin_test

    history = weighted_model.fit(
        X_train_extended, 
        [y_train_lower, y_train_upper],
        epochs=100, 
        batch_size=32, 
        validation_split=0.2,
        callbacks=[early_stopping, model_checkpoint]
    )

    best_weighted_model = load_model(best_model_path)

    evaluation_results = best_weighted_model.evaluate(
        X_test_extended, [y_test_lower, y_test_upper]
    )

    y_pred_lower, y_pred_upper = best_weighted_model.predict(X_test_extended)

    r2_lower = r2_score(y_test_lower, y_pred_lower)
    r2_upper = r2_score(y_test_upper, y_pred_upper)

    print(f"R² dla dolnych granic (best_weighted_model): {r2_lower}")
    print(f"R² dla górnych granic (best_weighted_model): {r2_upper}")






In [3]:
train_model(1,1)

Rozpoczęcie treningu
Epoch 1/100
[1m719/719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - loss: 2052145152000.0000 - lower_output_loss: 819760529408.0000 - lower_output_mse: 819760660480.0000 - upper_output_loss: 1232385212416.0000 - upper_output_mae: 946328.1250 - val_loss: 284496986112.0000 - val_lower_output_loss: 102840090624.0000 - val_lower_output_mse: 102893264896.0000 - val_upper_output_loss: 181529296896.0000 - val_upper_output_mae: 290602.9375
Epoch 2/100
[1m719/719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - loss: 199581483008.0000 - lower_output_loss: 75819163648.0000 - lower_output_mse: 75819163648.0000 - upper_output_loss: 123762294784.0000 - upper_output_mae: 217443.9531 - val_loss: 146181242880.0000 - val_lower_output_loss: 58546601984.0000 - val_lower_output_mse: 58609303552.0000 - val_upper_output_loss: 87478452224.0000 - val_upper_output_mae: 182402.6875
Epoch 3/100
[1m719/719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3

In [4]:
import os
import pandas as pd
import json
import numpy as np
from django.conf import settings
from django.db import connection
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.models import Model, load_model
from keras.layers import Input, Dense
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
import pickle


def remove_outliers_boxplot(df):
    df_out = pd.DataFrame()
    for key, subdf in df.groupby('city'):
        q1 = np.percentile(subdf.price_per_sq, 25)
        q3 = np.percentile(subdf.price_per_sq, 75)
        iqr = q3 - q1
        lower_fence = q1 - 1.5 * iqr
        upper_fence = q3 + 1.5 * iqr
        reduced_df = subdf[(subdf.price_per_sq >= lower_fence) & (subdf.price_per_sq <= upper_fence)]
        df_out = pd.concat([df_out, reduced_df], ignore_index=True)
    return df_out


def add_weighted_features(X, correlation, X_columns):
    weights = np.array([correlation.get(col, 1) for col in X_columns])
    weighted_features = X * weights
    return np.concatenate((X, weighted_features), axis=1)


def train_model():
    
    data = pd.read_csv("Updated_Houses.csv", encoding = "utf-8")
    data = remove_outliers_boxplot(data)

    dummies = pd.get_dummies(data.city)
    prepared_df = pd.concat([data,dummies],axis='columns')
    dummies = pd.get_dummies(data.district)
    prepared_df = pd.concat([prepared_df,dummies],axis='columns')

    prepared_df.columns = prepared_df.columns.str.lower()
    data_to_corr = prepared_df.drop(["district", "update_date", "city", "offer_url", "price_per_sq", "year"], axis='columns')
    X = prepared_df.drop(['price', "district", "update_date", "city", "offer_url", "price_per_sq", "year"], axis='columns')
    X_columns = X.columns
    y = prepared_df.price
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    correlation = data_to_corr.corr()['price'].abs()

    X_train_extended = add_weighted_features(X_train_scaled, correlation, X_train.columns)
    X_test_extended = add_weighted_features(X_test_scaled, correlation, X_train.columns)

    print("Rozpoczęcie treningu")

    input_layer = Input(shape=(X_train_extended.shape[1],))
    dense1 = Dense(128, activation='relu')(input_layer)
    dense2 = Dense(64, activation='relu')(dense1)
    dense3 = Dense(32, activation='relu')(dense2)

    output_lower = Dense(1, name='lower_output')(dense3)
    output_upper = Dense(1, name='upper_output')(dense3)

    weighted_model = Model(inputs=input_layer, outputs=[output_lower, output_upper])
    weighted_model.compile(optimizer='nadam', loss='mean_squared_error', metrics=['mse', 'mae'])

    early_stopping = EarlyStopping(
        monitor='val_loss', 
        patience=10, 
        restore_best_weights=True
    )

    best_model_path = f"m1.keras"

    model_checkpoint = ModelCheckpoint(
        best_model_path, 
        monitor='val_loss', 
        save_best_only=True, 
        save_weights_only=False
    )

    margin = 0.1 * y_train
    y_train_lower = y_train - margin
    y_train_upper = y_train + margin

    margin_test = 0.1 * y_test
    y_test_lower = y_test - margin_test
    y_test_upper = y_test + margin_test

    history = weighted_model.fit(
        X_train_extended, 
        [y_train_lower, y_train_upper],
        epochs=100, 
        batch_size=32, 
        validation_split=0.2,
        callbacks=[early_stopping, model_checkpoint]
    )

    best_weighted_model = load_model(best_model_path)

    evaluation_results = best_weighted_model.evaluate(
        X_test_extended, [y_test_lower, y_test_upper]
    )

    y_pred_lower, y_pred_upper = best_weighted_model.predict(X_test_extended)

    r2_lower = r2_score(y_test_lower, y_pred_lower)
    r2_upper = r2_score(y_test_upper, y_pred_upper)

    print(f"R² dla dolnych granic (best_weighted_model): {r2_lower}")
    print(f"R² dla górnych granic (best_weighted_model): {r2_upper}")


train_model()



Rozpoczęcie treningu
Epoch 1/100
[1m726/726[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - loss: 2080340312064.0000 - lower_output_loss: 828754493440.0000 - lower_output_mse: 828754493440.0000 - upper_output_loss: 1251586473984.0000 - upper_output_mae: 963945.0625 - val_loss: 307073843200.0000 - val_lower_output_loss: 106124165120.0000 - val_lower_output_mse: 106052444160.0000 - val_upper_output_loss: 201143287808.0000 - val_upper_output_mae: 325327.8750
Epoch 2/100
[1m726/726[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - loss: 207815245824.0000 - lower_output_loss: 77291708416.0000 - lower_output_mse: 77291708416.0000 - upper_output_loss: 130523488256.0000 - upper_output_mae: 236640.6094 - val_loss: 131144318976.0000 - val_lower_output_loss: 52445663232.0000 - val_lower_output_mse: 52465483776.0000 - val_upper_output_loss: 78646255616.0000 - val_upper_output_mae: 175156.3281
Epoch 3/100
[1m726/726[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2

In [5]:
import os
import pandas as pd
import json
import numpy as np
from django.conf import settings
from django.db import connection
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.models import Model, load_model
from keras.layers import Input, Dense
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
import pickle


def remove_outliers_zscore(df):
    df_out = pd.DataFrame()
    for key, subdf in df.groupby('city'):
        mean = np.mean(subdf.price_per_sq)
        std_dev = np.std(subdf.price_per_sq, ddof=1)  # ddof=1 dla próby
        threshold = 3  # typowy próg Z-score
        z_scores = (subdf.price_per_sq - mean) / std_dev
        reduced_df = subdf[np.abs(z_scores) <= threshold]
        df_out = pd.concat([df_out, reduced_df], ignore_index=True)
    return df_out


def add_weighted_features(X, correlation, X_columns):
    weights = np.array([correlation.get(col, 1) for col in X_columns])
    weighted_features = X * weights
    return np.concatenate((X, weighted_features), axis=1)


def train_model():
    
    data = pd.read_csv("Updated_Houses.csv", encoding = "utf-8")
    data = remove_outliers_zscore(data)

    dummies = pd.get_dummies(data.city)
    prepared_df = pd.concat([data,dummies],axis='columns')
    dummies = pd.get_dummies(data.district)
    prepared_df = pd.concat([prepared_df,dummies],axis='columns')

    prepared_df.columns = prepared_df.columns.str.lower()
    data_to_corr = prepared_df.drop(["district", "update_date", "city", "offer_url", "price_per_sq", "year"], axis='columns')
    X = prepared_df.drop(['price', "district", "update_date", "city", "offer_url", "price_per_sq", "year"], axis='columns')
    X_columns = X.columns
    y = prepared_df.price
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    correlation = data_to_corr.corr()['price'].abs()

    X_train_extended = add_weighted_features(X_train_scaled, correlation, X_train.columns)
    X_test_extended = add_weighted_features(X_test_scaled, correlation, X_train.columns)

    print("Rozpoczęcie treningu")

    input_layer = Input(shape=(X_train_extended.shape[1],))
    dense1 = Dense(128, activation='relu')(input_layer)
    dense2 = Dense(64, activation='relu')(dense1)
    dense3 = Dense(32, activation='relu')(dense2)

    output_lower = Dense(1, name='lower_output')(dense3)
    output_upper = Dense(1, name='upper_output')(dense3)

    weighted_model = Model(inputs=input_layer, outputs=[output_lower, output_upper])
    weighted_model.compile(optimizer='nadam', loss='mean_squared_error', metrics=['mse', 'mae'])

    early_stopping = EarlyStopping(
        monitor='val_loss', 
        patience=10, 
        restore_best_weights=True
    )

    best_model_path = f"m1.keras"

    model_checkpoint = ModelCheckpoint(
        best_model_path, 
        monitor='val_loss', 
        save_best_only=True, 
        save_weights_only=False
    )

    margin = 0.1 * y_train
    y_train_lower = y_train - margin
    y_train_upper = y_train + margin

    margin_test = 0.1 * y_test
    y_test_lower = y_test - margin_test
    y_test_upper = y_test + margin_test

    history = weighted_model.fit(
        X_train_extended, 
        [y_train_lower, y_train_upper],
        epochs=100, 
        batch_size=32, 
        validation_split=0.2,
        callbacks=[early_stopping, model_checkpoint]
    )

    best_weighted_model = load_model(best_model_path)

    evaluation_results = best_weighted_model.evaluate(
        X_test_extended, [y_test_lower, y_test_upper]
    )

    y_pred_lower, y_pred_upper = best_weighted_model.predict(X_test_extended)

    r2_lower = r2_score(y_test_lower, y_pred_lower)
    r2_upper = r2_score(y_test_upper, y_pred_upper)

    print(f"R² dla dolnych granic (best_weighted_model): {r2_lower}")
    print(f"R² dla górnych granic (best_weighted_model): {r2_upper}")


train_model()



Rozpoczęcie treningu
Epoch 1/100
[1m741/741[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - loss: 2089507618816.0000 - lower_output_loss: 835392176128.0000 - lower_output_mse: 835392765952.0000 - upper_output_loss: 1254114066432.0000 - upper_output_mae: 956770.8750 - val_loss: 201506340864.0000 - val_lower_output_loss: 75492671488.0000 - val_lower_output_mse: 75600928768.0000 - val_upper_output_loss: 125691936768.0000 - val_upper_output_mae: 246812.6562
Epoch 2/100
[1m741/741[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - loss: 170042654720.0000 - lower_output_loss: 66681982976.0000 - lower_output_mse: 66682023936.0000 - upper_output_loss: 103360536576.0000 - upper_output_mae: 205902.4688 - val_loss: 145093738496.0000 - val_lower_output_loss: 58189598720.0000 - val_lower_output_mse: 58351566848.0000 - val_upper_output_loss: 86498328576.0000 - val_upper_output_mae: 176471.1094
Epoch 3/100
[1m741/741[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score

def train_random_forest_model():
    
    # Wczytanie danych
    data = pd.read_csv("Updated_Houses.csv", encoding = "utf-8")
    data = remove_outliers(data)

    # Przekształcenie zmiennych kategorycznych na zmienne dummy
    dummies = pd.get_dummies(data.city)
    prepared_df = pd.concat([data, dummies], axis='columns')
    dummies = pd.get_dummies(data.district)
    prepared_df = pd.concat([prepared_df, dummies], axis='columns')

    # Przygotowanie danych
    prepared_df.columns = prepared_df.columns.str.lower()
    X = prepared_df.drop(['price', "district", "update_date", "city", "offer_url", "price_per_sq"], axis='columns')
    y = prepared_df['price']

    # Podział na zbiór treningowy i testowy
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Skalowanie danych
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Model Random Forest Regressor
    model = RandomForestRegressor(n_estimators=100, random_state=42)

    print("Rozpoczęcie treningu")

    # Trenowanie modelu Random Forest
    model.fit(X_train_scaled, y_train)

    # Predykcja
    y_pred = model.predict(X_test_scaled)

    # Ocena modelu
    r2 = r2_score(y_test, y_pred)
    mse = mean_squared_error(y_test, y_pred)

    print(f"R² dla modelu Random Forest: {r2}")
    print(f"Mean Squared Error (MSE): {mse}")

    # Możliwość zapisania modelu na później (opcjonalnie)
    # import joblib
    # joblib.dump(model, 'random_forest_model.pkl')



In [18]:
train_random_forest_model()

Rozpoczęcie treningu
R² dla modelu Random Forest: 0.9496309221378502
Mean Squared Error (MSE): 10423830382.704222
