In [2]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
from statsmodels.regression.linear_model import GLS, OLS
from scipy import stats

In [12]:
# Read the encoded dataset (which already carries a distance column) from the working directory.
df_e = pd.read_csv(filepath_or_buffer='final_encoded_with_distance.csv')

In [13]:
# Remove both raw coordinate columns in a single call.
df_e = df_e.drop(['latitude', 'longitude'], axis=1)

In [14]:
# Add the natural log of `distance` as a new column.
df_e['log_distance'] = df_e['distance'].pipe(np.log)

In [15]:
# The raw distance is no longer needed once `log_distance` exists.
df_e = df_e.drop('distance', axis=1)

In [16]:
# Collect every column whose name begins with 'district_' and remove them all.
# The list is stored under `area_dummy_columns` even though it holds district_ columns here.
area_dummy_columns = list(filter(lambda name: name.startswith('district_'), df_e.columns))
df_e = df_e.drop(area_dummy_columns, axis=1)

In [17]:
# Collect every column whose name begins with 'area_' and remove them all.
area_dummy_columns = list(filter(lambda name: name.startswith('area_'), df_e.columns))
df_e = df_e.drop(area_dummy_columns, axis=1)

In [18]:
def ramsey_reset_test_wls(model, degree=3):
    """
    Ramsey RESET specification test for a fitted WLS model.

    Powers 2..degree of the fitted values are appended to the model's
    regressors, the augmented model is re-estimated by WLS with the same
    weights, and the joint significance of the added terms is assessed with
    an F-test comparing the two residual sums of squares.

    Parameters
    ----------
    model : fitted regression results
        Must provide ``predict()``, ``ssr`` and ``model.exog`` /
        ``model.endog`` / ``model.weights``.
    degree : int, default 3
        Highest power of the fitted values to add; must be at least 2.

    Returns
    -------
    dict or None
        ``{'f_statistic', 'p_value', 'df_num', 'df_den'}`` on success.
        On any failure (including an invalid ``degree``) the error is
        printed and ``None`` is returned.
    """
    try:
        # With degree < 2 no terms are added and the numerator df would be 0.
        if degree < 2:
            raise ValueError(f"degree must be >= 2, got {degree}")

        fitted = np.asarray(model.predict())
        base = model.model

        # Augment the original regressors with fitted**2 ... fitted**degree.
        power_terms = [fitted ** power for power in range(2, degree + 1)]
        X = np.column_stack([base.exog, *power_terms])

        # Auxiliary regression reuses the weights of the original model.
        aux_model = sm.WLS(base.endog, X, weights=base.weights).fit()

        # F-test of the restricted (original) vs. unrestricted (augmented) fit.
        df_num = degree - 1
        df_den = aux_model.df_resid
        rss_r = model.ssr
        rss_ur = aux_model.ssr

        f_stat = ((rss_r - rss_ur) / df_num) / (rss_ur / df_den)
        # Survival function keeps precision for tiny p-values, where
        # 1 - cdf would round to exactly 0.0.
        p_value = stats.f.sf(f_stat, df_num, df_den)

        return {
            'f_statistic': f_stat,
            'p_value': p_value,
            'df_num': df_num,
            'df_den': df_den
        }
    except Exception as e:
        # Best-effort helper: report the problem and let the caller check for None.
        print(f"Ошибка теста: {str(e)}")
        return None

# Design matrix: every remaining column of df_e except the target; response: log of price.
X = df_e.drop("price", axis=1)
y = np.log(df_e["price"])

# Step 1: baseline OLS fit; its residuals feed the variance model below.
ols_model = sm.OLS(y, X).fit()
print("\nOLS модель для исходного y:")
print(ols_model.summary())

# Step 2: feasible-GLS weights.
# Weighting each observation by 1 / resid**2 makes every weighted squared OLS
# residual about 1 and hands near-zero residuals weights up to 1e6, so the WLS
# fit is dominated by points that merely happened to sit on the OLS line.
# Instead, model the variance: regress log(resid**2) on X and weight by the
# inverse of the fitted variance. The 1e-6 floor guards against log(0).
log_sq_resid = np.log(ols_model.resid ** 2 + 1e-6)
variance_model = sm.OLS(log_sq_resid, X).fit()
weights = 1 / np.exp(variance_model.fittedvalues)

# Step 3: WLS fit with the estimated weights.
wls_model = sm.WLS(y, X, weights=weights).fit()


# Ramsey RESET test on the WLS fit.
print("\nТест Рамсея для WLS (исходный y):")
test_result = ramsey_reset_test_wls(wls_model, degree=3)
# The helper returns None when the test could not be computed.
if test_result is not None:
    print(f"F: {test_result['f_statistic']:.4f}, p-value: {test_result['p_value']:.4f}")


OLS модель для исходного y:
                            OLS Regression Results                            
Dep. Variable:                  price   R-squared:                       0.609
Model:                            OLS   Adj. R-squared:                  0.602
Method:                 Least Squares   F-statistic:                     92.18
Date:                Sat, 12 Apr 2025   Prob (F-statistic):          8.05e-247
Time:                        10:06:23   Log-Likelihood:                 6.8087
No. Observations:                1326   AIC:                             32.38
Df Residuals:                    1303   BIC:                             151.8
Df Model:                          22                                         
Covariance Type:            nonrobust                                         
                                                         coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------

In [20]:
# Write the prepared table to disk without the row index, then confirm on stdout.
df_e.to_csv(path_or_buf='FINAL_data.csv', index=False)
print("Файл сохранён как FINAL_data.csv")

Файл сохранён как FINAL_data.csv
