In [99]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.impute import SimpleImputer
from sklearn.metrics import mean_squared_error, r2_score

In [100]:
# Load the source CSV (GBK-encoded) into the working DataFrame.
data = pd.read_csv(
    './latitude_longitude_data.csv',
    encoding='gbk',
)

In [101]:

# Encode the utility type labels as integers (first label -> 0, second -> 1).
_utility_codes = {
    'electricity': {'商电': 0, '民电': 1},
    'water': {'商水': 0, '民水': 1},
}
for _column, _codes in _utility_codes.items():
    data[_column] = data[_column].replace(_codes)

# '暂无数据' is the placeholder for a missing value. Borrow the value of the
# other utility column first. Order matters: electricity is filled from water,
# then water is filled from the already-updated electricity.
_missing = '暂无数据'
for _target, _source in (('electricity', 'water'), ('water', 'electricity')):
    data.loc[data[_target] == _missing, _target] = data[_source]

# Rows where both columns were missing still carry the placeholder; use 1.
for _column in ('water', 'electricity'):
    data[_column] = data[_column].replace({_missing: 1})

# Displayed as the cell output: number of rows in the frame.
len(data)



12701

In [102]:
# Columns removed before modelling (they are not part of the feature matrix).
# NOTE(review): this cell mutates `data` in place, so running it a second time
# raises KeyError because the columns are already gone.
_unused_columns = [
    'description',
    'facility',
    'stall',
    'tags',
    'title',
    'heating',
    'house_code',
    'house_type',
    'lease',
    'location',
    'orientation',
    'longitude',
    'latitude',
]
data.drop(columns=_unused_columns, inplace=True)

In [103]:
# Preview the first five rows of the frame after the column drop.
data.head(n=5)

Unnamed: 0,area,electricity,elevator,floor,fuel_gas,price,water,bedroom,living room,bathroom,...,Washing Machine,Air Conditioner,Wardrobe,Television,Refrigerator,Water Heater,Bed,Heating,Broadband,Natural Gas
0,90.0,0,1,16,0,4000,0,1.0,1,1,...,0,0,0,0,0,0,0,0,0,0
1,80.0,0,1,43,0,4000,0,1.0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,68.0,0,1,16,0,2800,0,1.0,1,1,...,0,0,0,0,0,0,0,0,0,0
3,38.0,1,1,23,1,2500,1,1.0,0,1,...,0,0,0,0,0,0,0,0,0,0
4,96.0,0,1,43,0,4320,0,1.0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [104]:
# data.to_csv('simulation.csv', index=False, encoding="UTF-8")

In [105]:
# Build the feature matrix: everything except the target, with `district`
# expanded into one indicator column per value.
# NOTE(review): every district indicator is kept while the regression also
# fits an intercept, so the individual district coefficients are not uniquely
# identified (dummy-variable trap); predictions are unaffected.
X = pd.get_dummies(data.drop(columns='price'), columns=['district'])

# Replace missing feature values with the mean of their column.
imputer = SimpleImputer(strategy='mean')
X_imputed = pd.DataFrame(imputer.fit_transform(X), columns=X.columns)

y = data['price']

# Fit on the full data set; no hold-out split is made in this cell.
model = LinearRegression().fit(X_imputed, y)

coefficients, intercept = model.coef_, model.intercept_

# One row per feature so the weights can be read next to their names.
feature_coefficients = pd.DataFrame(
    {'Feature': X_imputed.columns, 'Coefficient': coefficients}
)

print(feature_coefficients)
print(f'Intercept: {intercept}')


                        Feature  Coefficient
0                          area    27.391187
1                   electricity   235.082561
2                      elevator    16.002559
3                         floor   -14.966027
4                      fuel_gas  -233.735482
5                         water  -260.874963
6                       bedroom   852.566826
7                   living room -1540.804663
8                      bathroom    41.679915
9               Washing Machine   -92.048878
10              Air Conditioner   -38.136152
11                     Wardrobe  -130.108568
12                   Television   418.730466
13                 Refrigerator   -24.651048
14                 Water Heater  -150.491822
15                          Bed  -559.846111
16                      Heating   100.718467
17                    Broadband   143.977587
18                  Natural Gas   284.085581
19     district_Doumen District -1916.270057
20    district_Hengqin District  2872.847378
21  distri

In [109]:
def predict_price(area, electricity, elevator, floor, fuel_gas, water, bedroom, living_room, bathroom, washing_machine, air_conditioner, wardrobe, television, refrigerator, water_heater, bed, heating, broadband, natural_gas, district):
    """Predict the price of a single listing with the fitted global ``model``.

    The arguments are packed into a one-row DataFrame whose columns carry the
    training feature names, and that row is handed to ``model.predict``.

    If the model exposes ``feature_names_in_`` (scikit-learn records it when an
    estimator is fitted on a DataFrame with string column names), the row is
    aligned to exactly those columns in that order. District indicator columns
    known to the model but missing from the built-in list are added, so the
    function keeps working when the training data contains other districts or
    a different column order. Without ``feature_names_in_`` the original
    hard-coded column layout is used unchanged.

    NOTE(review): a later cell in this notebook defines another function named
    ``predict_price``; once that cell runs it shadows this one.

    Args:
        area, electricity, elevator, floor, fuel_gas, water, bedroom,
        living_room, bathroom, washing_machine, air_conditioner, wardrobe,
        television, refrigerator, water_heater, bed, heating, broadband,
        natural_gas: Feature values, passed to the model unchanged.
        district: District name. The ``district_<name>`` indicator is set to 1
            and all other district indicators to 0. A name with no matching
            indicator column leaves every district indicator at 0.

    Returns:
        The first element of the array returned by ``model.predict``.

    Raises:
        KeyError: If the model was fitted with a non-district feature that
            this function does not supply.
    """
    input_features = {
        'area': [area],
        'electricity': [electricity],
        'elevator': [elevator],
        'floor': [floor],
        'fuel_gas': [fuel_gas],
        'water': [water],
        'bedroom': [bedroom],
        'living room': [living_room],
        'bathroom': [bathroom],
        'Washing Machine': [washing_machine],
        'Air Conditioner': [air_conditioner],
        'Wardrobe': [wardrobe],
        'Television': [television],
        'Refrigerator': [refrigerator],
        'Water Heater': [water_heater],
        'Bed': [bed],
        'Heating': [heating],
        'Broadband': [broadband],
        'Natural Gas': [natural_gas]
    }

    # Default district indicators; used as-is when the model does not record
    # its training column names.
    for d in ['Doumen District', 'Hengqin District', 'High-Tech District', 'Jinwan District', 'Xiangzhou District']:
        input_features[f'district_{d}'] = [1 if district == d else 0]

    input_df = pd.DataFrame.from_dict(input_features)

    trained_columns = getattr(model, 'feature_names_in_', None)
    if trained_columns is not None:
        trained_columns = list(trained_columns)
        requested = f'district_{district}'
        # Add indicators for districts the model knows but the list above
        # does not, so the one-hot encoding matches the training frame.
        for column in trained_columns:
            if column.startswith('district_') and column not in input_df.columns:
                input_df[column] = 1 if column == requested else 0
        # Select and order the columns exactly as seen during fit; a missing
        # non-district feature raises KeyError instead of being guessed.
        input_df = input_df[trained_columns]

    predicted_price = model.predict(input_df)
    return predicted_price[0]

# Example call
price = predict_price(
    area=120,
    electricity=1,
    elevator=1,
    floor=10,
    fuel_gas=1,
    water=1,
    bedroom=2,
    living_room=1,
    bathroom=0,
    washing_machine=1,
    air_conditioner=1,
    wardrobe=1,
    television=1,
    refrigerator=0,
    water_heater=1,
    bed=1,
    heating=0,
    broadband=1,
    natural_gas=1,
    district='Xiangzhou District',
)
print(f'predict: {price}')


predict: 5860.030997892968


In [None]:
def predict_price(area, electricity, elevator, floor, fuel_gas, water, bedroom, living_room, bathroom, washing_machine, air_conditioner, wardrobe, television, refrigerator, water_heater, bed, heating, broadband, natural_gas, district):
    """Predict a listing price from a fixed table of linear coefficients.

    The result is the intercept plus ``coefficient * value`` for each of the
    nineteen numeric features, plus the coefficient of the matching
    ``district_<name>`` entry. A district with no entry contributes 0.

    NOTE(review): the numbers below differ from the coefficients printed by
    the fitting cell in this notebook; confirm which fit they were taken from.
    NOTE(review): this definition reuses the name of the model-backed
    ``predict_price`` defined in an earlier cell and shadows it once run.

    Args:
        area, electricity, elevator, floor, fuel_gas, water, bedroom,
        living_room, bathroom, washing_machine, air_conditioner, wardrobe,
        television, refrigerator, water_heater, bed, heating, broadband,
        natural_gas: Numeric feature values.
        district: District name used to look up ``district_<name>``.

    Returns:
        The predicted price as a number.
    """
    coefficients = {
        'intercept': 943.8428797033107,
        'area': 30.428346,
        'electricity': -87.242346,
        'elevator': 78.518876,
        'floor': 11.977505,
        'fuel_gas': 145.242987,
        'water': 367.167757,
        'bedroom': -79.017383,
        'living room': -514.359850,
        'bathroom': -82.373385,
        'Washing Machine': -84.367002,
        'Air Conditioner': -71.662535,
        'Wardrobe': -125.300425,
        'Television': 182.750708,
        'Refrigerator': 146.004075,
        'Water Heater': -99.820121,
        'Bed': -46.602452,
        'Heating': 167.745168,
        'Broadband': 55.104968,
        'Natural Gas': 144.230935,
        'district_Doumen District': -1130.702170,
        'district_Hengqin District': 1289.862982,
        'district_High-Tech District': -8.756755,
        'district_Jinwan District': -585.144251,
        'district_Xiangzhou District': 434.740194
    }

    # (coefficient key, supplied value) pairs, in the order they are summed.
    observed = (
        ('area', area),
        ('electricity', electricity),
        ('elevator', elevator),
        ('floor', floor),
        ('fuel_gas', fuel_gas),
        ('water', water),
        ('bedroom', bedroom),
        ('living room', living_room),
        ('bathroom', bathroom),
        ('Washing Machine', washing_machine),
        ('Air Conditioner', air_conditioner),
        ('Wardrobe', wardrobe),
        ('Television', television),
        ('Refrigerator', refrigerator),
        ('Water Heater', water_heater),
        ('Bed', bed),
        ('Heating', heating),
        ('Broadband', broadband),
        ('Natural Gas', natural_gas),
    )

    # Accumulate term by term (not with sum()) so the floating-point result
    # follows a fixed left-to-right addition order.
    price = coefficients['intercept']
    for key, value in observed:
        price += coefficients[key] * value

    # Unknown districts fall back to a zero adjustment.
    price += coefficients.get(f'district_{district}', 0)

    return price


# Example call: every binary feature switched on.
price = predict_price(
    area=80,
    electricity=1,
    elevator=1,
    floor=10,
    fuel_gas=1,
    water=1,
    bedroom=2,
    living_room=1,
    bathroom=1,
    washing_machine=1,
    air_conditioner=1,
    wardrobe=1,
    television=1,
    refrigerator=1,
    water_heater=1,
    bed=1,
    heating=1,
    broadband=1,
    natural_gas=1,
    district='Xiangzhou District',
)
print(f'predict price: {price}')


predict price: 3949.6283957033115
