In [1]:
import joblib
import pickle
import pandas as pd
import numpy as np

In [2]:
import sklearn
# Print the installed scikit-learn version.
# NOTE(review): presumably recorded because the model loaded in the next cell
# was serialized with scikit-learn and should be loaded under a matching
# version — confirm against the training environment.
print(sklearn.__version__)

1.2.2


In [3]:
# Load the trained estimator from disk.
# NOTE(security): joblib files are pickle-based, so loading one can execute
# arbitrary code — only load artifacts from a trusted source.
with open("random_forest.joblib", "rb") as model_file:
    model = joblib.load(model_file)

In [4]:
# Load the fitted encoders for the "gu" and "dong" columns.
# NOTE(security): pickle.load can execute arbitrary code — only load
# artifacts from a trusted source.
with open("le_gu.pkl", "rb") as gu_encoder_file:
    le_gu = pickle.load(gu_encoder_file)

with open("le_dong.pkl", "rb") as dong_encoder_file:
    le_dong = pickle.load(dong_encoder_file)

# 학습 데이터
- dealDate: string -> datetime -> int (2006-01-01 기준 경과 일수)
- interestRate: float
- gu: string -> int (`le_gu`로 인코딩)
- dong: string -> int (`le_dong`으로 인코딩)
- exclusiveArea: float
- floor: int
- buildYear: int

# 출력
- dealAmount: float (만원 단위)

In [10]:
from datetime import datetime, timedelta
def make_x_df(gu, dong, exclusiveArea, floor, buildYear, n_months=12, current_date=None):
    """Build a feature frame with one row per month, newest month first.

    Rows start at the month of ``current_date`` and step back one calendar
    month at a time. Each row's ``dealDate`` is the 15th of that month as a
    ``"YYYY-MM-DD"`` string, ``interestRate`` is looked up with ``make_ir``,
    and the remaining columns repeat the given property attributes.

    Args:
        gu: Value repeated in the "gu" column.
        dong: Value repeated in the "dong" column.
        exclusiveArea: Value repeated in the "exclusiveArea" column.
        floor: Value repeated in the "floor" column.
        buildYear: Value repeated in the "buildYear" column.
        n_months: Number of monthly rows to generate (default 12).
        current_date: ``datetime`` whose month is the newest row. Defaults to
            ``datetime.now()``; pass a fixed value for reproducible output.

    Returns:
        pandas.DataFrame with columns dealDate, interestRate, gu, dong,
        exclusiveArea, floor, buildYear and ``n_months`` rows.
    """
    if current_date is None:
        current_date = datetime.now()

    # Number the months on one linear axis (year * 12 + zero-based month) so
    # stepping back across any number of year boundaries needs no special case.
    newest_month = current_date.year * 12 + (current_date.month - 1)
    deal_dates = []
    for offset in range(n_months):
        year, month_index = divmod(newest_month - offset, 12)
        deal_dates.append(f"{year}-{month_index + 1:02d}-15")
    interest_rates = make_ir(deal_dates)

    # Derive the repeat count from the generated dates so every column stays
    # the same length as dealDate.
    row_count = len(deal_dates)
    x_df = pd.DataFrame({
        "dealDate": deal_dates,
        "interestRate": interest_rates,
        "gu": [gu] * row_count,
        "dong": [dong] * row_count,
        "exclusiveArea": [exclusiveArea] * row_count,
        "floor": [floor] * row_count,
        "buildYear": [buildYear] * row_count
    })
    return x_df
    
# Looks up the interest rate that applies to each deal date.
# The built-in schedule contains only the change on 2024-10-11 (3.5 -> 3.25);
# pass `rate_changes` (or extend the default below) whenever the rate changes.
def make_ir(deal_dates, rate_changes=None, initial_rate=3.5):
    """Return the interest rate in effect on each date in ``deal_dates``.

    Args:
        deal_dates: Iterable of date strings in ``"%Y-%m-%d"`` format.
        rate_changes: Optional iterable of ``(effective_datetime, rate)``
            pairs, in any order. A rate applies from its effective date
            (inclusive) until the next change. Defaults to the single change
            to 3.25 on 2024-10-11.
        initial_rate: Rate used for dates before the earliest change
            (default 3.5).

    Returns:
        List of rates, one per input date, in input order.

    Raises:
        ValueError: If a date string does not match ``"%Y-%m-%d"``.
    """
    if rate_changes is None:
        rate_changes = [(datetime(2024, 10, 11), 3.25)]
    # Oldest change first, so the last match in the scan below is the most
    # recent change on or before the date.
    schedule = sorted(rate_changes, key=lambda change: change[0])

    ir_values = []
    for date_str in deal_dates:
        date = datetime.strptime(date_str, "%Y-%m-%d")
        rate = initial_rate
        for effective_date, new_rate in schedule:
            if date >= effective_date:
                rate = new_rate
        ir_values.append(rate)

    return ir_values

In [11]:
# Smoke test: sample property attributes fed to make_x_df.
gu = "강남구"
dong = "역삼동"
exclusiveArea = 59.73
floor = 8
buildYear = 2005

# Build the monthly feature frame; gu/dong are still raw strings at this point.
testdf = make_x_df(gu, dong, exclusiveArea, floor, buildYear)

In [12]:
# Encode the "gu" and "dong" string columns with the encoders loaded earlier.
# Each column is encoded only while it is still non-numeric, so re-running
# this cell is a no-op instead of calling transform() on already-encoded
# integers (presumably a ValueError for unseen labels — confirm encoder type).
if not pd.api.types.is_numeric_dtype(testdf["gu"]):
    testdf["gu"] = le_gu.transform(testdf["gu"])
if not pd.api.types.is_numeric_dtype(testdf["dong"]):
    testdf["dong"] = le_dong.transform(testdf["dong"])
testdf

Unnamed: 0,dealDate,interestRate,gu,dong,exclusiveArea,floor,buildYear
0,2024-11-15,3.25,0,207,59.73,8,2005
1,2024-10-15,3.25,0,207,59.73,8,2005
2,2024-09-15,3.5,0,207,59.73,8,2005
3,2024-08-15,3.5,0,207,59.73,8,2005
4,2024-07-15,3.5,0,207,59.73,8,2005
5,2024-06-15,3.5,0,207,59.73,8,2005
6,2024-05-15,3.5,0,207,59.73,8,2005
7,2024-04-15,3.5,0,207,59.73,8,2005
8,2024-03-15,3.5,0,207,59.73,8,2005
9,2024-02-15,3.5,0,207,59.73,8,2005


In [16]:
# Replace dealDate with the whole number of days elapsed since 2006-01-01.
start_date = pd.to_datetime('2006-01-01')
# Convert only while the column is still non-numeric. On a re-run the column
# already holds integer day counts, and pd.to_datetime would read those
# integers as nanoseconds since the Unix epoch, silently corrupting the feature.
if not pd.api.types.is_numeric_dtype(testdf['dealDate']):
    testdf['dealDate'] = (pd.to_datetime(testdf['dealDate']) - start_date).dt.days
testdf

In [18]:
# Predict one value per row of the prepared frame (this notebook's notes give
# the output, dealAmount, in units of 10,000 KRW).
model.predict(testdf)

array([205364., 205364., 205279., 202593., 200785., 193850., 193605.,
       190700., 188825., 188580., 187625., 186915.])

In [61]:
# Hand-built single-row input for a one-off prediction.
dealDate = "2024-09-18"
# NOTE(review): make_ir in this notebook assigns 3.5 to dates before
# 2024-10-11, so 3.25 for a 2024-09-18 deal differs from that rule —
# confirm this value is intentional.
interestRate = 3.25
gu = "강남구"
dong = "역삼동"
exclusiveArea = 59.73
floor = 8
buildYear = 2005

# One-element lists so the DataFrame below has exactly one row.
data = {
    "dealDate": [dealDate],
    "interestRate": [interestRate],
    "gu": [gu],
    "dong": [dong],
    "exclusiveArea": [exclusiveArea],
    "floor": [floor],
    "buildYear": [buildYear]
}

# Note: this rebinds testdf, replacing the monthly frame built earlier.
testdf = pd.DataFrame(data)

In [62]:
# Replace dealDate with the whole number of days elapsed since 2006-01-01.
start_date = pd.to_datetime('2006-01-01')
# Every step below runs only while its column is still non-numeric, so the
# cell can be re-run safely. Without the guard, a second run would read the
# integer day counts as epoch nanoseconds and pass already-encoded integers
# to the encoders.
if not pd.api.types.is_numeric_dtype(testdf['dealDate']):
    testdf['dealDate'] = (pd.to_datetime(testdf['dealDate']) - start_date).dt.days

# Encode the "gu" and "dong" string columns with the encoders loaded earlier.
if not pd.api.types.is_numeric_dtype(testdf["gu"]):
    testdf["gu"] = le_gu.transform(testdf["gu"])
if not pd.api.types.is_numeric_dtype(testdf["dong"]):
    testdf["dong"] = le_dong.transform(testdf["dong"])
# NOTE(review): Jupyter displays only a cell's last expression, so this line
# shows the frame only if it ends its own cell — confirm the cell boundary.
testdf

# Predict for the single prepared row.
model.predict(testdf)

array([205364.])