In [3]:
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import requests
import time
import csv
import numpy as np
from datetime import timedelta
from bs4 import BeautifulSoup
from prophet import Prophet
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
import joblib
import psycopg2 #java 연결

In [4]:
data = pd.read_csv("bunjang_apple_watch_se2_price_condition_numeric_QCR.csv")
data

Unnamed: 0,state,Price
0,4,250000.0
1,5,360000.0
2,2,210000.0
3,4,200000.0
4,4,300000.0
...,...,...
971,2,240000.0
972,2,350000.0
973,2,280000.0
974,2,210000.0


In [5]:
X = data[['state']]  # Condition을 feature로 사용
y = data['Price']        # Price를 target으로 사용

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [6]:
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)


In [7]:
y_pred = rf_model.predict(X_test)


In [8]:
joblib.dump(rf_model, 'Random_Forest_Model.pkl')

['Random_Forest_Model.pkl']

In [9]:
mse = mean_squared_error(y_test, y_pred)
print(f'Mean Squared Error: {mse}')

Mean Squared Error: 2205723994.7404337


In [50]:
 for condition in sorted(data['State'].unique(), reverse=True):
    condition_array = [[condition]]  # Condition 값을 사용하여 예측
    predicted_price = rf_model.predict(condition_array)
    lower_bound = predicted_price - (predicted_price * 0.3)  # 하한 (30% 낮게)
    upper_bound = predicted_price + (predicted_price * 0.3)  # 상한 (30% 높게)
    print(f"Condition {condition}: Predicted Price = {predicted_price[0]:,.0f}, "
          f"Lower Bound = {lower_bound[0]:,.0f}, Upper Bound = {upper_bound[0]:,.0f}")

KeyError: 'State'

# DB 연결

In [51]:
db_params = {
    'dbname': 'graduate_task',
    'user': 'g1',
    'password': '0000',
    'host': 'localhost',  # 실제 사용 중인 호스트 정보
    'port': '5432'
}

# 상태를 숫자로 매핑
status_mapping = {
    '새상품': 5,
    '사용감 없음': 4,
    '사용감 적음': 3,
    '사용감 많은': 2,
    '중고': 2,
    '고장/파손 상품': 1
}


def insert_category_price_data(category_id, max_price, min_price, status):
    connection = None
    cursor = None
    try:
        # PostgreSQL에 연결
        connection = psycopg2.connect(**db_params)
        cursor = connection.cursor()
        
        # 데이터 삽입 SQL 쿼리 작성
        insert_query = """
        INSERT INTO category_price (category_no, max_price, min_price, status) 
        VALUES (%s, %s, %s, %s);
        """
        
         # 데이터 삽입 실행
        cursor.execute(insert_query, (category_id, max_price, min_price, status))
        
        # 커밋하여 변경사항 저장
        connection.commit()
        print(f"Data inserted for category_id: {category_id}")
    
    except Exception as error:
        print(f"Error inserting data: {error}")

    finally:
        
         # 리소스 해제
         if cursor:
             cursor.close()
         if connection:
             connection.close()

# 예측된 데이터를 넣어보는 예시 (테스트 데이터)
insert_category_price_data(1, 500000, 300000, '2')

Data inserted for category_id: 1
