<a href="https://colab.research.google.com/github/DaeSeokSong/MachineLearningModels/blob/main/LSTM_PRVS.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Price Rationality Verification System

* 가격 합리성 검증 시스템

### Google Drive Mount

In [1]:
from google.colab import drive
drive.mount('/content/gdrive')

%cd /content/gdrive/MyDrive/DeepLearning/Project/PRVS
!ls -al

Mounted at /content/gdrive
/content/gdrive/MyDrive/DeepLearning/Project/PRVS
total 24
drwx------ 2 root root  4096 Jan 23 02:35 Dataset
-rw------- 1 root root 20470 Feb  3 13:30 LSTM-PRVS.ipynb


### Install

In [2]:
!pip install beautifulsoup4



### Import

In [3]:
# Crawling
import requests
from bs4 import BeautifulSoup

# Modeling
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tensorflow.keras import metrics, losses
from tensorflow.keras.optimizers import Adam
from keras.models import Sequential, load_model
from keras.layers import LSTM, Dropout, Dense
from sklearn.model_selection import train_test_split

# ETC
from collections import Counter

### Global Variable

### Main

In [None]:
# 나라장터 가격 크롤링 불가, 배추김치(포기김치, 송이식품) 1kg 가격
price_go = 3500

# 네이버 가격 크롤링
url_naver = 'https://search.shopping.naver.com/search/all?where=all&frm=NVSCTAB&query=%EB%B0%B0%EC%B6%94%EA%B9%80%EC%B9%98%2C+%EC%86%A1%EC%9D%B4%EC%8B%9D%ED%92%88%2C+%EB%A7%9B%EA%B9%80%EC%B9%98%2C+1kg'
response = requests.get(url_naver)

if response.status_code == 200:
    soup = BeautifulSoup(response.text, 'html.parser')
    ul = soup.select_one('#__next > div > div.style_container__1YjHN > div')

    prices = ul.select('div > div > ul > div > div > li > div > div > div > strong > span > span')
    for idx, price in enumerate(prices) : 
        prices[idx] = int(price.get_text().split(',')[0] + price.get_text().split(',')[1][:-1])

    prices = np.array(prices)
    print(prices)
    mean = np.mean(prices)
    std = np.std(prices)
    cov = std / mean * 100
    print('평균 = ', mean)
    print('표준편차 = ', std)
    print('변동 계수 = ', cov)

    print('\n########## 최대값 제거 ##########')
    prices = np.delete(prices, np.where(prices == max(prices)))
    print(prices)
    mean = np.mean(prices)
    std = np.std(prices)
    cov = std / mean * 100
    print('평균 = ', mean)
    print('표준편차 = ', std)
    print('변동 계수 = ', cov)

    print('\n########## 최소값 제거 ##########')
    prices = np.delete(prices, np.where(prices == min(prices)))
    print(prices)
    mean = np.mean(prices)
    std = np.std(prices)
    cov = std / mean * 100
    print('평균 = ', mean)
    print('표준편차 = ', std)
    print('변동 계수 = ', cov)

    print('\n########## 결과값 ##########')
    mean = int(round(np.mean(prices), -1))
    print('나라장터 가격 = ', price_go)
    print('네이버쇼핑 평균가 = ', mean)
    print('가격 차이 = ', abs(price_go - mean))
else : 
    print(response.status_code)

[ 7700  9900 13000 13900 25900]
평균 =  14080.0
표준편차 =  6310.118857834613
변동 계수 =  44.8161850698481

########## 최대값 제거 ##########
[ 7700  9900 13000 13900]
평균 =  11125.0
표준편차 =  2472.2206616724166
변동 계수 =  22.22220819480824

########## 최소값 제거 ##########
[ 9900 13000 13900]
평균 =  12266.666666666666
표준편차 =  1713.3463034528531
변동 계수 =  13.967497039017825

########## 결과값 ##########
나라장터 가격 =  3500
네이버쇼핑 평균가 =  12270
가격 차이 =  8770


In [62]:
df = pd.read_csv('./Dataset/item_registration_history_202201.csv', index_col=0)

itmClsNmb = df['물품분류번호'] # LED가로등기구, 39111603
itmIdx = np.where(itmClsNmb == 39111603)[0]
untPrc = [int(uprc) for uprc in df['단가'][itmIdx]]
unt = [u for u in df['단위'][itmIdx]]

print(untPrc)
print(len(untPrc))
print(unt)
print(len(unt))

[410000, 440000, 410000, 490000, 390000, 550000, 550000, 790000, 480000, 800000, 460000, 490000, 450000, 590000, 490000, 490000, 560000, 297000, 297000, 570000, 550000, 460000, 580000, 690000, 470000, 460000, 630000, 630000, 580000, 550000, 520000, 440000, 450000, 400000, 350000, 390000, 490000, 600000, 690000, 640000, 490000, 590000, 690000, 880000, 620000, 530000, 510000, 520000, 450000, 760000, 430000, 415000, 658000, 330000, 450000, 528000, 418000, 335000, 503000, 410000, 429000, 436000, 475000, 440000, 370000, 310000, 478000, 465000, 455000, 462000, 328000, 250000, 400000, 355000, 338000, 627000, 369000, 638000, 289000, 295000, 462000, 398000, 380000, 380000, 510000, 480000, 420000, 385000, 360000, 486000, 320000, 440000, 480000, 440000, 370000, 425000, 309000, 442000, 480000, 530000, 410000, 703000, 394000, 585000, 525000, 374000, 460000, 561000, 572000, 594000, 330000, 517000, 352000, 451000, 390000, 500000, 325000, 424600, 272000, 500000, 500000, 400000, 300000, 517000, 570000,

  exec(code_obj, self.user_global_ns, self.user_ns)


In [70]:
df = pd.read_csv('./Dataset/details_of_delivery_request_202201.csv', index_col=0, encoding='cp949')

itmClsNmb = df['물품분류번호'] # LED가로등기구, 39111603
itmIdx = np.where(itmClsNmb == 39111603)[0]

untPrc = [int(id) for idx, id in enumerate(df['단가']) if idx in itmIdx]

incDcr = [int(id) for idx, id in enumerate(df['증감수량']) if idx in itmIdx]
incDcrAmn = [int(ida) for idx, ida in enumerate(df['증감금액']) if idx in itmIdx]
incDcrDlvRqrQnt = [int(iddrq) for idx, iddrq in enumerate(df['증감납품요구수량']) if idx in itmIdx]
incDcrDlvRqsAmn = [int(iddra) for idx, iddra in enumerate(df['증감납품요구금액']) if idx in itmIdx]

excPrd = [ep for idx, ep in enumerate(df['우수제품여부']) if idx in itmIdx]
cmpAmnSms = [cas for idx, cas in enumerate(df['중소기업자간경쟁제품여부']) if idx in itmIdx]
drcPrc = [dp for idx, dp in enumerate(df['공사용자재직접구매대상여부']) if idx in itmIdx]
mltSplCnt = [msc for idx, msc in enumerate(df['다수공급자계약여부']) if idx in itmIdx]

print(untPrc)
print(len(untPrc))
print("\n")
print(incDcr)
print(len(incDcr))
print(incDcrAmn)
print(len(incDcrAmn))
print(incDcrDlvRqrQnt)
print(len(incDcrDlvRqrQnt))
print(incDcrDlvRqsAmn)
print(len(incDcrDlvRqsAmn))
print("\n")
print(excPrd)
print(len(excPrd))
print(cmpAmnSms)
print(len(cmpAmnSms))
print(drcPrc)
print(len(drcPrc))
print(mltSplCnt)
print(len(mltSplCnt))

  exec(code_obj, self.user_global_ns, self.user_ns)


[425000, 412500, 581000, 380000, 430000, 380000, 307000, 720000, 430000, 395000, 430000, 300000, 363000, 425000, 425000, 308000, 360000, 660000, 600000, 370000, 420000, 395000, 480000, 365000, 440000, 400000, 425000, 370000, 370000, 380000, 370000, 440000, 440000, 580000, 330000, 390000, 331000, 450000, 410000, 400000, 433000, 450000, 600000, 460000, 420000, 313000, 660000, 433000, 350000, 585000, 416000, 239500, 400000, 470000, 550000, 496000, 390000, 416000, 416000, 380000, 350000, 540000, 340000, 198900, 227700, 330000, 350000, 570000, 340000, 370000, 295000, 390000, 395000, 580000, 358000, 380000, 255000, 400000, 390000, 328000, 420000, 323000, 450000, 451000, 506000, 450000, 219600, 300000, 498000, 430000, 350000, 420000, 334000, 450000, 365000, 320000, 340000, 405000, 445000, 380000, 370000, 275000, 540000, 464000, 370000, 420000, 420000, 300000, 420000, 370000, 440000, 450000, 395000, 380000, 410000, 584000, 330000, 426000, 414000, 491000, 360000, 760000, 330000, 475000, 432000,

In [73]:
df = pd.read_csv('./Dataset/detailed_tally_of_delivery_requests_by_product_specification_202201.csv', index_col=0)

ttlQnt = [int(tq) for idx, tq in enumerate(df['수량합계']) if idx in itmIdx]
ttlAmn = [int(ta) for idx, ta in enumerate(df['금액합계']) if idx in itmIdx]
avrUntPrc = [int(aup) for idx, aup in enumerate(df['평균단가']) if idx in itmIdx]

print(untPrc)
print(len(untPrc))
print(ttlAmn)
print(len(ttlAmn))
print(avrUntPrc)
print(len(avrUntPrc))

[425000, 412500, 581000, 380000, 430000, 380000, 307000, 720000, 430000, 395000, 430000, 300000, 363000, 425000, 425000, 308000, 360000, 660000, 600000, 370000, 420000, 395000, 480000, 365000, 440000, 400000, 425000, 370000, 370000, 380000, 370000, 440000, 440000, 580000, 330000, 390000, 331000, 450000, 410000, 400000, 433000, 450000, 600000, 460000, 420000, 313000, 660000, 433000, 350000, 585000, 416000, 239500, 400000, 470000, 550000, 496000, 390000, 416000, 416000, 380000, 350000, 540000, 340000, 198900, 227700, 330000, 350000, 570000, 340000, 370000, 295000, 390000, 395000, 580000, 358000, 380000, 255000, 400000, 390000, 328000, 420000, 323000, 450000, 451000, 506000, 450000, 219600, 300000, 498000, 430000, 350000, 420000, 334000, 450000, 365000, 320000, 340000, 405000, 445000, 380000, 370000, 275000, 540000, 464000, 370000, 420000, 420000, 300000, 420000, 370000, 440000, 450000, 395000, 380000, 410000, 584000, 330000, 426000, 414000, 491000, 360000, 760000, 330000, 475000, 432000,