<a href="https://colab.research.google.com/github/DaeSeokSong/MachineLearningModels/blob/main/LSTM_PRVS.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Price Rationality Verification System

* 가격 합리성 검증 시스템

### Google Drive Mount

In [1]:
from google.colab import drive
drive.mount('/content/gdrive')

%cd /content/gdrive/MyDrive/DeepLearning/Project/PRVS
!ls -al

Mounted at /content/gdrive
/content/gdrive/MyDrive/DeepLearning/Project/PRVS
total 75
drwx------ 2 root root  4096 Jan 23 02:35 Dataset
-rw------- 1 root root 72488 Feb  5 15:14 LSTM-PRVS.ipynb


### Install

In [2]:
!pip install beautifulsoup4



### Import

In [3]:
# Crawling
import requests
from bs4 import BeautifulSoup

# Modeling
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tensorflow.keras import metrics, losses
from tensorflow.keras.optimizers import Adam
from keras.models import Sequential, load_model
from keras.layers import LSTM, Dropout, Dense
from sklearn.model_selection import train_test_split

# ETC
from collections import Counter

### Global Variable

In [33]:
# LED가로등기구, 39111603
CLASSIFI_NUM = 39111603
IDENTITY_NUM = 23435411

### Main

In [40]:
# 나라장터 가격 크롤링 불가, 배추김치(포기김치, 송이식품) 1kg 가격
price_go = 365000

# 네이버 가격 크롤링
url_naver = 'https://search.shopping.naver.com/search/all?query=LED%EA%B0%80%EB%A1%9C%EB%93%B1%EA%B8%B0%EA%B5%AC&cat_id=&frm=NVSHATC'
response = requests.get(url_naver)

if response.status_code == 200:
    soup = BeautifulSoup(response.text, 'html.parser')
    ul = soup.select_one('#__next > div > div.style_container__1YjHN > div')

    prices = ul.select('div > div > ul > div > div > li > div > div > div > strong > span > span')
    for idx, price in enumerate(prices) : 
        prices[idx] = int(price.get_text().split(',')[0] + price.get_text().split(',')[1][:-1])

    prices = np.array(prices)
    print(prices)
    mean = np.mean(prices)
    std = np.std(prices)
    cov = std / mean * 100
    print('평균 = ', mean)
    print('표준편차 = ', std)
    print('변동 계수 = ', cov)

    print('\n########## 최대값 제거 ##########')
    prices = np.delete(prices, np.where(prices == max(prices)))
    print(prices)
    mean = np.mean(prices)
    std = np.std(prices)
    cov = std / mean * 100
    print('평균 = ', mean)
    print('표준편차 = ', std)
    print('변동 계수 = ', cov)

    """
    print('\n########## 최소값 제거 ##########')
    prices = np.delete(prices, np.where(prices == min(prices)))
    print(prices)
    mean = np.mean(prices)
    std = np.std(prices)
    cov = std / mean * 100
    print('평균 = ', mean)
    print('표준편차 = ', std)
    print('변동 계수 = ', cov)
    """

    print('\n########## 결과값 ##########')
    mean = int(round(np.mean(prices), -1))
    print('나라장터 가격 = ', price_go)
    print('네이버쇼핑 평균가 = ', mean)
    print('가격 차이 = ', abs(price_go - mean))
else : 
    print(response.status_code)

[336000 226000 188000 156000 156000]
평균 =  212400.0
표준편차 =  66961.48146509305
변동 계수 =  31.526121217087123

########## 최대값 제거 ##########
[226000 188000 156000 156000]
평균 =  181500.0
표준편차 =  28822.734082664676
변동 계수 =  15.88029426042131

########## 결과값 ##########
나라장터 가격 =  365000
네이버쇼핑 평균가 =  181500
가격 차이 =  183500


In [34]:
df = pd.read_csv('./Dataset/item_registration_history_202201.csv')

itmClsNmb = df['물품분류번호']
itmIdx1 = np.where(itmClsNmb == CLASSIFI_NUM)[0]
itmIdnNmb = df['물품식별번호']
itmIdx2 = np.where(itmIdnNmb == IDENTITY_NUM)[0]
itmIdx = [idx for idx in itmIdx1 if idx in itmIdx2]

untPrc = [int(uprc) for uprc in df['단가'][itmIdx]]

print(untPrc)
print(len(untPrc))

[365000]
1


  exec(code_obj, self.user_global_ns, self.user_ns)


In [35]:
df = pd.read_csv('./Dataset/details_of_delivery_request_202201.csv', encoding='cp949')

itmClsNmb = df['물품분류번호']
itmIdx1 = np.where(itmClsNmb == CLASSIFI_NUM)[0]
itmIdnNmb = df['물품식별번호']
itmIdx2 = np.where(itmIdnNmb == IDENTITY_NUM)[0]
itmIdx = [idx for idx in itmIdx1 if idx in itmIdx2]

untPrc = [int(id) for idx, id in enumerate(df['단가']) if idx in itmIdx]

incDcr = [int(id) for idx, id in enumerate(df['증감수량']) if idx in itmIdx]
incDcrAmn = [int(ida) for idx, ida in enumerate(df['증감금액']) if idx in itmIdx]
incDcrDlvRqrQnt = [int(iddrq) for idx, iddrq in enumerate(df['증감납품요구수량']) if idx in itmIdx]
incDcrDlvRqsAmn = [int(iddra) for idx, iddra in enumerate(df['증감납품요구금액']) if idx in itmIdx]

excPrd = [ep for idx, ep in enumerate(df['우수제품여부']) if idx in itmIdx]
for idx, ny in enumerate(excPrd) :
    if ny == 'N' : excPrd[idx] = 0
    else : excPrd[idx] = 1
cmpAmnSms = [cas for idx, cas in enumerate(df['중소기업자간경쟁제품여부']) if idx in itmIdx]
for idx, ny in enumerate(cmpAmnSms) :
    if ny == 'N' : cmpAmnSms[idx] = 0
    else : cmpAmnSms[idx] = 1
drcPrc = [dp for idx, dp in enumerate(df['공사용자재직접구매대상여부']) if idx in itmIdx]
for idx, ny in enumerate(drcPrc) :
    if ny == 'N' : drcPrc[idx] = 0
    else : drcPrc[idx] = 1
mltSplCnt = [msc for idx, msc in enumerate(df['다수공급자계약여부']) if idx in itmIdx]
for idx, ny in enumerate(mltSplCnt) :
    if ny == 'N' : mltSplCnt[idx] = 0
    else : mltSplCnt[idx] = 1

print(untPrc)
print(len(untPrc))
print("\n")
print(incDcr)
print(len(incDcr))
print(incDcrAmn)
print(len(incDcrAmn))
print(incDcrDlvRqrQnt)
print(len(incDcrDlvRqrQnt))
print(incDcrDlvRqsAmn)
print(len(incDcrDlvRqsAmn))
print("\n")
print(excPrd)
print(len(excPrd))
print(cmpAmnSms)
print(len(cmpAmnSms))
print(drcPrc)
print(len(drcPrc))
print(mltSplCnt)
print(len(mltSplCnt))

  exec(code_obj, self.user_global_ns, self.user_ns)


[365000, 365000, 365000, 365000]
4


[1, 56, 35, 6]
4
[365000, 20440000, 12775000, 2190000]
4
[1, 56, 98, 6]
4
[365000, 20440000, 39345000, 2190000]
4


[1, 1, 1, 1]
4
[1, 1, 1, 1]
4
[1, 1, 1, 1]
4
[0, 0, 0, 0]
4


In [36]:
df = pd.read_csv('./Dataset/detailed_tally_of_delivery_requests_by_product_specification_202201.csv')

itmClsNmb = df['물품분류번호']
itmIdx1 = np.where(itmClsNmb == CLASSIFI_NUM)[0]
itmIdnNmb = df['물품식별번호']
itmIdx2 = np.where(itmIdnNmb == IDENTITY_NUM)[0]
itmIdx = [idx for idx in itmIdx1 if idx in itmIdx2]

ttlQnt = [int(tq) for idx, tq in enumerate(df['수량합계']) if idx in itmIdx]
ttlAmn = [int(ta) for idx, ta in enumerate(df['금액합계']) if idx in itmIdx if not ta == 0]
avrUntPrc = [int(aup) for idx, aup in enumerate(df['평균단가']) if idx in itmIdx if not pd.isna(aup)]

print(untPrc)
print(len(untPrc))
print(ttlAmn)
print(len(ttlAmn))
print(avrUntPrc)
print(len(avrUntPrc))

[365000, 365000, 365000, 365000]
4
[35770000]
1
[365000]
1
