<a href="https://colab.research.google.com/github/DaeSeokSong/MachineLearningModels/blob/main/LSTM_PRVS.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Price Rationality Verification System

* 가격 합리성 검증 시스템

### Google Drive Mount

In [1]:
# Mount Google Drive inside the Colab runtime at /content/gdrive.
from google.colab import drive
drive.mount('/content/gdrive')

# IPython magics: move into the project folder on Drive, then list its contents.
# Later cells read files through paths relative to this folder (./Dataset/...).
%cd /content/gdrive/MyDrive/DeepLearning/Project/PRVS
!ls -al

Mounted at /content/gdrive
/content/gdrive/MyDrive/DeepLearning/Project/PRVS
total 41
drwx------ 2 root root  4096 Jan 23 02:35 Dataset
-rw------- 1 root root 37279 Jan 29 03:38 LSTM-PRVS.ipynb


### Install

In [2]:
# Shell escape: install BeautifulSoup (imported below as `bs4`) into the runtime.
!pip install beautifulsoup4



### Import

In [6]:
# Crawling
import requests
from bs4 import BeautifulSoup

# Modeling
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tensorflow.keras import metrics, losses
from tensorflow.keras.optimizers import Adam
from keras.models import Sequential, load_model
from keras.layers import LSTM, Dropout, Dense
from sklearn.model_selection import train_test_split

# ETC
from collections import Counter

### Global Variable

### Main

In [None]:
# The Nara Jangteo (나라장터) price cannot be crawled, so the price of 1 kg of
# napa-cabbage kimchi (whole-head kimchi, Songi Food) is entered by hand.
price_go = 3500

# Crawl prices from a Naver Shopping search for the same product.
url_naver = 'https://search.shopping.naver.com/search/all?where=all&frm=NVSCTAB&query=%EB%B0%B0%EC%B6%94%EA%B9%80%EC%B9%98%2C+%EC%86%A1%EC%9D%B4%EC%8B%9D%ED%92%88%2C+%EB%A7%9B%EA%B9%80%EC%B9%98%2C+1kg'
# Without a timeout, requests waits forever if the server never answers.
response = requests.get(url_naver, timeout=10)


def _parse_price(text):
    """Return the integer formed by the decimal digits in *text*, or None.

    Thousands separators and any trailing unit character are ignored, so
    '13,900원' gives 13900, '900원' gives 900 and '1,234,567원' gives
    1234567. None is returned when *text* contains no digit at all.
    """
    digits = ''.join(ch for ch in text if ch.isdecimal())
    return int(digits) if digits else None


def _drop_value(values, target):
    """Return the array *values* without the entries equal to *target*.

    When every entry equals *target* the array is returned unchanged, so the
    statistics computed afterwards never run on an empty array.
    """
    kept = values[values != target]
    return kept if kept.size else values


def _report_stats(values):
    """Print *values* followed by their mean, standard deviation and
    coefficient of variation (std / mean, in percent).

    Returns the tuple (mean, std, cov).
    """
    print(values)
    mean = np.mean(values)
    std = np.std(values)
    cov = std / mean * 100
    print('평균 = ', mean)
    print('표준편차 = ', std)
    print('변동 계수 = ', cov)
    return mean, std, cov


if response.status_code == 200:
    soup = BeautifulSoup(response.text, 'html.parser')
    ul = soup.select_one('#__next > div > div.style_container__1YjHN > div')

    # select_one returns None when the selector matches nothing (for example
    # after a change of the page markup); treat that as "no prices found".
    price_tags = [] if ul is None else ul.select(
        'div > div > ul > div > div > li > div > div > div > strong > span > span')
    parsed = (_parse_price(tag.get_text()) for tag in price_tags)
    prices = np.array([value for value in parsed if value is not None])

    if prices.size == 0:
        # User-facing message: "No price information could be found."
        print('가격 정보를 찾을 수 없습니다.')
    else:
        mean, std, cov = _report_stats(prices)

        # Trim the highest price(s) to reduce the influence of outliers.
        print('\n########## 최대값 제거 ##########')
        prices = _drop_value(prices, prices.max())
        mean, std, cov = _report_stats(prices)

        # Trim the lowest price(s) as well.
        print('\n########## 최소값 제거 ##########')
        prices = _drop_value(prices, prices.min())
        mean, std, cov = _report_stats(prices)

        # Compare the hand-entered reference price with the trimmed mean,
        # rounded to the nearest 10.
        print('\n########## 결과값 ##########')
        mean = int(round(np.mean(prices), -1))
        print('나라장터 가격 = ', price_go)
        print('네이버쇼핑 평균가 = ', mean)
        print('가격 차이 = ', abs(price_go - mean))
else:
    # Any non-200 answer: just show the HTTP status code.
    print(response.status_code)

[ 7700  9900 13000 13900 25900]
평균 =  14080.0
표준편차 =  6310.118857834613
변동 계수 =  44.8161850698481

########## 최대값 제거 ##########
[ 7700  9900 13000 13900]
평균 =  11125.0
표준편차 =  2472.2206616724166
변동 계수 =  22.22220819480824

########## 최소값 제거 ##########
[ 9900 13000 13900]
평균 =  12266.666666666666
표준편차 =  1713.3463034528531
변동 계수 =  13.967497039017825

########## 결과값 ##########
나라장터 가격 =  3500
네이버쇼핑 평균가 =  12270
가격 차이 =  8770


In [14]:
# Load the item registration details; the first CSV column is used as the index.
# NOTE(review): the recorded output of this cell starts with a truncated warning.
# If it is pandas' DtypeWarning, an explicit `dtype=` (or `low_memory=False`)
# on read_csv would address it - confirm before changing.
df = pd.read_csv(
    './Dataset/item_registration_details.csv',
    index_col=0,
)

# Pull out the columns of interest in one unpacking step: item classification
# number, item identification number, unit price and unit.
itmClsNmb, itmIdnNmb, untPrc, unt = (
    df[column] for column in ('물품분류번호', '물품식별번호', '단가', '단위')
)

# Cell result: the 50 most frequent item identification numbers with counts.
Counter(itmIdnNmb).most_common(50)

  interactivity=interactivity, compiler=compiler, result=result)


[(23734462, 97),
 (23734469, 95),
 (23734464, 74),
 (23709986, 73),
 (23709989, 73),
 (23709843, 73),
 (23709842, 73),
 (23709844, 72),
 (23734420, 65),
 (23734422, 65),
 (23734442, 65),
 (23734433, 65),
 (23734448, 65),
 (23734440, 65),
 (23734443, 65),
 (23734435, 65),
 (23734428, 65),
 (23734421, 65),
 (23734426, 65),
 (23734429, 65),
 (23734434, 65),
 (23734439, 65),
 (23734438, 63),
 (20683699, 62),
 (20683697, 62),
 (22067176, 62),
 (22067192, 62),
 (10040387, 62),
 (10040386, 62),
 (22067195, 62),
 (22067194, 62),
 (22067190, 62),
 (22067191, 62),
 (22067189, 62),
 (22067197, 62),
 (22067196, 62),
 (20174480, 62),
 (20174481, 62),
 (22067200, 62),
 (22067201, 62),
 (22066123, 62),
 (22066121, 62),
 (22066418, 62),
 (22066420, 62),
 (22066417, 62),
 (22066419, 62),
 (22066421, 62),
 (22067185, 61),
 (22065234, 61),
 (22067177, 61)]

In [18]:
# Load the delivery request details (cp949-encoded CSV); the first column is
# used as the index.
# NOTE(review): the recorded output of this cell starts with a truncated warning.
# If it is pandas' DtypeWarning, an explicit `dtype=` (or `low_memory=False`)
# on read_csv would address it - confirm before changing.
df = pd.read_csv(
    './Dataset/details_of_delivery_request.csv',
    index_col=0,
    encoding='cp949',
)

# Pull out the columns of interest in one unpacking step: item classification
# number, detailed item classification number, item identification number,
# unit price and unit.
itmClsNmb, dtlItmClsNmb, itmIdnNmb, untPrc, unt = (
    df[column]
    for column in ('물품분류번호', '세부물품분류번호', '물품식별번호', '단가', '단위')
)

# Cell result: the 500 most frequent item identification numbers with counts.
Counter(itmIdnNmb).most_common(500)

  interactivity=interactivity, compiler=compiler, result=result)


[(23734420, 4216),
 (23709986, 3677),
 (23734426, 3005),
 (23709843, 2610),
 (23734428, 2243),
 (23734435, 1977),
 (23734462, 1684),
 (22723642, 1520),
 (23709844, 1512),
 (20373322, 1493),
 (21567236, 1487),
 (10061680, 1486),
 (23734421, 1389),
 (10061686, 1317),
 (23734469, 1289),
 (23709952, 1279),
 (10061682, 1275),
 (10063866, 1194),
 (23709992, 1170),
 (10063867, 1040),
 (23709989, 1030),
 (23734434, 1027),
 (20912089, 966),
 (20469354, 927),
 (10061688, 870),
 (20912098, 850),
 (23734440, 827),
 (23709951, 790),
 (23709991, 787),
 (23709954, 771),
 (21654963, 732),
 (10063868, 727),
 (20160899, 703),
 (10067139, 685),
 (20770467, 682),
 (20373317, 671),
 (24262321, 669),
 (23709842, 665),
 (10061684, 660),
 (24043775, 655),
 (23734422, 596),
 (21590439, 590),
 (20770466, 587),
 (10063865, 580),
 (10063869, 538),
 (20373320, 531),
 (20160900, 524),
 (23734429, 491),
 (24262334, 461),
 (23410961, 456),
 (21624130, 453),
 (20918014, 430),
 (21087037, 423),
 (23734517, 421),
 (2431