In [263]:
# STEP0
# ライブラリのインポート

import requests
from bs4 import BeautifulSoup
from time import sleep
import pandas as pd
import numpy as np
import os 
import datetime as dt
import tqdm

# Root URL of the shop's collection listings; a section's page template is appended to it.
base_url = 'https://gemstore.tokyo/collections/'

# Section key -> page-URL template relative to base_url ('{}' receives the page number).
# Deliberately NOT named `dict`: binding that name at notebook level would shadow the
# built-in type for every cell executed afterwards.
section_pages = {
    'tops':'tops?page={}',
    'outer':'outer?page={}',
    'bottoms':'bottoms?page={}',
    'set-up':'set-up?page={}',
    'shoes':'shoes?page={}',
    'accessory':'accessory?page={}',
    'bag':'bag?page={}',
    'smartphone-case':'smartphone-case?page={}'
}

# Output locations: a timestamped folder on the user's desktop with an img/ subfolder.
desktop_dir = os.path.expanduser('~/Desktop')
dt_now = dt.datetime.now()
# Despite the variable name, the stamp has minute resolution (year..minute, no seconds).
yyyymmddhhss = dt_now.strftime('%Y%m%d%H%M')
directory_name = u'gemstoreデータ抽出_' + yyyymmddhhss
top_path = f'{desktop_dir}/{directory_name}'
img_path = f'{top_path}/img'

# Sections to crawl, in crawl order; edit this list to restrict a run.
sections = ['tops','outer','bottoms','set-up','shoes','accessory','bag','smartphone-case']
# Page-URL templates for the selected sections, in the same order as `sections`.
section_list = [section_pages[section] for section in sections]

In [264]:
def get_goods_list(section_list, max_pages=1000):
    """Collect the product-page links found on the listing pages of each section.

    Each entry of ``section_list`` is a page-URL template (containing one ``{}``
    placeholder for the page number) that is appended to the notebook-level
    ``base_url``. Pages are requested from 1 upwards until the page reports that
    no products were found, yields no product links, or ``max_pages`` is reached.

    Args:
        section_list: Iterable of page-URL templates relative to ``base_url``.
        max_pages: Upper bound on the pages requested per section (default 1000,
            the limit that used to be hard-coded).

    Returns:
        list[str]: The ``href`` values of the product links, in crawl order.
        A product linked from several pages or sections appears once per link.

    Raises:
        requests.HTTPError: If a listing page answers with an error status.
        requests.Timeout: If a listing page does not answer within 30 seconds.
    """
    goods_list = []

    for section_page in tqdm.tqdm(section_list):
        section_url = base_url + section_page

        for page_no in range(1, max_pages + 1):
            # A timeout keeps one stalled connection from hanging the whole crawl.
            page_r = requests.get(section_url.format(page_no), timeout=30)
            page_r.raise_for_status()
            sleep(1)  # pause between requests
            page_soup = BeautifulSoup(page_r.content,'lxml')

            # Past the last page the listing shows a "no products found" heading.
            if page_soup.select('h2:-soup-contains("商品が見つかりません")'):
                break

            goods_tags = page_soup.select('ul#product-grid >li>div>div>div.card__content h3>a')
            # Second stop condition: without it, a change to the heading text above
            # would cost max_pages requests for every section.
            if not goods_tags:
                break

            for goods_tag in goods_tags:
                href = goods_tag.get('href')
                # An anchor without href would put None in the result, which later
                # breaks the string concatenation that builds the product URL.
                if href:
                    goods_list.append(href)

    return goods_list

In [265]:
# Crawl the listing pages of every selected section and keep the product links.
goods_list = get_goods_list(section_list)

100%|█████████████████████████████████████████████| 8/8 [01:44<00:00, 13.03s/it]


In [267]:
def make_folder(top_path,img_path):
    """Create the output folder and the image folder, including missing parents.

    Folders that already exist are left untouched, so the cell calling this
    function can be re-run without raising ``FileExistsError``.

    Args:
        top_path: Path of the top-level output folder.
        img_path: Path of the folder that receives the downloaded images.
    """
    os.makedirs(top_path, exist_ok=True)
    os.makedirs(img_path, exist_ok=True)

In [268]:
# Create the output folder and its image subfolder before anything is written to them.
make_folder(top_path,img_path)

In [269]:
def get_result_img(goods_list,img_path,top_path,limit=10):
    """Scrape product pages, download their images and return one row per product.

    For each product link the page is fetched and the title, price, colour
    options, size options and description are extracted. Every image of the
    first media slider is downloaded to ``{img_path}/{name}_{n}.jpeg``, where
    ``name`` is the last six characters of the title (presumably the product
    code, e.g. ``gm4114`` -- verify for titles that do not end in such a code)
    and ``n`` counts the product's images from 1.

    Args:
        goods_list: Product-page paths relative to ``https://gemstore.tokyo``.
        img_path: Existing folder that receives the downloaded images.
        top_path: Unused; kept so existing calls keep working.
        limit: Number of leading entries of ``goods_list`` to process. Defaults
            to 10 (the slice that used to be hard-coded); pass ``None`` to
            process every entry.

    Returns:
        pandas.DataFrame: Columns ``商品名``, ``価格``, ``色``, ``サイズ``,
        ``商品概要`` followed by ``SKU画像_1`` .. ``SKU画像_N`` holding the image
        URLs (NaN where a product has fewer images). Empty for an empty input.

    Raises:
        requests.HTTPError: If a product page or an image answers with an error
            status.
        requests.Timeout: If a request does not answer within 30 seconds.
        AttributeError: If the title or price element is missing from a page.
    """
    targets = goods_list if limit is None else goods_list[:limit]
    records = []

    for goods in tqdm.tqdm(targets):
        goods_url = 'https://gemstore.tokyo' + goods
        goods_r = requests.get(goods_url, timeout=30)
        goods_r.raise_for_status()
        sleep(1)  # pause between requests
        soup = BeautifulSoup(goods_r.content,'lxml')

        title = soup.select_one('div.product__info-wrapper h1.product__title').text
        title = title.replace('\n','').strip()
        name = title[-6:]  # file-name stem for this product's images

        price = soup.select_one('div.price__regular span.price-item').text
        price = price.replace('\n','').strip()

        # Option values come from the <input> elements of the matching fieldset.
        color_tags = soup.select('div.product__info-wrapper fieldset:-soup-contains("カラー") input')
        colors = '・'.join(color_tag['value'] for color_tag in color_tags)

        size_tags = soup.select('div.product__info-wrapper fieldset:-soup-contains("サイズ") input')
        sizes = '・'.join(size_tag['value'] for size_tag in size_tags)

        snipet_tags = soup.select('div.product__description')
        snipets = '。'.join(snipet_tag.text for snipet_tag in snipet_tags)
        # Drop line breaks and the "read more" link text.
        snipets = snipets.replace('\n','').replace('続きを読む','').strip()

        img_tags = soup.select('div.product__media-wrapper slider-component:first-of-type>ul>li>div img')
        sku_list = []
        for img_tag in img_tags:
            src = img_tag.get('src')
            if not src:
                # An <img> without src cannot be downloaded; skipping it avoids a
                # TypeError on the concatenation below.
                continue
            # Protocol-relative sources ("//host/...") get a scheme; sources that
            # already carry one are used unchanged.
            sku_list.append(src if src.startswith('http') else 'https:' + src)

        for i, target in enumerate(sku_list, start=1):
            img_r = requests.get(target, timeout=30)
            # Without this check an error page would be saved as a .jpeg file.
            img_r.raise_for_status()
            sleep(1)

            with open(f'{img_path}/{name}_{i}.jpeg','wb') as f:
                f.write(img_r.content)

        # One complete record per product. Building the frame once from these
        # records replaces the per-product concat plus merge-on-title, which
        # duplicated rows whenever two products shared a title and raised
        # MergeError for an empty input.
        record = {
            '商品名':title,
            '価格':price,
            '色':colors,
            'サイズ':sizes,
            '商品概要':snipets
        }
        for i, sku_img in enumerate(sku_list, start=1):
            record[f'SKU画像_{i}'] = sku_img
        records.append(record)

    merge_df = pd.DataFrame(records)

    return merge_df

In [270]:
# Scrape the product pages, save their images under img_path and collect the rows.
merge_df = get_result_img(goods_list,img_path,top_path)

100%|███████████████████████████████████████████| 10/10 [02:53<00:00, 17.39s/it]


In [271]:
# Show the scraped table as the cell output for a visual check.
merge_df

Unnamed: 0,商品名,価格,色,サイズ,商品概要,SKU画像_1,SKU画像_2,SKU画像_3,SKU画像_4,SKU画像_5,...,SKU画像_7,SKU画像_8,SKU画像_9,SKU画像_10,SKU画像_11,SKU画像_12,SKU画像_13,SKU画像_14,SKU画像_15,SKU画像_16
0,ドレープカジュアルスウェット gm4114,"¥7,600",グレー,M・L・XL・2XL,モードアウターと合わせて着たいシンプルな韓国っぽニット。グレーのインナーと合わせることで、こ...,https://cdn.shopify.com/s/files/1/0524/2054/26...,https://cdn.shopify.com/s/files/1/0524/2054/26...,https://cdn.shopify.com/s/files/1/0524/2054/26...,https://cdn.shopify.com/s/files/1/0524/2054/26...,https://cdn.shopify.com/s/files/1/0524/2054/26...,...,https://cdn.shopify.com/s/files/1/0524/2054/26...,,,,,,,,,
1,バイカラータイトシャツ gm4234,"¥7,300",ホワイト・ブラック,M・L・XL・2XL,ワンランク上のスタイルになれるバイカラーシャツ。モノトーンスタイルがお好きな方におすすめのシ...,https://cdn.shopify.com/s/files/1/0524/2054/26...,https://cdn.shopify.com/s/files/1/0524/2054/26...,https://cdn.shopify.com/s/files/1/0524/2054/26...,https://cdn.shopify.com/s/files/1/0524/2054/26...,https://cdn.shopify.com/s/files/1/0524/2054/26...,...,https://cdn.shopify.com/s/files/1/0524/2054/26...,https://cdn.shopify.com/s/files/1/0524/2054/26...,https://cdn.shopify.com/s/files/1/0524/2054/26...,https://cdn.shopify.com/s/files/1/0524/2054/26...,https://cdn.shopify.com/s/files/1/0524/2054/26...,https://cdn.shopify.com/s/files/1/0524/2054/26...,https://cdn.shopify.com/s/files/1/0524/2054/26...,https://cdn.shopify.com/s/files/1/0524/2054/26...,https://cdn.shopify.com/s/files/1/0524/2054/26...,https://cdn.shopify.com/s/files/1/0524/2054/26...
2,ハーフジップパーカー gm4411,"¥5,900",ブラック,M・L・XL,流行のハーフジップパーカー。ジップチャームのデザインが他のアイテムと被らないので、オシャレに...,https://cdn.shopify.com/s/files/1/0524/2054/26...,https://cdn.shopify.com/s/files/1/0524/2054/26...,https://cdn.shopify.com/s/files/1/0524/2054/26...,https://cdn.shopify.com/s/files/1/0524/2054/26...,https://cdn.shopify.com/s/files/1/0524/2054/26...,...,https://cdn.shopify.com/s/files/1/0524/2054/26...,https://cdn.shopify.com/s/files/1/0524/2054/26...,https://cdn.shopify.com/s/files/1/0524/2054/26...,,,,,,,
3,ブラックソウルパーカー gm4402,"¥7,800",ブラック,M・L・XL・2XL,ウォッシュド加工がモード感を漂わせるパーカー。一枚でモドストを体現できるアイテムとなっていま...,https://cdn.shopify.com/s/files/1/0524/2054/26...,https://cdn.shopify.com/s/files/1/0524/2054/26...,https://cdn.shopify.com/s/files/1/0524/2054/26...,https://cdn.shopify.com/s/files/1/0524/2054/26...,https://cdn.shopify.com/s/files/1/0524/2054/26...,...,,,,,,,,,,
4,インビジブルセンスパーカー gm3959,"¥6,700",ブラック・グレー,M・L・XL,ブラック、グレーの2色展開パーカー。フロントのプリントがモード感漂わせるデザインになっていま...,https://cdn.shopify.com/s/files/1/0524/2054/26...,https://cdn.shopify.com/s/files/1/0524/2054/26...,https://cdn.shopify.com/s/files/1/0524/2054/26...,https://cdn.shopify.com/s/files/1/0524/2054/26...,https://cdn.shopify.com/s/files/1/0524/2054/26...,...,https://cdn.shopify.com/s/files/1/0524/2054/26...,https://cdn.shopify.com/s/files/1/0524/2054/26...,https://cdn.shopify.com/s/files/1/0524/2054/26...,https://cdn.shopify.com/s/files/1/0524/2054/26...,,,,,,
5,エッジナショナルプリントロングTシャツ gm3958,"¥6,300",ブラック,M・L・XL・2XL,グラフィックなデザインと全体的にウォッシュド加工が施されているロングTシャツ。着るだけで雰囲...,https://cdn.shopify.com/s/files/1/0524/2054/26...,https://cdn.shopify.com/s/files/1/0524/2054/26...,https://cdn.shopify.com/s/files/1/0524/2054/26...,https://cdn.shopify.com/s/files/1/0524/2054/26...,https://cdn.shopify.com/s/files/1/0524/2054/26...,...,https://cdn.shopify.com/s/files/1/0524/2054/26...,https://cdn.shopify.com/s/files/1/0524/2054/26...,https://cdn.shopify.com/s/files/1/0524/2054/26...,https://cdn.shopify.com/s/files/1/0524/2054/26...,https://cdn.shopify.com/s/files/1/0524/2054/26...,https://cdn.shopify.com/s/files/1/0524/2054/26...,,,,
6,アニハビテッドセーター gm3960,"¥6,300",ブラック・ホワイト,M・L・XL,モードな雰囲気を漂わせるセーター。ブラック、ホワイトの2色展開となっており、どちらのカラーで...,https://cdn.shopify.com/s/files/1/0524/2054/26...,https://cdn.shopify.com/s/files/1/0524/2054/26...,https://cdn.shopify.com/s/files/1/0524/2054/26...,https://cdn.shopify.com/s/files/1/0524/2054/26...,https://cdn.shopify.com/s/files/1/0524/2054/26...,...,https://cdn.shopify.com/s/files/1/0524/2054/26...,https://cdn.shopify.com/s/files/1/0524/2054/26...,,,,,,,,
7,MASKTIDEロングTシャツ gm3948,"¥5,800",ブラック,M・L・XL,フロントとバックにプリントが施されているロングTシャツ。全体的にウォッシュド加工が施されてい...,https://cdn.shopify.com/s/files/1/0524/2054/26...,https://cdn.shopify.com/s/files/1/0524/2054/26...,https://cdn.shopify.com/s/files/1/0524/2054/26...,https://cdn.shopify.com/s/files/1/0524/2054/26...,https://cdn.shopify.com/s/files/1/0524/2054/26...,...,,,,,,,,,,
8,コリアンモダンカラーシャツ gm5033,"¥5,200",ホワイト・ブラック,S・M・L・XL・2XL・3XL・4XL・5XL,ビッグカラーがポイントの韓国っぽシャツ✨シンプルなのに一枚でおしゃれ上級者さんコーデに仕上げ...,https://cdn.shopify.com/s/files/1/0524/2054/26...,https://cdn.shopify.com/s/files/1/0524/2054/26...,https://cdn.shopify.com/s/files/1/0524/2054/26...,https://cdn.shopify.com/s/files/1/0524/2054/26...,https://cdn.shopify.com/s/files/1/0524/2054/26...,...,https://cdn.shopify.com/s/files/1/0524/2054/26...,https://cdn.shopify.com/s/files/1/0524/2054/26...,https://cdn.shopify.com/s/files/1/0524/2054/26...,https://cdn.shopify.com/s/files/1/0524/2054/26...,https://cdn.shopify.com/s/files/1/0524/2054/26...,https://cdn.shopify.com/s/files/1/0524/2054/26...,,,,
9,Circle ロングTシャツ gm3900,"¥4,700",ブラック・ホワイト,M・L・XL,フロントとバックにサークルプリントが施されたロングTシャツ。ブラック、ホワイトの2色展開され...,https://cdn.shopify.com/s/files/1/0524/2054/26...,https://cdn.shopify.com/s/files/1/0524/2054/26...,https://cdn.shopify.com/s/files/1/0524/2054/26...,https://cdn.shopify.com/s/files/1/0524/2054/26...,https://cdn.shopify.com/s/files/1/0524/2054/26...,...,https://cdn.shopify.com/s/files/1/0524/2054/26...,https://cdn.shopify.com/s/files/1/0524/2054/26...,https://cdn.shopify.com/s/files/1/0524/2054/26...,https://cdn.shopify.com/s/files/1/0524/2054/26...,https://cdn.shopify.com/s/files/1/0524/2054/26...,https://cdn.shopify.com/s/files/1/0524/2054/26...,,,,


In [272]:
# Write the table into the output folder; the 'utf-8-sig' codec puts a UTF-8 BOM
# at the start of the file.
merge_df.to_csv(f'{top_path}/gemstoreデータ抽出.csv',index=None,encoding='utf-8-sig')

In [242]:
def get_result_onlycsv(goods_list,top_path,img_path,limit=10):
    """Scrape product pages and return one row per product, without downloading images.

    For each product link the page is fetched and the title, price, colour
    options, size options, description and the image URLs of the first media
    slider are extracted. Nothing is written to disk.

    Args:
        goods_list: Product-page paths relative to ``https://gemstore.tokyo``.
        top_path: Unused; kept so existing calls keep working.
        img_path: Unused; kept so existing calls keep working.
        limit: Number of leading entries of ``goods_list`` to process. Defaults
            to 10 (the slice that used to be hard-coded); pass ``None`` to
            process every entry.

    Returns:
        pandas.DataFrame: Columns ``商品名``, ``価格``, ``色``, ``サイズ``,
        ``商品概要`` followed by ``SKU画像_1`` .. ``SKU画像_N`` holding the image
        URLs (NaN where a product has fewer images). Empty for an empty input.

    Raises:
        requests.HTTPError: If a product page answers with an error status.
        requests.Timeout: If a product page does not answer within 30 seconds.
        AttributeError: If the title or price element is missing from a page.
    """
    targets = goods_list if limit is None else goods_list[:limit]
    records = []

    for goods in tqdm.tqdm(targets):
        goods_url = 'https://gemstore.tokyo' + goods

        goods_r = requests.get(goods_url, timeout=30)
        goods_r.raise_for_status()
        sleep(2)  # pause between requests
        soup = BeautifulSoup(goods_r.content,'lxml')

        title = soup.select_one('div.product__info-wrapper h1.product__title').text
        title = title.replace('\n','').strip()

        price = soup.select_one('div.price__regular span.price-item').text
        price = price.replace('\n','').strip()

        # Option values come from the <input> elements of the matching fieldset.
        color_tags = soup.select('div.product__info-wrapper fieldset:-soup-contains("カラー") input')
        colors = '・'.join(color_tag['value'] for color_tag in color_tags)

        size_tags = soup.select('div.product__info-wrapper fieldset:-soup-contains("サイズ") input')
        sizes = '・'.join(size_tag['value'] for size_tag in size_tags)

        snipet_tags = soup.select('div.product__description')
        snipets = '。'.join(snipet_tag.text for snipet_tag in snipet_tags)
        # Drop line breaks and the "read more" link text.
        snipets = snipets.replace('\n','').replace('続きを読む','').strip()

        img_tags = soup.select('div.product__media-wrapper slider-component:first-of-type>ul>li>div img')
        sku_list = []
        for img_tag in img_tags:
            src = img_tag.get('src')
            if not src:
                # An <img> without src has no URL to record; skipping it avoids a
                # TypeError on the concatenation below.
                continue
            # Protocol-relative sources ("//host/...") get a scheme; sources that
            # already carry one are used unchanged.
            sku_list.append(src if src.startswith('http') else 'https:' + src)

        # One complete record per product. Building the frame once from these
        # records replaces the per-product concat plus merge-on-title, which
        # duplicated rows whenever two products shared a title and raised
        # MergeError for an empty input.
        record = {
            '商品名':title,
            '価格':price,
            '色':colors,
            'サイズ':sizes,
            '商品概要':snipets
        }
        for i, sku_img in enumerate(sku_list, start=1):
            record[f'SKU画像_{i}'] = sku_img
        records.append(record)

    merge_df = pd.DataFrame(records)

    return merge_df
