# In [None]:
from datetime import datetime
import pandas as pd
import scrapy
from scrapy.crawler import CrawlerProcess
from scrapy.utils.log import configure_logging

class IowaFoodHubSpider(scrapy.Spider):
    """Collect bacon and egg product listings from the Iowa Food Hub store.

    One search-result page is requested per product type. Every product link
    found on a result page is followed, and each product page whose title
    contains the expected keyword is appended as one row to the module-level
    ``IowaFoodHubBaconDataFrame`` or ``IowaFoodHubEggDataFrame``.
    """

    name = 'Iowa Food Hub'

    # Extraction date formatted as "YYYY-MM-DD " (the trailing space is part
    # of the value that gets stored in each row). The clock is read once so
    # the year, month and day cannot disagree around midnight.
    currentDate = datetime.today().date().isoformat() + ' '

    # Search pages to crawl, as (product type, search url) pairs.
    _searchPages = (
        ('bacon', 'https://iowa-food-hub.myshopify.com/search?q=bacon'),
        ('eggs', 'https://iowa-food-hub.myshopify.com/search?q=Egg'),
    )

    # Product links inside the search-result list.
    _productLinkXpath = '//*[@id="MainContent"]//a[contains(@class,"list-view-item")]/@href'

    # Direct children of the product description container (same for all product types).
    _descriptionXpath = '//*[@id="ProductSection-product-template"]//*[contains(@class, "product-single__description") and @itemprop="description"]/*'

    def start_requests(self):
        """Yield one search request per product type.

        The search URL and the product type travel in ``meta`` so that
        ``iowaFoodHubSearch`` can pick the matching product callback.
        """
        for productType, searchUrl in self._searchPages:
            yield scrapy.Request(
                url=searchUrl,
                callback=self.iowaFoodHubSearch,
                meta={'url': searchUrl, 'type': productType},
            )

    def iowaFoodHubSearch(self, response):
        """Follow every product link found on a search-result page.

        The product callback is chosen from ``response.meta['type']``; a
        response with an unknown type yields no requests.
        """
        productCallbacks = {
            'bacon': self.iowaFoodHubBacon,
            'eggs': self.iowaFoodHubEggs,
        }
        productType = response.meta.get('type')
        callback = productCallbacks.get(productType)
        if callback is None:
            self.logger.warning('No product callback for search type %r', productType)
            return

        searchUrl = response.meta.get('url')
        # Failsafe for links: a response that cannot be queried is reported
        # and skipped rather than silently ignored.
        try:
            for link in response.xpath(self._productLinkXpath):
                yield response.follow(
                    url=link,
                    callback=callback,
                    meta={'url': searchUrl},
                    dont_filter=True,
                )
        except AttributeError:
            self.logger.warning(
                'Could not read product links from %s', response.url, exc_info=True
            )

    def iowaFoodHubBacon(self, response):
        """Store a product page as a bacon row if its title mentions bacon."""
        self._scrapeProduct(
            response,
            dataFrame=IowaFoodHubBaconDataFrame,
            desiredNames={"bacon"},
            nameXpath='//*[@id="ProductSection-product-template"]//*[contains(@class, "product-single__title")]/text()',
            venderXpath='//*[@id="ProductSection-product-template"]//*[contains(@class, "product-single__vendor")]/text()',
            priceXpath='//*[@id="ProductPrice-product-template"]/text()',
        )

    def iowaFoodHubEggs(self, response):
        """Store a product page as an egg row if its title mentions egg."""
        self._scrapeProduct(
            response,
            dataFrame=IowaFoodHubEggDataFrame,
            desiredNames={"egg"},
            nameXpath='//*[@id="ProductSection-product-template"]//*[contains(@class, "product-single__title") and @itemprop="name"]/text()',
            venderXpath='//*[@id="ProductSection-product-template"]//*[contains(@class, "product-single__vendor") and @itemprop="brand"]/text()',
            priceXpath='//*[@id="ProductPrice-product-template" and @itemprop="price"]/text()',
        )

    def _scrapeProduct(self, response, dataFrame, desiredNames, nameXpath, venderXpath, priceXpath):
        """Validate a product page by title and append it to ``dataFrame``.

        The row is ``[name, vendor, price, description text, extraction date,
        url]``, where ``url`` is the value passed along in ``response.meta``
        (the search page the product was found on). Pages whose title is
        missing or contains none of ``desiredNames`` are skipped.
        """
        name = response.xpath(nameXpath).extract_first()
        if not self.containsWord(name, desiredNames):
            return

        # Adding product to data frame
        dataFrame.loc[len(dataFrame)] = [
            name,
            response.xpath(venderXpath).extract_first(),
            response.xpath(priceXpath).extract_first(),
            self._descriptionText(response),
            self.currentDate,
            response.meta.get('url'),
        ]

    def _descriptionText(self, response):
        """Return the product description as one string.

        For each direct child of the description container the first text
        node is taken, stripped, and the pieces are concatenated. A child
        with no text contributes an empty string instead of raising.
        """
        return ''.join(
            child.xpath('.//text()').extract_first(default='').strip()
            for child in response.xpath(self._descriptionXpath)
        )

    def containsWord(self, string, validWords):
        """Return True if ``string`` contains any of ``validWords``.

        The comparison ignores spaces and letter case in ``string``.
        ``None`` (for example, a title that was not found on the page)
        never matches.
        """
        if string is None:
            return False
        checkText = string.replace(" ", "").lower()
        return any(word in checkText for word in validWords)

# Set to True to call configure_logging() before the crawl and to print the
# collected tables afterwards.
DEBUG = False

# Result tables; the spider callbacks append one row per accepted product.
IowaFoodHubBaconDataFrame = pd.DataFrame(
    columns=['Bacon', 'Vender', 'Price', 'Weight', 'Extraction Date', 'Url'],
)
IowaFoodHubEggDataFrame = pd.DataFrame(
    columns=['Eggs', 'Vender', 'Price', 'Amount', 'Extraction Date', 'Url'],
)

if DEBUG:
    # To see the inner mechanics of the spider
    configure_logging()

# Run the spider; start() is followed by stop() exactly as before.
process = CrawlerProcess()
process.crawl(IowaFoodHubSpider)
process.start()
process.stop()

# File-name prefix "YYYY-MM-DD " (trailing space included), taken after the crawl.
currentDate = datetime.today().strftime('%Y-%m-%d ')

# Write each table to its own dated CSV file in the working directory.
IowaFoodHubBaconDataFrame.to_csv(f"{currentDate}Iowa Food Hub Bacon.csv")
IowaFoodHubEggDataFrame.to_csv(f"{currentDate}Iowa Food Hub Eggs.csv")

if DEBUG:
    # To see the outputs
    print(IowaFoodHubBaconDataFrame, IowaFoodHubEggDataFrame, sep="\n")


