# In [11]:
import requests
import scrapy
import pandas as pd

from bs4 import BeautifulSoup
from csv import writer


# https://developers.whatismybrowser.com/useragents/parse/2899915-chrome-windows-blink
# https://www.dia.es/compra-online/robots.txt

def init_scraper(url="https://www.dia.es/compra-online/frescos/carne/cf?page=1",
                 headers=None):
    """Download the starting page of the scrape and return it parsed.

    Args:
        url: Address of the page to fetch. Defaults to the ``page=1`` listing
            URL this script was originally written against.
        headers: Optional dict of HTTP request headers. When omitted, a fixed
            desktop Chrome ``User-Agent`` plus an ``Accept-Language`` header
            are sent.

    Returns:
        Whatever ``get_soup`` returns for ``url`` (the parsed HTML document).
    """
    if headers is None:
        # TODO: replace this single fixed user agent with a randomized
        # rotation over a list of user agents.
        headers = {
            'User-Agent':
                'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36',
            'Accept-Language': 'en-US, en;q=0.5',
        }
    return get_soup(url, headers)

def get_soup(url, agent, timeout=30):
    """Fetch ``url`` over HTTP and parse the response body as HTML.

    Args:
        url: Address of the page to download.
        agent: Dict of HTTP request headers sent with the request.
        timeout: Seconds ``requests`` waits for the server before giving up.
            Without a timeout a stalled connection would block forever.

    Returns:
        A ``BeautifulSoup`` document built from the response body with the
        ``'html.parser'`` backend.

    Raises:
        requests.exceptions.HTTPError: The server answered with a 4xx/5xx
            status, so there is no product page to parse.
        requests.exceptions.RequestException: Any other transport failure
            raised by ``requests.get`` (including timeouts).
    """
    web = requests.get(url, headers=agent, timeout=timeout)
    # Fail loudly on error responses instead of silently parsing an error page
    # that would yield zero products.
    web.raise_for_status()
    return BeautifulSoup(web.content, 'html.parser')

def _text_or_none(tag):
    """Return the stripped text of ``tag``, or None when the tag was not found."""
    return tag.get_text(strip=True) if tag is not None else None


def get_products(soup):
    """Extract one row per product listed in a parsed listing page.

    Every ``div`` with class ``product-list__item`` found in ``soup`` becomes
    one row with these columns:

    * ``Id`` -- zero-based position of the product in the page.
    * ``NomProducte`` -- text of the ``span`` with class ``details``.
    * ``PreuProducte`` -- text of the ``p`` with class ``price``.
    * ``PreuPerKiloProducte`` -- text of the ``p`` with class
      ``pricePerKilogram``.
    * ``LinkProducte`` -- ``href`` of the first anchor that has one.

    A piece that is missing from a product's markup is stored as None rather
    than aborting the whole scrape.

    Args:
        soup: Parsed HTML document exposing ``find_all`` (e.g. BeautifulSoup).

    Returns:
        A ``pandas.DataFrame`` with the five columns above; it has zero rows
        when the page contains no product items.
    """
    columns = ['Id', 'NomProducte', 'PreuProducte', 'PreuPerKiloProducte', 'LinkProducte']

    # Rows are accumulated in a plain list and the frame is built once:
    # DataFrame.append was removed in pandas 2.0 and copied the frame per call.
    rows = []
    items = soup.find_all("div", class_="product-list__item")
    for product_id, div in enumerate(items):
        link = div.find("a", href=True)
        rows.append({
            'Id': product_id,
            'NomProducte': _text_or_none(div.find("span", class_="details")),
            'PreuProducte': _text_or_none(div.find("p", class_="price")),
            'PreuPerKiloProducte': _text_or_none(div.find("p", class_="pricePerKilogram")),
            'LinkProducte': link['href'] if link is not None else None,
        })

    return pd.DataFrame(rows, columns=columns)
        
# Download and parse the starting page, then turn its product list into a table.
soup = init_scraper()
df_products = get_products(soup)

# Persist the table as CSV without the index column. 'utf-8-sig' writes a
# byte-order mark at the start of the file.
df_products.to_csv(
    'dades_carn.csv',
    encoding='utf-8-sig',
    index=False,
)
