In [26]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from transformers import pipeline
import torch
import pandas as pd
import time

In [27]:
rest_google_reviews = pd.read_csv("../Datasets/Restaurantes_Google_Reviews.csv",delimiter=",")
rest_google_reviews.head()

Unnamed: 0,user_id,name,time,rating,text,pics,resp,gmap_id
0,104959831058524862747,Samuel Benson,1630018420109,4,A pretty seamless experience from top to botto...,,,0x874d9bf9d1db7c85:0xf1c3706a2b497a3
1,100925653611153628883,Emily Probst,1629223016683,5,Great food and great service! Being so close t...,,,0x874d9bf9d1db7c85:0xf1c3706a2b497a3
2,100496729305505499660,Brianna Dungan,1629141770542,5,Pleasant location.,,,0x874d9bf9d1db7c85:0xf1c3706a2b497a3
3,115619895645642399701,Aaron Tim,1630363338649,5,Finally a wingstop in Provo,,,0x874d9bf9d1db7c85:0xf1c3706a2b497a3
4,115522360097670804488,Heather Ann Bruin,1629597692008,5,,,,0x874d9bf9d1db7c85:0xf1c3706a2b497a3


In [28]:
print("CUDA disponible:", torch.cuda.is_available())

CUDA disponible: True


In [29]:
# Verifica si CUDA está disponible
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

***

In [22]:
# Carga modelos a la GPU a excepcion del modelo de tokens
tokenizer = AutoTokenizer.from_pretrained("finiteautomata/beto-sentiment-analysis")
model = AutoModelForSequenceClassification.from_pretrained("finiteautomata/beto-sentiment-analysis")
model = model.to(device)
pipe_beto = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer, device=0 if str(device) == "cuda" else -1)

In [23]:
# Función para procesar la reseña y mostrar progreso cada 1 minuto
def sentiment_status(review):
    if review is None:
        return None
    else:
        text = str(review)[:512]
        clasificacion = pipe_beto(text)
        return clasificacion[0]['label']

# Contador de reviews procesados
elementos_procesados = 0

# Tiempo inicial
tiempo_inicial = time.time()

# Itera sobre los reviews y muestra el progreso cada 1 minuto
for index, row in rest_google_reviews.iterrows():
    rest_google_reviews.at[index, 'sentiment'] = sentiment_status(row['text'])
    elementos_procesados += 1
    
    # Calcula el tiempo transcurrido
    tiempo_transcurrido = time.time() - tiempo_inicial
    
    # Calcula el tiempo estimado restante
    elementos_restantes = len(rest_google_reviews) - elementos_procesados
    tiempo_restante = (tiempo_transcurrido / elementos_procesados) * elementos_restantes
    
    # Verifica si ha pasado 1 minuto
    if tiempo_transcurrido >= 60:
        tiempo_inicial = time.time()
        print(f"Procesados: {elementos_procesados}/{len(rest_google_reviews)}, % completado: {round(elementos_procesados/len(rest_google_reviews)*100, 2)}%, Tiempo estimado restante: {round((tiempo_restante/60)/60,2)} horas")

# Mensaje final
print("Procesamiento completado.")



Procesados: 7560/152797, % completado: 4.95%, Tiempo estimado restante: 0.32 horas


KeyboardInterrupt: 

In [12]:
rest_google_reviews.head(20)

Unnamed: 0.1,Unnamed: 0,user_id,name_x,text,sentiment
0,0,104959831058524862747,Samuel Benson,A pretty seamless experience from top to botto...,NEU
1,1,100925653611153628883,Emily Probst,Great food and great service! Being so close t...,POS
2,2,100496729305505499660,Brianna Dungan,Pleasant location.,NEU
3,3,115619895645642399701,Aaron Tim,Finally a wingstop in Provo,NEU
4,4,115522360097670804488,Heather Ann Bruin,,NEU
5,5,111139967532022596352,Hootie Hennessey,,NEU
6,6,104959831058524862747,Samuel Benson,A pretty seamless experience from top to botto...,NEU
7,7,100925653611153628883,Emily Probst,Great food and great service! Being so close t...,POS
8,8,100496729305505499660,Brianna Dungan,Pleasant location.,NEU
9,9,115619895645642399701,Aaron Tim,Finally a wingstop in Provo,NEU


***

In [30]:
# Cargar el tokenizer y el modelo en el dispositivo
tokenizer = AutoTokenizer.from_pretrained('nlptown/bert-base-multilingual-uncased-sentiment')
model = AutoModelForSequenceClassification.from_pretrained('nlptown/bert-base-multilingual-uncased-sentiment').to(device)

In [31]:
# Función para procesar la reseña y mostrar progreso cada 1 minuto
def sentiment_stars(review):
    if review is None:
        return None
    else:
        text = str(review)[:512]
        tokens = tokenizer.encode(text, return_tensors='pt').to(device)
        result = model(tokens)[0]  # Access the logits
        return int(torch.argmax(result))

In [32]:
# Contador de reviews procesados
elementos_procesados = 0

# Tiempo inicial
tiempo_inicial = time.time()

# Itera sobre los reviews y muestra el progreso cada 1 minuto
for index, row in rest_google_reviews.iterrows():
    rest_google_reviews.at[index, 'sentiment'] = sentiment_stars(row['text'])
    elementos_procesados += 1
    
    # Calcula el tiempo transcurrido
    tiempo_transcurrido = time.time() - tiempo_inicial
    
    # Calcula el tiempo estimado restante
    elementos_restantes = len(rest_google_reviews) - elementos_procesados
    tiempo_restante = (tiempo_transcurrido / elementos_procesados) * elementos_restantes
    
    # Verifica si ha pasado 1 minuto
    if tiempo_transcurrido >= 60:
        tiempo_inicial = time.time()
        print(f"Procesados: {elementos_procesados}/{len(rest_google_reviews)}, % completado: {round(elementos_procesados/len(rest_google_reviews)*100, 2)}%, Tiempo estimado restante: {round((tiempo_restante/60)/60,2)} horas")

# Mensaje final
print("Procesamiento completado.")

Procesados: 6388/152797, % completado: 4.18%, Tiempo estimado restante: 0.38 horas
Procesados: 12881/152797, % completado: 8.43%, Tiempo estimado restante: 0.18 horas
Procesados: 19306/152797, % completado: 12.64%, Tiempo estimado restante: 0.12 horas
Procesados: 25747/152797, % completado: 16.85%, Tiempo estimado restante: 0.08 horas
Procesados: 32279/152797, % completado: 21.13%, Tiempo estimado restante: 0.06 horas
Procesados: 38877/152797, % completado: 25.44%, Tiempo estimado restante: 0.05 horas
Procesados: 45492/152797, % completado: 29.77%, Tiempo estimado restante: 0.04 horas
Procesados: 51947/152797, % completado: 34.0%, Tiempo estimado restante: 0.03 horas
Procesados: 58451/152797, % completado: 38.25%, Tiempo estimado restante: 0.03 horas
Procesados: 65011/152797, % completado: 42.55%, Tiempo estimado restante: 0.02 horas
Procesados: 71558/152797, % completado: 46.83%, Tiempo estimado restante: 0.02 horas
Procesados: 78113/152797, % completado: 51.12%, Tiempo estimado resta

KeyboardInterrupt: 

In [33]:
rest_google_reviews.head(20)

Unnamed: 0,user_id,name,time,rating,text,pics,resp,gmap_id,sentiment
0,104959831058524862747,Samuel Benson,1630018420109,4,A pretty seamless experience from top to botto...,,,0x874d9bf9d1db7c85:0xf1c3706a2b497a3,4.0
1,100925653611153628883,Emily Probst,1629223016683,5,Great food and great service! Being so close t...,,,0x874d9bf9d1db7c85:0xf1c3706a2b497a3,4.0
2,100496729305505499660,Brianna Dungan,1629141770542,5,Pleasant location.,,,0x874d9bf9d1db7c85:0xf1c3706a2b497a3,3.0
3,115619895645642399701,Aaron Tim,1630363338649,5,Finally a wingstop in Provo,,,0x874d9bf9d1db7c85:0xf1c3706a2b497a3,4.0
4,115522360097670804488,Heather Ann Bruin,1629597692008,5,,,,0x874d9bf9d1db7c85:0xf1c3706a2b497a3,1.0
5,111139967532022596352,Hootie Hennessey,1629774057722,5,,,,0x874d9bf9d1db7c85:0xf1c3706a2b497a3,1.0
6,116839886208413920152,Andrea Ayala,1630539640545,5,This establishment is very family oriented and...,,"{'time': 1630944387641, 'text': 'Thank you, An...",0x87530f390e058629:0x292320115b1705ea,4.0
7,111930204658217490762,Jeanette Rodgers,1625534118216,4,Wow. Already has three 5-star reviews. Very re...,[{'url': ['https://lh5.googleusercontent.com/p...,"{'time': 1630945915991, 'text': ""Jeanette, Tha...",0x87530f390e058629:0x292320115b1705ea,2.0
8,110612871828582311715,Lissette Diaz,1627706899742,5,"Best best best Mexican food. Authentic, colorf...",,"{'time': 1630946322270, 'text': ""Thank you, so...",0x87530f390e058629:0x292320115b1705ea,4.0
9,108947133017703821995,Keumi,1627766593988,5,"Absolutely delicious, best Mexican food in tow...",,"{'time': 1630946176765, 'text': ""Keumi, Thank ...",0x87530f390e058629:0x292320115b1705ea,4.0
