In [2]:
#Para el análisis de sentimientos usaremos VADER (Valence Aware Dictionary and sEntiment Reasoner) de la librería NLTK.
#Se escogió este analizador ya que está enfocado en textos cortos, como reseñas.
import pandas as pd
import nltk
from nltk.sentiment import SentimentIntensityAnalyzer

In [3]:
# Download the VADER lexicon, the dictionary resource requested from NLTK here
# for the sentiment analysis performed later in this notebook.
nltk.download("vader_lexicon")

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\Cristian\AppData\Roaming\nltk_data...


True

In [4]:
# Load the user reviews dataset from its parquet file.
# In a raw string (r"...") every backslash is literal, so each path separator
# is written exactly once; doubling them would embed "\\" in the path.
# NOTE(review): this is an absolute path on the author's machine; adjust it
# when running the notebook elsewhere.
ruta = r"C:\Users\Cristian\Desktop\DATA SCIENCE\HENRY\PI1\PI MLOps - STEAM\Datos Estructurados\user_reviews.parquet"
df_reviews_analysis = pd.read_parquet(ruta)

In [5]:
# Preview the first five rows of the loaded reviews DataFrame.
df_reviews_analysis.head(5)

Unnamed: 0,User_Id,Item_Id,Recommend,Review
0,76561197970982479,1250,True,Simple yet with great replayability. In my opi...
1,76561197970982479,22200,True,It's unique and worth a playthrough.
2,76561197970982479,43110,True,Great atmosphere. The gunplay can be a bit chu...
3,js41637,251610,True,I know what you think when you see this title ...
4,js41637,227300,True,For a simple (it's actually not all that simpl...


In [6]:
#Ahora creamos la función de análisis de sentimientos

# Compound-score cut-offs used to bucket a review as positive or negative.
UMBRAL_POSITIVO = 0.2
UMBRAL_NEGATIVO = -0.2

# Lazily created analyzer shared by every call (see _obtener_analizador).
_analizador_vader = None


def _obtener_analizador():
    """Return the shared SentimentIntensityAnalyzer, creating it on first use."""
    global _analizador_vader
    if _analizador_vader is None:
        _analizador_vader = SentimentIntensityAnalyzer()
    return _analizador_vader


def analisis_sentimiento(review):
    """Classify the sentiment of a single review using VADER.

    Parameters
    ----------
    review : str or None
        Review text. Null values (as detected by ``pd.isnull``) and empty or
        whitespace-only strings are treated as neutral.

    Returns
    -------
    int
        ``0`` when the compound score is below ``UMBRAL_NEGATIVO`` (negative),
        ``2`` when it is above ``UMBRAL_POSITIVO`` (positive),
        ``1`` otherwise (neutral, including null/blank reviews).
    """
    # Null or blank reviews are labelled neutral without touching the analyzer.
    if pd.isnull(review) or review.strip() == '':
        return 1

    # Reuse one analyzer instead of building a new one for every review:
    # this function is applied row by row over the whole DataFrame.
    sentimiento = _obtener_analizador().polarity_scores(review)["compound"]

    # Bucket the compound score into the three sentiment classes.
    if sentimiento > UMBRAL_POSITIVO:
        return 2  # Positive
    if sentimiento < UMBRAL_NEGATIVO:
        return 0  # Negative
    return 1  # Neutral

In [8]:
# Label every review: nulls are replaced by empty strings first, then each
# text is passed to analisis_sentimiento and the result stored as a new column.
df_reviews_analysis["Analisis_Sentimiento"] = (
    df_reviews_analysis["Review"]
    .fillna("")
    .apply(analisis_sentimiento)
)
df_reviews_analysis

Unnamed: 0,User_Id,Item_Id,Recommend,Review,Analisis_Sentimiento
0,76561197970982479,1250,True,Simple yet with great replayability. In my opi...,2
1,76561197970982479,22200,True,It's unique and worth a playthrough.,2
2,76561197970982479,43110,True,Great atmosphere. The gunplay can be a bit chu...,2
3,js41637,251610,True,I know what you think when you see this title ...,2
4,js41637,227300,True,For a simple (it's actually not all that simpl...,2
...,...,...,...,...,...
58453,76561198312638244,70,True,a must have classic from steam definitely wort...,2
58454,76561198312638244,362890,True,this game is a perfect remake of the original ...,2
58455,LydiaMorley,273110,True,had so much fun plaing this and collecting res...,2
58456,LydiaMorley,730,True,:D,2


In [9]:
# Remove the "Review" text column from the DataFrame.
# `columns=` already identifies the axis, so the redundant `axis=1` is omitted.
df_reviews_analysis = df_reviews_analysis.drop(columns="Review")

In [10]:
# Display the first three rows of the updated DataFrame.
df_reviews_analysis.head(3)

Unnamed: 0,User_Id,Item_Id,Recommend,Analisis_Sentimiento
0,76561197970982479,1250,True,2
1,76561197970982479,22200,True,2
2,76561197970982479,43110,True,2


In [11]:
# Export the DataFrame to a parquet file (relative path, pyarrow engine).
df_reviews_analysis.to_parquet("df_review_final.parquet", engine = "pyarrow")