In [1]:
import nltk
from nltk.sentiment import SentimentIntensityAnalyzer

In [2]:
import pandas as pd

In [3]:
# Download the 'vader_lexicon' NLTK resource.
nltk.download('vader_lexicon')

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\USUARIO\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


True

In [4]:
# Load the processed user reviews.
# Forward slashes keep this relative path valid on Windows, Linux and macOS
# (a backslash-separated path only resolves on Windows).
df = pd.read_csv('Dataset/Procesado/user_reviews.csv')

In [5]:
# Preview the first three rows of the loaded frame.
df.head(3)

Unnamed: 0,item_id,review
0,1250.0,Simple yet with great replayability. In my opi...
1,22200.0,It's unique and worth a playthrough.
2,43110.0,Great atmosphere. The gunplay can be a bit chu...


In [6]:
# Show column dtypes and non-null counts to spot missing values.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 59333 entries, 0 to 59332
Data columns (total 2 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   item_id  59305 non-null  float64
 1   review   59275 non-null  object 
dtypes: float64(1), object(1)
memory usage: 927.2+ KB


In [7]:
# Replace missing review texts with a single space.
df = df.assign(review=df['review'].fillna(' '))

In [8]:
# Check the frame again after filling the missing reviews.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 59333 entries, 0 to 59332
Data columns (total 2 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   item_id  59305 non-null  float64
 1   review   59333 non-null  object 
dtypes: float64(1), object(1)
memory usage: 927.2+ KB


In [9]:
# Discard every row that still contains at least one missing value.
df = df.dropna(axis='index', how='any')

In [10]:
# Check the non-null counts after dropping incomplete rows.
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 59305 entries, 0 to 59332
Data columns (total 2 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   item_id  59305 non-null  float64
 1   review   59305 non-null  object 
dtypes: float64(1), object(1)
memory usage: 1.4+ MB


In [11]:
# Summary statistics of the numeric columns.
df.describe()

Unnamed: 0,item_id
count,59305.0
mean,169935.176511
std,132029.464174
min,10.0
25%,8930.0
50%,218230.0
75%,261760.0
max,527340.0


In [12]:
# Work on an independent (deep) copy so later column additions do not touch df.
df1 = df.copy(deep=True)

In [13]:
# Create the sentiment analyzer instance.
sia = SentimentIntensityAnalyzer()

In [14]:
# Convert a dictionary of sentiment scores into a categorical label.

def sentiment_to_label(sentiment_scores):
    """Map a sentiment-score mapping to an integer class label.

    Parameters
    ----------
    sentiment_scores : mapping
        Must contain a ``'compound'`` entry holding a numeric score.

    Returns
    -------
    int
        ``2`` (positive) when compound >= 0.05,
        ``1`` (neutral) when -0.05 < compound < 0.05,
        ``0`` (negative) otherwise.

    Raises
    ------
    KeyError
        If ``'compound'`` is missing from ``sentiment_scores``.
    """
    compound = sentiment_scores['compound']

    # Guard clauses, checked from the most positive band downwards.
    if compound >= 0.05:
        return 2  # positive
    if compound > -0.05:
        return 1  # neutral
    return 0  # negative

In [15]:
# Score each review and convert the scores to a class label in one pass,
# storing the result in a new column of df1.
df1['sentiment_analysis'] = df1['review'].apply(
    lambda text: sentiment_to_label(sia.polarity_scores(text))
)
df1.head(3)

Unnamed: 0,item_id,review,sentiment_analysis
0,1250.0,Simple yet with great replayability. In my opi...,2
1,22200.0,It's unique and worth a playthrough.,2
2,43110.0,Great atmosphere. The gunplay can be a bit chu...,2


In [16]:
# Inspect the last 15 rows together with their assigned labels.
df1.tail(15)

Unnamed: 0,item_id,review,sentiment_analysis
59318,413150.0,I realy like this game it has gotten me to und...,2
59319,242760.0,"Good Game Play, encountered a problem with cli...",0
59320,427730.0,dont ask,1
59321,280790.0,"Great game, specially if you need to past time...",2
59322,570.0,Good one,2
59323,261030.0,I cried in the end its so sadding ]'; I wish l...,2
59324,730.0,Gra naprawdę fajna.Ale jest kilka rzeczy do kt...,1
59325,570.0,Well Done,2
59326,233270.0,this is a very fun and nice 80s themed shooter...,2
59327,130.0,if you liked Half life i would really recommen...,2


In [17]:
# One-hot encode the sentiment label (one integer 0/1 column per class) so the
# reviews can later be grouped by item_id and summed per class.
sentiment_labels = df1['sentiment_analysis']
df_dummies = pd.get_dummies(
    sentiment_labels,
    prefix='sentiment',
    prefix_sep='_',
    dtype='int',
)

In [18]:
# Preview the one-hot encoded sentiment columns.
df_dummies.head(3)

Unnamed: 0,sentiment_0,sentiment_1,sentiment_2
0,0,0,1
1,0,0,1
2,0,0,1


In [19]:
# Give the dummy columns descriptive names using a single mapping
# (0 -> 'malo', 1 -> 'neutral', 2 -> 'positivo') instead of three
# separate one-column renames.
df_dummies = df_dummies.rename(
    columns={
        'sentiment_0': 'malo',
        'sentiment_1': 'neutral',
        'sentiment_2': 'positivo',
    }
)

In [22]:
# Confirm that the column names were changed.
df_dummies.head(3)

Unnamed: 0,malo,neutral,positivo
0,0,0,1
1,0,0,1
2,0,0,1


In [23]:
# Attach the dummy columns to df1 side by side (rows are aligned on the index).
frames = [df1, df_dummies]
df_final = pd.concat(frames, axis='columns')

In [24]:
# Display the combined frame (reviews, labels and dummy columns).
df_final

Unnamed: 0,item_id,review,sentiment_analysis,malo,neutral,positivo
0,1250.0,Simple yet with great replayability. In my opi...,2,0,0,1
1,22200.0,It's unique and worth a playthrough.,2,0,0,1
2,43110.0,Great atmosphere. The gunplay can be a bit chu...,2,0,0,1
3,251610.0,I know what you think when you see this title ...,2,0,0,1
4,227300.0,For a simple (it's actually not all that simpl...,2,0,0,1
...,...,...,...,...,...,...
59328,70.0,a must have classic from steam definitely wort...,2,0,0,1
59329,362890.0,this game is a perfect remake of the original ...,2,0,0,1
59330,273110.0,had so much fun plaing this and collecting res...,2,0,0,1
59331,730.0,:D,2,0,0,1


In [25]:
# With the analysis done, remove the columns that are no longer needed.
unneeded_columns = ['review', 'sentiment_analysis']
df_final = df_final.drop(columns=unneeded_columns)

In [42]:
# Preview the frame after dropping the text and label columns.
df_final.head(3)

Unnamed: 0,item_id,malo,neutral,positivo
0,1250.0,0,0,1
1,22200.0,0,0,1
2,43110.0,0,0,1


In [41]:
# Save the per-review sentiment flags to CSV, without the index column.
# NOTE(review): the previous comment described loading a file to merge on
# item_id, but this cell only writes; presumably the merge on item_id happens
# in a later step that reads this file - confirm.
# Forward slashes keep the relative path valid on Windows, Linux and macOS.
df_final.to_csv('Dataset/Procesado/sentiment.csv', index=False)