In [1]:
import pandas as pd # Para trabajar con dataframes
import numpy as np # Para operaciones y funciones con arrays
import gzip # Para cargar archivo gz
import ast  # Para cargar contenido tipo diccionario dentro de los json
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer # modelo entrenado para NPL

In [2]:
def parseo(ruta):
    """Load a gzip-compressed, one-record-per-line file into a DataFrame.

    Every non-blank line is parsed with ``ast.literal_eval``, so each line is
    expected to be a Python literal (a dict per record in this notebook).

    Parameters
    ----------
    ruta : str or path-like
        Path to the ``.gz`` file, read as UTF-8 text.

    Returns
    -------
    pandas.DataFrame
        One row per parsed line, built from the list of parsed records.

    Raises
    ------
    SyntaxError, ValueError
        Propagated from ``ast.literal_eval`` when a non-blank line is not a
        valid Python literal.
    """
    registros = []

    # Text mode lets gzip decode UTF-8 and hand back one line at a time, so the
    # whole decompressed file never has to sit in memory as a single string.
    # Unlike str.splitlines(), file iteration only breaks on real newlines, so
    # characters such as U+2028 inside a record no longer split it in two.
    with gzip.open(ruta, "rt", encoding="utf-8") as archivo:
        for fila in archivo:
            fila = fila.strip()
            # Blank lines (e.g. a trailing empty line) carry no record and
            # would make literal_eval raise, so skip them.
            if not fila:
                continue
            registros.append(ast.literal_eval(fila))

    # Build the DataFrame once from the full list of records.
    return pd.DataFrame(registros)

In [3]:
# Load the raw user-reviews dump (one record per line) into a DataFrame.
reviews = parseo(ruta='../Datasets/user_reviews.json.gz')

In [4]:
# Preview the first five rows of the freshly loaded frame.
reviews.head(n=5)

Unnamed: 0,user_id,user_url,reviews
0,76561197970982479,http://steamcommunity.com/profiles/76561197970...,"[{'funny': '', 'posted': 'Posted November 5, 2..."
1,js41637,http://steamcommunity.com/id/js41637,"[{'funny': '', 'posted': 'Posted June 24, 2014..."
2,evcentric,http://steamcommunity.com/id/evcentric,"[{'funny': '', 'posted': 'Posted February 3.',..."
3,doctr,http://steamcommunity.com/id/doctr,"[{'funny': '', 'posted': 'Posted October 14, 2..."
4,maplemage,http://steamcommunity.com/id/maplemage,"[{'funny': '3 people found this review funny',..."


In [5]:
# The profile URL is not used later in this notebook; remove it in place.
reviews.drop(labels=['user_url'], axis='columns', inplace=True)

In [6]:
# Check the remaining columns, their dtypes and non-null counts.
reviews.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 25799 entries, 0 to 25798
Data columns (total 2 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   user_id  25799 non-null  object
 1   reviews  25799 non-null  object
dtypes: object(2)
memory usage: 403.2+ KB


In [7]:
# Turn each element of the per-user 'reviews' list into its own row.
# The original index label is repeated for every row produced from the same user.
reviews = reviews.explode(column='reviews')

In [8]:
# Inspect the exploded frame: one row per (user, review dict).
reviews

Unnamed: 0,user_id,reviews
0,76561197970982479,"{'funny': '', 'posted': 'Posted November 5, 20..."
0,76561197970982479,"{'funny': '', 'posted': 'Posted July 15, 2011...."
0,76561197970982479,"{'funny': '', 'posted': 'Posted April 21, 2011..."
1,js41637,"{'funny': '', 'posted': 'Posted June 24, 2014...."
1,js41637,"{'funny': '', 'posted': 'Posted September 8, 2..."
...,...,...
25797,76561198312638244,"{'funny': '', 'posted': 'Posted July 10.', 'la..."
25797,76561198312638244,"{'funny': '', 'posted': 'Posted July 8.', 'las..."
25798,LydiaMorley,"{'funny': '1 person found this review funny', ..."
25798,LydiaMorley,"{'funny': '', 'posted': 'Posted July 20.', 'la..."


In [9]:
# Order rows by user identifier, in place; index labels travel with their rows.
reviews.sort_values(by='user_id', inplace=True)

In [10]:
# Expand every review dict into one column per key (funny, posted, item_id, ...).
# NOTE(review): the extra column named 0 in the result presumably comes from rows
# whose 'reviews' value is not a dict (e.g. NaN left by explode for users with an
# empty review list) — confirm. It is dropped in a later cell.
df=reviews['reviews'].apply(pd.Series)

In [11]:
# Inspect the unpacked review fields.
df

Unnamed: 0,funny,posted,last_edited,item_id,helpful,recommend,review,0
8342,,"Posted March 20, 2014.",,1250,1 of 1 people (100%) found this review helpful,True,หนุกคับ แนะนำ 10/10,
25239,,"Posted May 30, 2014.",,113200,0 of 1 people (0%) found this review helpful,True,One Of The Funnyest Games That Is Animated :) ...,
25239,,"Posted January 24, 2014.",,440,2 of 3 people (67%) found this review helpful,True,the best game i ever plllayed,
12954,,Posted March 2.,,730,No ratings yet,True,"it done brokeded on me, the game no longer wor...",
12954,,"Posted August 15, 2015.",,105600,No ratings yet,True,"It's an amazing game, and im glad that mac use...",
...,...,...,...,...,...,...,...,...
24058,,"Posted December 23, 2013.","Last edited March 10, 2015.",8980,0 of 4 people (0%) found this review helpful,True,A Gem.It's level of stupidity is just overwhem...,
24058,,"Posted May 18, 2013.",,440,0 of 1 people (0%) found this review helpful,True,THis FPS Shooter game really packs a punch.I'm...,
24058,,"Posted December 23, 2013.","Last edited September 25, 2014.",72850,1 of 2 people (50%) found this review helpful,True,It's been a long way since Elder Scrolls start...,
24058,,"Posted February 1, 2015.",,730,2 of 5 people (40%) found this review helpful,True,"After playing 500 hours on this fantastic FPS,...",


In [12]:
# List the column labels produced by the unpacking step (note the integer label 0).
df.columns

Index([      'funny',      'posted', 'last_edited',     'item_id',
           'helpful',   'recommend',      'review',             0],
      dtype='object')

In [13]:
# Keep only posted, item_id, recommend and review; the integer label 0 is the
# stray column created while unpacking the review dicts.
df.drop(columns=['funny', 'last_edited', 'helpful', 0], inplace=True)

In [14]:
# Inspect the review fields that remain after the drop.
df

Unnamed: 0,posted,item_id,recommend,review
8342,"Posted March 20, 2014.",1250,True,หนุกคับ แนะนำ 10/10
25239,"Posted May 30, 2014.",113200,True,One Of The Funnyest Games That Is Animated :) ...
25239,"Posted January 24, 2014.",440,True,the best game i ever plllayed
12954,Posted March 2.,730,True,"it done brokeded on me, the game no longer wor..."
12954,"Posted August 15, 2015.",105600,True,"It's an amazing game, and im glad that mac use..."
...,...,...,...,...
24058,"Posted December 23, 2013.",8980,True,A Gem.It's level of stupidity is just overwhem...
24058,"Posted May 18, 2013.",440,True,THis FPS Shooter game really packs a punch.I'm...
24058,"Posted December 23, 2013.",72850,True,It's been a long way since Elder Scrolls start...
24058,"Posted February 1, 2015.",730,True,"After playing 500 hours on this fantastic FPS,..."


In [15]:
# One row per distinct user (first occurrence wins); copy so that later in-place
# edits to rnames do not touch `reviews`.
rnames = reviews.drop_duplicates(subset='user_id').copy()

In [16]:
# Inspect the de-duplicated user rows (still carrying the original index labels).
rnames

Unnamed: 0,user_id,reviews
8342,--000--,"{'funny': '', 'posted': 'Posted March 20, 2014..."
25239,--ace--,"{'funny': '', 'posted': 'Posted May 30, 2014.'..."
12954,--ionex--,"{'funny': '', 'posted': 'Posted March 2.', 'la..."
18659,-2SV-vuLB-Kg,"{'funny': '', 'posted': 'Posted October 15, 20..."
367,-Azsael-,"{'funny': '', 'posted': 'Posted February 23.',..."
...,...,...
8814,zwanzigdrei,"{'funny': '', 'posted': 'Posted July 25, 2015...."
24992,zy0705,"{'funny': '', 'posted': 'Posted August 11, 201..."
21426,zynxgameth,"{'funny': '', 'posted': 'Posted May 1, 2013.',..."
24058,zyr0n1c,"{'funny': '', 'posted': 'Posted July 15, 2013...."


In [17]:
# Replace the inherited index with 0..n-1; the old labels move into a new 'index'
# column, which is renamed to 'trash' and dropped in later cells.
rnames.reset_index(inplace=True)

In [18]:
# Inspect rnames after the first index reset.
rnames

Unnamed: 0,index,user_id,reviews
0,8342,--000--,"{'funny': '', 'posted': 'Posted March 20, 2014..."
1,25239,--ace--,"{'funny': '', 'posted': 'Posted May 30, 2014.'..."
2,12954,--ionex--,"{'funny': '', 'posted': 'Posted March 2.', 'la..."
3,18659,-2SV-vuLB-Kg,"{'funny': '', 'posted': 'Posted October 15, 20..."
4,367,-Azsael-,"{'funny': '', 'posted': 'Posted February 23.',..."
...,...,...,...
25480,8814,zwanzigdrei,"{'funny': '', 'posted': 'Posted July 25, 2015...."
25481,24992,zy0705,"{'funny': '', 'posted': 'Posted August 11, 201..."
25482,21426,zynxgameth,"{'funny': '', 'posted': 'Posted May 1, 2013.',..."
25483,24058,zyr0n1c,"{'funny': '', 'posted': 'Posted July 15, 2013...."


In [19]:
# The original string identifier becomes 'user_name'; the leftover old index
# labels are marked as 'trash' so they can be discarded afterwards.
rnames = rnames.rename(columns={'user_id': 'user_name', 'index': 'trash'})

In [20]:
# Second reset: materialises the current 0..n-1 index as a new 'index' column,
# which is renamed to 'user_id' in a later cell and serves as the numeric user key.
rnames.reset_index(inplace=True)

In [21]:
# Confirm the column layout before dropping the helper columns.
rnames.columns

Index(['index', 'trash', 'user_name', 'reviews'], dtype='object')

In [22]:
# Only the numeric key and the user name are needed in the lookup table.
rnames.drop(labels=['trash', 'reviews'], axis='columns', inplace=True)

In [23]:
# The sequential number generated by the index reset is the new integer user_id.
rnames = rnames.rename({'index': 'user_id'}, axis='columns')

In [24]:
# Inspect the finished lookup table: integer user_id -> user_name.
rnames

Unnamed: 0,user_id,user_name
0,0,--000--
1,1,--ace--
2,2,--ionex--
3,3,-2SV-vuLB-Kg
4,4,-Azsael-
...,...,...
25480,25480,zwanzigdrei
25481,25481,zy0705
25482,25482,zynxgameth
25483,25483,zyr0n1c


In [25]:
# Persist the user_id / user_name lookup table as Parquet.
rnames.to_parquet('../Datasets/rnames.parquet')

In [26]:
# Place the unpacked review fields next to the original columns.
# NOTE(review): both frames carry the same non-unique index in the same order
# (df was derived from reviews after the sort), which is presumably why this
# column-wise concat lines up row for row; re-sorting or filtering either frame
# in between would break that assumption — confirm before reordering cells.
reviews = pd.concat([reviews, df], axis=1)

In [27]:
# Inspect the combined frame (raw dict plus its unpacked fields).
reviews

Unnamed: 0,user_id,reviews,posted,item_id,recommend,review
8342,--000--,"{'funny': '', 'posted': 'Posted March 20, 2014...","Posted March 20, 2014.",1250,True,หนุกคับ แนะนำ 10/10
25239,--ace--,"{'funny': '', 'posted': 'Posted May 30, 2014.'...","Posted May 30, 2014.",113200,True,One Of The Funnyest Games That Is Animated :) ...
25239,--ace--,"{'funny': '', 'posted': 'Posted January 24, 20...","Posted January 24, 2014.",440,True,the best game i ever plllayed
12954,--ionex--,"{'funny': '', 'posted': 'Posted March 2.', 'la...",Posted March 2.,730,True,"it done brokeded on me, the game no longer wor..."
12954,--ionex--,"{'funny': '', 'posted': 'Posted August 15, 201...","Posted August 15, 2015.",105600,True,"It's an amazing game, and im glad that mac use..."
...,...,...,...,...,...,...
24058,zyr0n1c,"{'funny': '', 'posted': 'Posted December 23, 2...","Posted December 23, 2013.",8980,True,A Gem.It's level of stupidity is just overwhem...
24058,zyr0n1c,"{'funny': '', 'posted': 'Posted May 18, 2013.'...","Posted May 18, 2013.",440,True,THis FPS Shooter game really packs a punch.I'm...
24058,zyr0n1c,"{'funny': '', 'posted': 'Posted December 23, 2...","Posted December 23, 2013.",72850,True,It's been a long way since Elder Scrolls start...
24058,zyr0n1c,"{'funny': '', 'posted': 'Posted February 1, 20...","Posted February 1, 2015.",730,True,"After playing 500 hours on this fantastic FPS,..."


In [28]:
# Align the column name with rnames so the two frames can be joined on it.
reviews = reviews.rename({'user_id': 'user_name'}, axis='columns')

In [29]:
# Bring in the integer user_id for every review row via an inner join on user_name.
reviews = reviews.merge(rnames, how='inner', on='user_name')

In [30]:
# Inspect the merged frame; the join also resets the index to 0..n-1.
reviews

Unnamed: 0,user_name,reviews,posted,item_id,recommend,review,user_id
0,--000--,"{'funny': '', 'posted': 'Posted March 20, 2014...","Posted March 20, 2014.",1250,True,หนุกคับ แนะนำ 10/10,0
1,--ace--,"{'funny': '', 'posted': 'Posted May 30, 2014.'...","Posted May 30, 2014.",113200,True,One Of The Funnyest Games That Is Animated :) ...,1
2,--ace--,"{'funny': '', 'posted': 'Posted January 24, 20...","Posted January 24, 2014.",440,True,the best game i ever plllayed,1
3,--ionex--,"{'funny': '', 'posted': 'Posted March 2.', 'la...",Posted March 2.,730,True,"it done brokeded on me, the game no longer wor...",2
4,--ionex--,"{'funny': '', 'posted': 'Posted August 15, 201...","Posted August 15, 2015.",105600,True,"It's an amazing game, and im glad that mac use...",2
...,...,...,...,...,...,...,...
59328,zyr0n1c,"{'funny': '', 'posted': 'Posted December 23, 2...","Posted December 23, 2013.",8980,True,A Gem.It's level of stupidity is just overwhem...,25483
59329,zyr0n1c,"{'funny': '', 'posted': 'Posted May 18, 2013.'...","Posted May 18, 2013.",440,True,THis FPS Shooter game really packs a punch.I'm...,25483
59330,zyr0n1c,"{'funny': '', 'posted': 'Posted December 23, 2...","Posted December 23, 2013.",72850,True,It's been a long way since Elder Scrolls start...,25483
59331,zyr0n1c,"{'funny': '', 'posted': 'Posted February 1, 20...","Posted February 1, 2015.",730,True,"After playing 500 hours on this fantastic FPS,...",25483


In [31]:
# The numeric user_id replaces user_name, and the raw review dict is already unpacked.
reviews.drop(labels=['user_name', 'reviews'], axis='columns', inplace=True)

In [32]:
# Inspect the slimmed-down reviews frame.
reviews

Unnamed: 0,posted,item_id,recommend,review,user_id
0,"Posted March 20, 2014.",1250,True,หนุกคับ แนะนำ 10/10,0
1,"Posted May 30, 2014.",113200,True,One Of The Funnyest Games That Is Animated :) ...,1
2,"Posted January 24, 2014.",440,True,the best game i ever plllayed,1
3,Posted March 2.,730,True,"it done brokeded on me, the game no longer wor...",2
4,"Posted August 15, 2015.",105600,True,"It's an amazing game, and im glad that mac use...",2
...,...,...,...,...,...
59328,"Posted December 23, 2013.",8980,True,A Gem.It's level of stupidity is just overwhem...,25483
59329,"Posted May 18, 2013.",440,True,THis FPS Shooter game really packs a punch.I'm...,25483
59330,"Posted December 23, 2013.",72850,True,It's been a long way since Elder Scrolls start...,25483
59331,"Posted February 1, 2015.",730,True,"After playing 500 hours on this fantastic FPS,...",25483


In [33]:
# Check dtypes and non-null counts before cleaning the 'posted' column.
reviews.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 59333 entries, 0 to 59332
Data columns (total 5 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   posted     59305 non-null  object
 1   item_id    59305 non-null  object
 2   recommend  59305 non-null  object
 3   review     59305 non-null  object
 4   user_id    59333 non-null  int64 
dtypes: int64(1), object(4)
memory usage: 2.3+ MB


In [34]:
# Remove leading/trailing periods from the 'Posted ...' text.
reviews['posted'] = reviews['posted'].str.strip(to_strip='.')

In [35]:
# Keep only the first four-digit run (the year); rows without one become NaN.
# expand=False returns the single capture group as a Series.
reviews['posted'] = reviews['posted'].str.extract(r'(\d{4})', expand=False)

In [36]:
# Most frequent year; mode() returns a Series, so take its first entry.
moda = reviews['posted'].mode().iloc[0]

In [37]:
# Impute rows whose date carried no four-digit year with the most frequent year.
# Assign the result back instead of calling fillna(inplace=True) on the column
# selection: that form is chained assignment, which warns on recent pandas and
# leaves `reviews` unchanged once Copy-on-Write is in effect.
reviews['posted'] = reviews['posted'].fillna(moda)

In [38]:
# With no missing values left, convert the year strings to integers.
reviews['posted'] = reviews['posted'].astype(int)

In [39]:
# Inspect the frame with 'posted' reduced to an integer year.
reviews

Unnamed: 0,posted,item_id,recommend,review,user_id
0,2014,1250,True,หนุกคับ แนะนำ 10/10,0
1,2014,113200,True,One Of The Funnyest Games That Is Animated :) ...,1
2,2014,440,True,the best game i ever plllayed,1
3,2014,730,True,"it done brokeded on me, the game no longer wor...",2
4,2015,105600,True,"It's an amazing game, and im glad that mac use...",2
...,...,...,...,...,...
59328,2013,8980,True,A Gem.It's level of stupidity is just overwhem...,25483
59329,2013,440,True,THis FPS Shooter game really packs a punch.I'm...,25483
59330,2013,72850,True,It's been a long way since Elder Scrolls start...,25483
59331,2015,730,True,"After playing 500 hours on this fantastic FPS,...",25483


In [40]:
# Sentiment scoring helper. A single VADER analyzer is created on first use and
# shared by every call instead of being rebuilt for each review.
_ANALIZADOR_VADER = None


def analyze_sentiment(review):
    """Classify a review as 0 (negative), 1 (neutral) or 2 (positive).

    The text is scored with VADER's ``polarity_scores`` and the ``compound``
    value is mapped to a label: ``>= 0.05`` is positive, ``<= -0.05`` is
    negative, anything in between is neutral.

    Parameters
    ----------
    review : object
        Review text. Non-string values are converted with ``str``; ``None``
        and float NaN are treated as a missing review.

    Returns
    -------
    int
        2 for positive, 0 for negative, 1 for neutral. Missing reviews and
        reviews whose scoring raises an error are labelled 1.

    Notes
    -----
    A failure while *constructing* the analyzer is not swallowed: it would
    otherwise silently label every review as neutral.
    """
    global _ANALIZADOR_VADER

    # Missing review: neutral, no need to involve the model at all.
    if review is None or (isinstance(review, float) and review != review):
        return 1

    # Create the analyzer once; repeating its setup for every row is wasted work.
    if _ANALIZADOR_VADER is None:
        _ANALIZADOR_VADER = SentimentIntensityAnalyzer()

    try:
        sentiment_score = _ANALIZADOR_VADER.polarity_scores(str(review))['compound']
    except Exception:
        # Best effort per review: an unscorable text is labelled neutral.
        # Exception (not a bare except) lets KeyboardInterrupt/SystemExit through.
        return 1

    # Map the compound score onto the 0/1/2 scale.
    if sentiment_score >= 0.05:
        return 2  # Positive
    if sentiment_score <= -0.05:
        return 0  # Negative
    return 1  # Neutral

In [41]:
# Score every review text and store the 0/1/2 label in 'sentiment_analysis'.
reviews['sentiment_analysis'] = reviews['review'].map(analyze_sentiment)

# The raw text is not kept once it has been scored.
reviews.drop(columns=['review'], inplace=True)

# Persist the cleaned reviews table without writing the DataFrame index.
reviews.to_parquet("../Datasets/reviews.parquet", index=False)

In [42]:
# Inspect the final table that was written to Parquet.
reviews

Unnamed: 0,posted,item_id,recommend,user_id,sentiment_analysis
0,2014,1250,True,0,1
1,2014,113200,True,1,2
2,2014,440,True,1,2
3,2014,730,True,2,2
4,2015,105600,True,2,2
...,...,...,...,...,...
59328,2013,8980,True,25483,2
59329,2013,440,True,25483,1
59330,2013,72850,True,25483,2
59331,2015,730,True,25483,2


In [43]:
# Distribution of the sentiment labels (2 = positive, 1 = neutral, 0 = negative).
reviews['sentiment_analysis'].value_counts()

sentiment_analysis
2    40010
1    11010
0     8313
Name: count, dtype: int64