# **Análisis de sentimientos**

## **1. Carga de las librerías necesarias**

In [None]:
!pip install vaderSentiment



In [None]:
import pandas as pd  # DataFrame handling for the review data
import nltk  # Tokenizer and stopword corpus used for text preprocessing
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer  # VADER sentiment scorer
import re  # Regular expressions, used to collapse repeated characters
from google.colab import drive

# Mount Google Drive so files under /content/drive can be read and written.
drive.mount('/content/drive')

# NLTK resources needed by the preprocessing functions.
nltk.download('punkt')  # Tokenizer models used by word_tokenize
# Recent NLTK releases load the tokenizer from 'punkt_tab' instead of 'punkt';
# fetching both keeps word_tokenize working whichever version is installed.
nltk.download('punkt_tab')
nltk.download('stopwords')  # Stopword lists
# NOTE(review): the vaderSentiment package appears to ship its own lexicon, so this
# NLTK copy may be unnecessary - confirm before removing.
nltk.download('vader_lexicon')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package vader_lexicon to /root/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


True

## **2. Carga de datos**

In [None]:
# Location of the reviews parquet file under the mounted Drive folder
# (a filesystem path, despite the variable name).
url=r'/content/drive/MyDrive/2023/Henry/MLOps Henry/Data_Clean/clean_user_reviews.parquet.gzip'

In [None]:
df_user_reviews = pd.read_parquet(url) # Load the user reviews into a DataFrame

## 3. Funciones para la transformación y el preprocesamiento de texto

### 3.1. Definición de las funciones de preprocesamiento y análisis de sentimiento

In [None]:
# Cache for the filtered stopword set; built on first use so the NLTK corpus
# is read once instead of once per review.
_STOPWORDS_KEEPING_NOT = None


def _english_stopwords():
  """Return the NLTK English stopwords, minus every entry containing 'not'."""
  global _STOPWORDS_KEEPING_NOT
  if _STOPWORDS_KEEPING_NOT is None:
    # A frozenset gives constant-time membership tests during filtering.
    _STOPWORDS_KEEPING_NOT = frozenset(
        word for word in nltk.corpus.stopwords.words('english')
        if 'not' not in word
    )
  return _STOPWORDS_KEEPING_NOT


def tokenizacion(x):
  """
  Tokenize a review and remove English stopwords.

  The stopword list comes from NLTK; any stopword containing the substring
  'not' is excluded from it, so negations survive the filtering. The text is
  split with nltk.tokenize.word_tokenize and each token is compared in lower
  case against the stopwords (the token itself keeps its original casing).

  Parameters
  ----------
  x : str or any
      Review text. Values that are not strings are returned unchanged.

  Returns
  -------
  str or any
      The remaining tokens joined by single spaces, or `x` itself when it is
      not a string.
  """
  if not isinstance(x, str):
    return x

  stopwords = _english_stopwords()
  tokens = nltk.tokenize.word_tokenize(x)
  return ' '.join(token for token in tokens if token.lower() not in stopwords)



# Any single character (newlines included, because of DOTALL) that appears
# three or more times in a row.
_REPEATED_CHARS = re.compile(r'(.)\1{2,}', re.DOTALL)


def delete_repeated_characters(text):
  """
  Collapse runs of three or more identical characters down to two.

  Runs of exactly two characters are left alone so that ordinary words such
  as "call" or "issue" are not damaged; elongated words like "goooood"
  become "good".

  Parameters
  ----------
  text : str or any
      Text to clean. Values that are not strings (e.g. None or NaN for a
      missing review) are returned unchanged instead of raising TypeError.

  Returns
  -------
  str or any
      The cleaned text, or `text` itself when it is not a string.
  """
  if not isinstance(text, str):
    return text

  return _REPEATED_CHARS.sub(r'\1\1', text)


# Create a single SentimentIntensityAnalyzer instance; analyze_sentiment reads it as a global
sid = SentimentIntensityAnalyzer()



def analyze_sentiment(text):
  """
  Classify a review by the sign of its VADER compound score.

  Returns 0 for a negative review, 1 for a neutral one and 2 for a positive
  one. Missing values and empty strings are treated as neutral.
  """
  # Nothing to score: null values and empty text count as neutral.
  if pd.isnull(text) or text == '':
    return 1

  compound = sid.polarity_scores(text).get('compound')
  if compound > 0:
    return 2
  return 0 if compound < 0 else 1





### 3.2. Transformación

In [None]:
# Clean the review text: remove stopwords first, then collapse elongated
# character runs. The functions are passed to apply directly (no wrapper
# lambdas) and the 'review' column is overwritten with the cleaned text.
df_user_reviews['review'] = (
    df_user_reviews['review']
    .apply(tokenizacion)
    .apply(delete_repeated_characters)
)
# Label each cleaned review: 0 negative, 1 neutral, 2 positive.
df_user_reviews['sentiment_analysis'] = df_user_reviews['review'].apply(analyze_sentiment)

In [20]:
# Preview the first rows to check the cleaned text and the new sentiment_analysis column
df_user_reviews.head()


Unnamed: 0,user_id,user_url,item_id,recommend,review,Posted Date,Date last edited,funny review votes,Helpful review votes,total review votes,sentiment_analysis
0,76561197970982479,http://steamcommunity.com/profiles/76561197970...,1250,1,simple yet great replayability opinion zombie ...,2011-11-05,2011-11-05,0,0,0,2
1,76561197970982479,http://steamcommunity.com/profiles/76561197970...,22200,1,unique worth playthrough,2011-07-15,2011-07-15,0,0,0,2
2,76561197970982479,http://steamcommunity.com/profiles/76561197970...,43110,1,great atmosphere gunplay bit chunky times end ...,2011-04-21,2011-04-21,0,0,0,2
3,js41637,http://steamcommunity.com/id/js41637,251610,1,know think see title barbie dreamhouse party n...,2014-06-24,2014-06-24,0,15,20,2
4,js41637,http://steamcommunity.com/id/js41637,227300,1,simple actually not simple truck driving simul...,2013-09-08,2013-09-08,0,0,1,2


## 4. Exportación

In [None]:
# Write the enriched frame back to the path it was loaded from (`url`), reusing
# the variable so the read and write locations cannot drift apart.
# NOTE(review): this overwrites the input file, so re-running the notebook will
# preprocess text that was already preprocessed - confirm this is intended.
df_user_reviews.to_parquet(url, compression='gzip', index=False)