# **Actividad NLP 2.0 - Análisis de Sentimientos**
---
Diego Sú Gómez - A01620476

## **Importación de librerías**

In [21]:
#Importar las librerías
import pandas as pd
import numpy as np
import nltk 
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.stem import WordNetLemmatizer
nltk.download("stopwords")
nltk.download("vader_lexicon")
nltk.download('punkt')
pd.options.mode.chained_assignment = None  # default='warn'

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\diego\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\diego\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\diego\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


## **Aplicación del análisis de sentimientos**

In [8]:
#Descargar el dataset con los tweets
df = pd.read_csv("dataset_tweets.csv", header=None, names=["userid","date","query","username","tweet"])
df = df[["tweet"]]
df.head()

Unnamed: 0,tweet
0,"@switchfoot http://twitpic.com/2y1zl - Awww, t..."
0,is upset that he can't update his Facebook by ...
0,@Kenichan I dived many times for the ball. Man...
0,my whole body feels itchy and like its on fire
0,"@nationwideclass no, it's not behaving at all...."


In [26]:
df.shape

(1048576, 1)

In [56]:
df_short = df.iloc[:200000, :]
df_short

Unnamed: 0,tweet
0,"@switchfoot http://twitpic.com/2y1zl - Awww, t..."
0,is upset that he can't update his Facebook by ...
0,@Kenichan I dived many times for the ball. Man...
0,my whole body feels itchy and like its on fire
0,"@nationwideclass no, it's not behaving at all...."
...,...
0,Doesn't feel good.
0,work... again
0,@damienfranco Its so common for it to crash no...
0,my baby boy is wearing big boy underwear


In [57]:
#Hacer una función para preprocesar los tweets
def process_text(text):
    tokens = word_tokenize(text.lower())
    tokens_nostopwords = [token for token in tokens if token not in stopwords.words("english")] #Eliminar las stopwords

    lemmatizer = WordNetLemmatizer()
    l_tokens = [lemmatizer.lemmatize(token) for token in tokens_nostopwords] #Lematizar los tokens restantes

    processed_text = ' '.join(l_tokens) #Unirlos en un texto simple
    return processed_text

In [58]:
#Aplicar el preprocesamiento al dataset
df_short["tweet_processed"] = df_short["tweet"].apply(process_text)
df_short.head()

Unnamed: 0,tweet,tweet_processed
0,"@switchfoot http://twitpic.com/2y1zl - Awww, t...",@ switchfoot http : //twitpic.com/2y1zl - awww...
0,is upset that he can't update his Facebook by ...,upset ca n't update facebook texting ... might...
0,@Kenichan I dived many times for the ball. Man...,@ kenichan dived many time ball . managed save...
0,my whole body feels itchy and like its on fire,whole body feel itchy like fire
0,"@nationwideclass no, it's not behaving at all....","@ nationwideclass , 's behaving . 'm mad . ? c..."


In [59]:
#Realizar la función para el sentiment analysis de cada uno de los tweets
analyzer = SentimentIntensityAnalyzer()

def get_sentiment(text):
    score = analyzer.polarity_scores(text)
    if score["compound"] >= 0.05:
        feel = "Positive"
    elif score["compound"] <= -0.05:
        feel = "Negative"
    else:
        feel = "Neutral"
    return feel

In [60]:
#Analizar los sentimientos de cada uno de los tweets
df_short["sentiment"] = df_short["tweet_processed"].apply(get_sentiment)
df_short.head()

Unnamed: 0,tweet,tweet_processed,sentiment
0,"@switchfoot http://twitpic.com/2y1zl - Awww, t...",@ switchfoot http : //twitpic.com/2y1zl - awww...,Negative
0,is upset that he can't update his Facebook by ...,upset ca n't update facebook texting ... might...,Negative
0,@Kenichan I dived many times for the ball. Man...,@ kenichan dived many time ball . managed save...,Positive
0,my whole body feels itchy and like its on fire,whole body feel itchy like fire,Negative
0,"@nationwideclass no, it's not behaving at all....","@ nationwideclass , 's behaving . 'm mad . ? c...",Negative


In [61]:
df_short["sentiment"].value_counts()

Negative    75983
Positive    70290
Neutral     53727
Name: sentiment, dtype: int64

In [62]:
df_short[df_short["sentiment"]=="Positive"].head()

Unnamed: 0,tweet,tweet_processed,sentiment
0,@Kenichan I dived many times for the ball. Man...,@ kenichan dived many time ball . managed save...,Positive
0,Need a hug,need hug,Positive
0,@LOLTrish hey long time no see! Yes.. Rains a...,@ loltrish hey long time see ! yes .. rain bit...,Positive
0,"@smarrison i would've been the first, but i di...","@ smarrison would 've first , n't gun . really...",Positive
0,@iamjazzyfizzle I wish I got to watch it with ...,@ iamjazzyfizzle wish got watch ! ! miss @ iam...,Positive


In [63]:
df_short[df_short["sentiment"]=="Negative"].head()

Unnamed: 0,tweet,tweet_processed,sentiment
0,"@switchfoot http://twitpic.com/2y1zl - Awww, t...",@ switchfoot http : //twitpic.com/2y1zl - awww...,Negative
0,is upset that he can't update his Facebook by ...,upset ca n't update facebook texting ... might...,Negative
0,my whole body feels itchy and like its on fire,whole body feel itchy like fire,Negative
0,"@nationwideclass no, it's not behaving at all....","@ nationwideclass , 's behaving . 'm mad . ? c...",Negative
0,@caregiving I couldn't bear to watch it. And ...,@ caregiving could n't bear watch . thought ua...,Negative


In [64]:
df_short[df_short["sentiment"]=="Neutral"].head()

Unnamed: 0,tweet,tweet_processed,sentiment
0,@Kwesidei not the whole crew,@ kwesidei whole crew,Neutral
0,@Tatiana_K nope they didn't have it,@ tatiana_k nope n't,Neutral
0,@twittera que me muera ?,@ twittera que muera ?,Neutral
0,spring break in plain city... it's snowing,spring break plain city ... 's snowing,Neutral
0,I just re-pierced my ears,re-pierced ear,Neutral


## **Contabilización de oraciones**
---

In [65]:
#Realizar una función para contabilizar el número de oraciones de los tweets seleccionados
def count_sentences(text):
    sents = sent_tokenize(text)
    return len(sents)

In [66]:
df_short["no_sentences"] = df_short["tweet_processed"].apply(count_sentences)
df_short.head()

Unnamed: 0,tweet,tweet_processed,sentiment,no_sentences
0,"@switchfoot http://twitpic.com/2y1zl - Awww, t...",@ switchfoot http : //twitpic.com/2y1zl - awww...,Negative,3
0,is upset that he can't update his Facebook by ...,upset ca n't update facebook texting ... might...,Negative,2
0,@Kenichan I dived many times for the ball. Man...,@ kenichan dived many time ball . managed save...,Positive,2
0,my whole body feels itchy and like its on fire,whole body feel itchy like fire,Negative,1
0,"@nationwideclass no, it's not behaving at all....","@ nationwideclass , 's behaving . 'm mad . ? c...",Negative,4


In [67]:
#Obtener el número total de oraciones de los tweets seleccionados
print("Suma total de oraciones:",df_short["no_sentences"].sum())
print("Total de tweets usados:",len(df_short))
print("Media de oraciones por tweet:",df_short["no_sentences"].mean())

Suma total de oraciones: 366698
Total de tweets usados: 200000
Media de oraciones por tweet: 1.83349
