# Imports

In [1]:
'''
PLEASE READ INSTRUCTIONS ON HOW TO RUN REQUIREMENTS.TXT FILE

Transformers is package to clean, reduce, expand or generate features
Contains thousands of pre-trained models for text, vision, and audio.
'''
from transformers import pipeline
'''
Pysentimiento is part of the transformers trained models that applies sentiment analysis to spanish texts
'''
import pandas as pd
import re
from os.path import exists
'''
Google translate API package import
'''
import googletrans

# Pipeline type selected

In [2]:
# Object encapsulating the sentiment analysis pipeline via identifier
# Helps to clasify sequences according to positive or negative sentiments
senti_pipeline = pipeline("sentiment-analysis")

No model was supplied, defaulted to distilbert-base-uncased-finetuned-sst-2-english and revision af0f99b (https://huggingface.co/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.


# Test pipeline performance example

In [3]:
# Positive statement
senti_pipeline("I am extremely happy people have found refuge and food after the disaster")

[{'label': 'POSITIVE', 'score': 0.9994329810142517}]

In [4]:
# Negative statement
senti_pipeline("I am sad that the government has not provided more assistance to the affected after the hurricane")

[{'label': 'NEGATIVE', 'score': 0.9983866214752197}]

# Run pipeline on each english tweet within hurricane ian tweets dataset

In [21]:
if not exists("sa_tweets_eng_original.csv"):
    
    # Read csv containing tweets in english
    dataset = pd.read_csv('hurricane_ian.csv')
    # Create new dataframe containing results and save tweets there
    results = pd.DataFrame()
    results['tweet'] = dataset['tweet']

    labels = []
    scores = []

    # Go through each tweet and evaluate
    for tweet in results['tweet']:
        ans = senti_pipeline(tweet)
        # Append each dictionary result to corresponding list
        labels.append(ans[0]['label'])
        scores.append(ans[0]['score'])

    # Create a column with list transformed into pd series
    results['label'] = pd.Series(labels)
    results['score'] = pd.Series(scores)

    # Store results on new csv
    results.to_csv('sa_tweets_eng_original.csv')
else:
    # Read csv containing tweets in english with already created scoring and labels
    results = pd.read_csv('sa_tweets_eng_original.csv')
    results = results.drop(["Unnamed: 0"], axis=1)
    print("english sentiment analysis file already created")

english sentiment analysis file already created


In [22]:
# Test print
results.head(3)

Unnamed: 0,tweet,label,score
0,@joe_____schmoe @TonicMcD @PGATOUR @JustinThom...,NEGATIVE,0.998637
1,Hurricane Ian's reinsurance influence to exten...,POSITIVE,0.992381
2,@imoffensivers Still in Florida working on thi...,NEGATIVE,0.998555


# Translate english tweets into Spanish using Google Translate

In [23]:
# Create translator class object
translator = googletrans.Translator()
# List to store tweets in spanish
spanish = []

# Pass each tweet as a parameter into the translator object and store results
runnit = False
if not exists("spanish_tweets_translated.csv") or runnit:

    # Create new dataframe to store spanish tweets
    spanish_tweets = pd.DataFrame()

    for tweet in results['tweet']:
        trs = translator.translate(tweet, dest='es')
        spanish.append(trs.text)

    # Create column containing translated tweets
    spanish_tweets['tweet'] = pd.Series(spanish)
    spanish_tweets.to_csv("spanish_tweets_translated.csv")
else:
    # Load csv containing translated tweets
    spanish_tweets = pd.read_csv("spanish_tweets_translated.csv")
    spanish_tweets = spanish_tweets.drop(["Unnamed: 0"], axis=1)

In [24]:
# Sample print of df
spanish_tweets.head(3)

Unnamed: 0,tweet
0,@joe_____schmoe @TonicMcD @PGATOUR @JustinThom...
1,La influencia del reaseguro del huracán Ian se...
2,@imoffensivers Todavía en Florida trabajando e...


# Translate back into english same spanish tweets

In [17]:
# Pass each tweet as a parameter into the translator object and store results
runnit = False
en_tweets = []
if not exists("sa_tweets_eng_snd.csv") or runnit:

    # Create new dataframe to store english tweets
    english_tweets = pd.DataFrame()

    for tweet in spanish_tweets['tweet']:
        trs = translator.translate(tweet, dest='en')
        en_tweets.append(trs.text)

    # Create column containing translated tweets
    english_tweets['tweet'] = pd.Series(en_tweets)
    english_tweets.to_csv("sa_tweets_eng_snd.csv")
else:
    # Load csv containing translated tweets
    english_tweets = pd.read_csv("sa_tweets_eng_snd.csv")
    en_tweets = list(english_tweets['tweet'])  

In [25]:
# Print sample
english_tweets.head(3)

Unnamed: 0,tweet
0,@joe_____schmoe @TonicMcD @PGATOUR @JustinThom...
1,The influence of Hurricane Ian reinsurance wil...
2,@imoffensivers Still in Florida working on thi...


# Run pipeline on 2nd translated english tweets

In [27]:
runnit = True
if not exists("sa_tweets_eng_snd.csv") or runnit:
    labels = []
    scores = []

    # Go through each tweet and evaluate
    for tweet in english_tweets['tweet']:
        ans = senti_pipeline(tweet)
        # Append each dictionary result to corresponding list
        labels.append(ans[0]['label'])
        scores.append(ans[0]['score'])

    # Create a column with list transformed into pd series
    english_tweets['label'] = pd.Series(labels)
    english_tweets['score'] = pd.Series(scores)

    # Store results on new csv
    english_tweets.to_csv("sa_tweets_eng_snd.csv")
else:
    # Read csv containing tweets in english with already created scoring and labels
    english_tweets = pd.read_csv("sa_tweets_eng_snd.csv")
    english_tweets = english_tweets.drop(["Unnamed: 0"], axis=1)
    print("english sentiment analysis second round file already created")

In [28]:
# Print out sample df
english_tweets.head(3)

Unnamed: 0,tweet,label,score
0,@joe_____schmoe @TonicMcD @PGATOUR @JustinThom...,NEGATIVE,0.998329
1,The influence of Hurricane Ian reinsurance wil...,POSITIVE,0.982135
2,@imoffensivers Still in Florida working on thi...,NEGATIVE,0.998011
