# In[49]:
from flair.models import TextClassifier
from flair.models import RelationExtractor, SequenceTagger
from flair.data import Sentence
import pandas as pd
from textblob import TextBlob
from nltk.sentiment.vader import SentimentIntensityAnalyzer

# In[50]:

def flair_data_wrangling(raw_prediction):
    """Turn the first flair label of a prediction into a signed score.

    The score is returned as-is for any label other than NEGATIVE and is
    negated for a NEGATIVE label, so the result lies on a single
    negative-to-positive axis.

    Args:
        raw_prediction: Sequence of labels (e.g. ``sentence.labels``). Only
            the first element is used. Each label is either an object
            exposing ``value`` and ``score`` attributes, or anything whose
            ``str()`` ends in ``"<VALUE> (<score>)"``.

    Returns:
        float: ``score`` for non-NEGATIVE labels, ``-score`` for NEGATIVE.

    Raises:
        IndexError: If ``raw_prediction`` is empty.
        ValueError: If no score can be extracted from the label.
    """
    import re  # local import: only needed for the string fallback below

    top_label = raw_prediction[0]

    # Prefer the structured attributes. Searching the whole string form for
    # 'NEGATIVE' is unsafe because some label representations embed the
    # sentence text, which may itself contain that word. Note that the
    # attribute carries the full-precision score, not the rounded one that
    # appears in the string form.
    value = getattr(top_label, 'value', None)
    score = getattr(top_label, 'score', None)

    if value is None or score is None:
        # Fallback for plain strings: look only at the trailing
        # "<VALUE> (<score>)" part, anchored at the end of the text.
        match = re.search(r'(\S+)\s*\(([^()]+)\)\s*$', str(top_label))
        if match is None:
            raise ValueError(
                'cannot extract a sentiment score from %r' % (str(top_label),)
            )
        value, score = match.group(1), match.group(2)

    magnitude = float(score)
    return -magnitude if 'NEGATIVE' in str(value) else magnitude

# In[51]:
def run_sentiment_analysis(input_filename, output_filename_1):
    """Score every text in a CSV with flair, TextBlob and VADER.

    Args:
        input_filename: Path of the input CSV. It is read with its second
            column as the index and must contain a ``text`` column.
        output_filename_1: Path of the CSV to write. It holds one row per
            input text with the columns ``flair_score``,
            ``textblob_polarity``, ``textblob_subjectivity``, ``vader_pos``,
            ``vader_neu``, ``vader_neg`` and ``vader_com``. The index column
            is named ``text_ID`` and numbers the rows by position (0-based),
            not by the input file's index.
    """
    data = pd.read_csv(input_filename, index_col=1)

    # Load both models once, outside the per-text loop.
    classifier = TextClassifier.load('en-sentiment')
    analyzer = SentimentIntensityAnalyzer()

    sentiment_analysis = dict(
        flair_score=[],
        textblob_polarity=[],
        textblob_subjectivity=[],
        vader_pos=[],
        vader_neu=[],
        vader_neg=[],
        vader_com=[],
    )

    # Output column -> key in the dict returned by VADER's polarity_scores.
    vader_keys = (
        ('vader_pos', 'pos'),
        ('vader_neu', 'neu'),
        ('vader_neg', 'neg'),
        ('vader_com', 'compound'),
    )

    # Iterate the column values directly: label-based ``.at`` lookups are
    # ambiguous when index labels repeat, while positional iteration always
    # yields exactly one text per row, in file order.
    for text in data['text']:
        # flair: signed confidence of the predicted label
        sentence = Sentence(text)
        classifier.predict(sentence)
        sentiment_analysis['flair_score'].append(
            flair_data_wrangling(sentence.labels)
        )

        # TextBlob: polarity and subjectivity
        blob_sentiment = TextBlob(text).sentiment
        sentiment_analysis['textblob_polarity'].append(blob_sentiment.polarity)
        sentiment_analysis['textblob_subjectivity'].append(
            blob_sentiment.subjectivity
        )

        # VADER: positive / neutral / negative / compound scores
        vader_scores = analyzer.polarity_scores(text)
        for column, key in vader_keys:
            sentiment_analysis[column].append(vader_scores[key])

    senti_an_df = pd.DataFrame(sentiment_analysis)
    senti_an_df.index.names = ['text_ID']
    senti_an_df.to_csv(output_filename_1, index=True)
                  

# In[52]:
def run_result_stats(output_filename_1, output_filename_2):
    """Write summary statistics of a results CSV to another CSV.

    Args:
        output_filename_1: Path of the CSV to summarise.
        output_filename_2: Path of the CSV that receives the output of
            ``DataFrame.describe()`` with the first described column
            removed. Values are written with three decimals.
    """
    stats = pd.read_csv(output_filename_1).describe()

    # Drop the first described column by position. For a file whose first
    # column is a numeric row id (such as ``text_ID``) this removes the
    # meaningless statistics of that id.
    stats = stats.iloc[:, 1:]

    # Plain '%.3f' on purpose: a thousands separator (',') collides with the
    # CSV delimiter, so values >= 1000 (e.g. ``count`` on a large file) would
    # be written as quoted text and read back as strings instead of numbers.
    stats.to_csv(output_filename_2, float_format='%.3f', index=True)

# In[53]:
if __name__ == "__main__":

    # Pipeline files: cleaned news input, per-text scores, summary statistics.
    input_filename, output_filename_1, output_filename_2 = (
        'google_news_data_cleaned.csv',
        'sentiment_analysis_result.csv',
        'result_stats.csv',
    )

    # Step 1 writes the per-text scores; step 2 summarises that file.
    run_sentiment_analysis(input_filename, output_filename_1)
    run_result_stats(output_filename_1, output_filename_2)

# Output: 2022-10-27 17:57:01,449 loading file /Users/Deo/.flair/models/sentiment-en-mix-distillbert_4.pt
