In [None]:
# --- Data handling and plotting -------------------------------------------
import pandas as pd
from datetime import datetime
import plotly.express as px
import plotly.graph_objects as go
import matplotlib.pyplot as plt
import numpy as np
from wordcloud import WordCloud

# --- NLTK: each resource is downloaded before the code that needs it --------
import nltk
nltk.download('wordnet')
from nltk.stem import WordNetLemmatizer
from nltk.corpus import stopwords
nltk.download('stopwords')
# NOTE(review): duplicate of the `stopwords` import two lines above.
from nltk.corpus import stopwords
# English stop-word list, stored as a set.
stop_words = set(stopwords.words("english"))
from nltk import FreqDist
# Lexicon downloaded for the VADER SentimentIntensityAnalyzer imported below.
nltk.download('vader_lexicon')
from nltk.sentiment import SentimentIntensityAnalyzer
from nltk import ngrams

# --- FinBERT: model and tokenizer are loaded once at import time ------------
# Both come from the 'ProsusAI/finbert' checkpoint and are used as module-level
# globals by get_sentiment().
from transformers import BertTokenizer, BertForSequenceClassification
finbert = BertForSequenceClassification.from_pretrained('ProsusAI/finbert')
tokenizer = BertTokenizer.from_pretrained('ProsusAI/finbert')
import torch

# Silences every warning for the rest of the session (including pandas'
# chained-assignment warnings), so problems will not surface as warnings.
import warnings
warnings.filterwarnings("ignore")

In [18]:
import pandas as pd

# Compact preview of the five most recent rows of the final sentiment table.
df = (
    pd.read_csv("./Datos ecopetrol/sentimiento/Datos_sentimiento_definitivos_EC.csv")
    .tail(5)
    .drop(columns=[
        "Unnamed: 0",
        "headline_vader_sentiment",
        "average_headline_vader_10periods",
    ])
    .rename(columns={
        "negative": "negative_finbert",
        "neutral": "neutral_finbert",
        "positive": "positive_finbert",
    })
    .round(2)
)

# Abbreviate the verbose column prefixes (applied in the same order as before).
for long_form, short_form in (
    ("negative", "neg"),
    ("positive", "pos"),
    ("neutral", "neu"),
    ("average", "avg"),
):
    df.columns = df.columns.str.replace(long_form, short_form)

# Final display names for the averaged scores. Note that "average" has already
# been shortened to "avg" by the loop above, hence the "avg_..." key.
df = df.rename(columns={
    "title_vader_sentiment": "avg_vader",
    "avg_title_vader_10periods": "avg_vader_10periods",
    "finbert_numeric": "avg_finbert",
})
df

Unnamed: 0,Date,neg_vader,neu_vader,pos_vader,neg_finbert,neu_finbert,pos_finbert,avg_vader,avg_finbert,avg_vader_10periods,avg_finbert_10periods
583,2023-12-03,0,4,9,1,7,5,0.14,0.25,0.13,0.22
584,2023-12-10,3,2,6,3,3,5,0.04,0.31,0.12,0.18
585,2023-12-17,1,0,9,2,2,6,0.11,0.37,0.13,0.21
586,2023-12-24,2,3,1,1,4,1,0.04,-0.17,0.12,0.15
587,2023-12-31,0,0,1,0,0,1,0.0,1.0,0.11,0.25


In [54]:
# Pick one example headline per sentiment class on which FinBERT and VADER
# agree, and export the three rows to Excel.
df = pd.read_csv("./Datos ecopetrol/sentimiento/Datos_sentimiento_ecopetrol_2.csv")
df = df[["Date","Title","finbert sentiment","title_vader_sentiment"]]

finbert_label = df["finbert sentiment"]
vader_score = df["title_vader_sentiment"]

# Both positive: the first of the last five matching rows.
both_positive = (finbert_label == "positive") & (vader_score > 0)
title1 = df.loc[both_positive].tail(5).head(1)

# Both negative: the first of the last four matching rows.
both_negative = (finbert_label == "negative") & (vader_score < 0)
title2 = df.loc[both_negative].tail(4).head(1)

# Both neutral: the seventh of the last twenty matching rows.
both_neutral = (finbert_label == "neutral") & (vader_score == 0)
title3 = df.loc[both_neutral].tail(20).head(7).tail(1)

titles = pd.concat([title1, title2, title3])
titles.to_excel("titles.xlsx")

In [None]:
# Load the raw headlines, parse the dates and order the rows chronologically.
# NOTE(review): the path passed to read_csv is an empty string, so this cell
# cannot run as written -- fill in the location of the raw headlines CSV.
data = (
    pd.read_csv("")
    .assign(Date=lambda frame: pd.to_datetime(frame["Date"]))
    .sort_values(by="Date")
)

In [None]:
def get_sentiment(text):
    """Classify a single piece of text with FinBERT.

    Uses the module-level ``tokenizer`` and ``finbert`` objects.

    Parameters
    ----------
    text : str
        The text to classify. A single string is expected: the predicted
        index is read with ``.item()``, which only works for one prediction.

    Returns
    -------
    str
        ``"positive"``, ``"negative"`` or ``"neutral"`` -- the class with the
        highest softmax probability.
    """
    labels_mapping = {0: "positive", 1: "negative", 2: "neutral"}
    # Truncate explicitly to BERT's 512-token limit so an over-long input is
    # classified on its leading tokens instead of raising inside the model.
    inputs = tokenizer(
        text,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=512,
    )
    # Inference only: skip autograd bookkeeping to save time and memory.
    with torch.no_grad():
        outputs = finbert(**inputs)
    # outputs[0] holds the logits; softmax turns them into class probabilities.
    probabilities = torch.nn.functional.softmax(outputs[0], dim=-1)
    _, predicted_label = torch.max(probabilities, dim=-1)
    sentiment_label = labels_mapping[predicted_label.item()]

    return sentiment_label

# Label every headline with FinBERT. This is best-effort: a headline that
# cannot be classified falls back to "neutral" so the column stays aligned
# with `data`.
X = data['Title'].to_list()

sent_val = list()
failed_count = 0
first_error = None
for x in X:
    try:
        val = get_sentiment(x)
    except Exception as exc:
        # Catch Exception rather than using a bare `except:` so that
        # KeyboardInterrupt/SystemExit can still stop this long-running loop.
        failed_count += 1
        if first_error is None:
            first_error = f"{type(exc).__name__}: {exc}"
        val = "neutral"
    sent_val.append(val)

# Make the fallbacks visible instead of silently mixing them into the data.
if failed_count:
    print(
        f"get_sentiment failed for {failed_count} of {len(X)} titles; "
        f"labelled as 'neutral' (first error: {first_error})"
    )

data["finbert sentiment"] = sent_val

# Signed score: +1 positive, -1 negative, 0 for anything else.
data['finbert_numeric'] = data['finbert sentiment'].apply(lambda x: 1 if x == 'positive' else (-1 if x == 'negative' else 0))

In [None]:
# Score every headline with VADER and keep only the 'compound' value.
sia = SentimentIntensityAnalyzer()
X = data['Title'].to_list()
sentiments = [sia.polarity_scores(title)['compound'] for title in X]

data["title_vader_sentiment"] = sentiments

In [None]:
# Signed FinBERT score: +1 positive, -1 negative, 0 for anything else.
data['finbert_numeric'] = data['finbert sentiment'].apply(lambda x: 1 if x == 'positive' else (-1 if x == 'negative' else 0))

# Sign of the VADER compound score: +1 above zero, -1 below zero, 0 otherwise.
data["vader_title_numeric"] = data['title_vader_sentiment'].apply(lambda x: 1 if x>0 else (-1 if x<0 else 0))

# Take an explicit copy: `sentiment` is assigned into on the next line, and
# writing into a column slice of `data` is chained assignment (pandas would
# emit SettingWithCopyWarning, which the global warnings filter hides).
sentiment = data[["Date", 'finbert sentiment', 'finbert_numeric','title_vader_sentiment',"vader_title_numeric"]].copy()
sentiment['Date'] = pd.to_datetime(sentiment['Date'])

In [None]:
# Per-day headline counts for each sentiment class.
# reindex() pins the column set: a class that never occurs in the data still
# gets a zero-filled column, so the weekly aggregation that sums these columns
# by name cannot fail with a KeyError. Labels outside the listed classes would
# be dropped; none are produced by the labelling cells above.
sentiment_counts_finbert = (
    pd.crosstab(sentiment['Date'], sentiment['finbert sentiment'])
    .reindex(columns=["negative", "neutral", "positive"], fill_value=0)
    .reset_index()
)
sentiment_counts_vader = (
    pd.crosstab(sentiment['Date'], sentiment['vader_title_numeric'])
    .reindex(columns=[-1, 0, 1], fill_value=0)
    .reset_index()
)

# Per-day mean of the raw VADER compound score and of the signed FinBERT score.
average_vader_title = sentiment.groupby('Date')['title_vader_sentiment'].mean().reset_index()
average_finbert_sentiment = sentiment.groupby('Date')['finbert_numeric'].mean().reset_index()

# Combine everything into one row per date. From here on `sentiment` holds the
# daily table rather than the per-headline rows.
sentiment = pd.merge(sentiment_counts_vader,sentiment_counts_finbert, on='Date')
sentiment = pd.merge(sentiment,average_vader_title, on='Date')
sentiment = pd.merge(sentiment,average_finbert_sentiment, on='Date')

# The VADER count columns are still named -1/0/1; give them readable names.
column_name_mapping = {-1: "negative_vader", 0: "neutral_vader", 1: "positive_vader"}
sentiment.columns = [column_name_mapping.get(col, col) for col in sentiment.columns]

In [None]:
# Roll the daily table up to weekly periods: class counts are summed and the
# daily average scores are averaged again over the days of each week.
count_columns = [
    "negative_vader", "neutral_vader", "positive_vader",
    "negative", "neutral", "positive",
]
mean_columns = ['title_vader_sentiment', 'finbert_numeric']
weekly_aggregations = {
    **{column: 'sum' for column in count_columns},
    **{column: 'mean' for column in mean_columns},
}

weekly_period = sentiment['Date'].dt.to_period("W")
week_sentiment = sentiment.groupby(weekly_period).agg(weekly_aggregations).reset_index()

# A weekly period prints as "<start>/<end>"; keep the end date as the timestamp.
week_end = week_sentiment["Date"].astype(str).str.split('/').str[1]
week_sentiment["Date"] = pd.to_datetime(week_end)

In [None]:
# 10-period rolling mean of each weekly average score. The first nine rows are
# NaN because the window is not yet full.
for smoothed_column, source_column in (
    ('average_title_vader_10periods', "title_vader_sentiment"),
    ('average_finbert_10periods', 'finbert_numeric'),
):
    week_sentiment[smoothed_column] = week_sentiment[source_column].rolling(window=10).mean()

In [None]:
# Persist the weekly sentiment table. The row index is written as well, since
# index=False is not passed.
output_csv = "./Datos/sentiment_data.csv"
week_sentiment.to_csv(output_csv)