## notas

- escala só para o sentimento

- acabar, guardar, pormenores

In [1]:
import pandas as pd
import requests
import matplotlib.pyplot as plt
import numpy as np
import plotly.graph_objects as go
from datetime import datetime, timedelta

---
---
---

In [2]:
# Load the source dataset from a local parquet file.
# Its "news" and "keywords" columns are mapped further down in this file to
# build dfS (sentiment values grouped by timestamp) and dfW (top-10 words per date).

df = pd.read_parquet("data05.parquet")

def top10_word(x):
    """Return, for every date, the (at most) ten words with the highest count.

    ``x`` maps each word to either ``None`` or a mapping whose ``"date"``
    entry maps a date key to that word's count (a ``None`` count is treated
    as 0). Words whose accumulated count for a date is below 1 are dropped.

    The result maps each date to a list of words ordered by descending
    count; a date whose words were all dropped maps to an empty list.
    """
    counts_by_date = {}
    for word in x:
        entry = x[word]
        if entry is None:
            continue
        per_date = entry["date"]
        for date in per_date:
            count = per_date[date]
            if count is None:
                count = 0
            bucket = counts_by_date.setdefault(date, {})
            bucket[word] = bucket.get(word, 0) + count

    ranking = {}
    for date, bucket in counts_by_date.items():
        # Keep only words that reach a count of at least 1 on this date.
        kept = [(word, count) for word, count in bucket.items() if not count < 1]
        # Stable sort: words with equal counts keep their insertion order.
        kept.sort(key=lambda pair: pair[1], reverse=True)
        ranking[date] = [word for word, _ in kept[:10]]
    return ranking

def sentimentos_mensais(x):
    """Group the ``"newsSentiment"`` values of the items in ``x`` by ``"tstamp"``.

    Returns a dict mapping each distinct ``"tstamp"`` value to the list of
    ``"newsSentiment"`` values seen for it, in input order.
    """
    grouped = {}
    for item in x:
        grouped.setdefault(item["tstamp"], []).append(item["newsSentiment"])
    return grouped

def generate_dates(start, end):
    """List every month from ``start`` to ``end`` inclusive as "%Y%m" strings.

    Both arguments are strings parsed with the "%Y%m" format. An empty list
    is returned when ``start`` is later than ``end``.
    """
    cursor = datetime.strptime(start, "%Y%m")
    last = datetime.strptime(end, "%Y%m")

    months = []
    while not cursor > last:
        months.append(cursor.strftime("%Y%m"))
        # The cursor is always on day 1, so adding 32 days lands somewhere in
        # the following month; snap back to that month's first day.
        somewhere_next_month = cursor + timedelta(days=32)
        cursor = somewhere_next_month.replace(day=1)

    return months

# Apply the helpers element-wise: each entry of df["news"] becomes a
# {tstamp: [newsSentiment, ...]} dict and each entry of df["keywords"] a
# {date: [up to 10 words]} dict.
# NOTE(review): stockVSnews later indexes dfS/dfW by company name, so df's
# index is presumably the company name — confirm.
dfS = df["news"].map(sentimentos_mensais)
dfW = df["keywords"].map(top10_word)

# Tickers to download; each placeholder value is replaced in the loop below
# by a {"YYYYMM": closing price} dict.
precos = {"BCP.LS": "",
          "GALP.LS": "",
          "EDP.LS": "",
          "SON.LS": "",
          "EGL.LS": ""}

# https://www.alphavantage.co
# NOTE(review): the API key is hard-coded in source; consider loading it from
# configuration or the environment instead of committing it.
key = "IX6KSZ9IKG9ZXMF5"

for symbol in precos:
    url = f"https://www.alphavantage.co/query?function=TIME_SERIES_MONTHLY_ADJUSTED&symbol={symbol}&apikey={key}"
    # Without a timeout, requests may wait forever on a stalled connection.
    r = requests.get(url, timeout=30)
    r.raise_for_status()
    data = r.json()
    # The series key can be absent from the payload (e.g. an error or
    # rate-limit response); fail with the payload instead of a bare KeyError.
    if "Monthly Adjusted Time Series" not in data:
        raise RuntimeError(
            f"Alpha Vantage returned no monthly series for {symbol}: {data}"
        )
    # Keys become "YYYYMM": the first 7 characters of each date with the dash removed.
    close_prices = {date[:7].replace("-", ""): float(details['4. close']) for date, details in data['Monthly Adjusted Time Series'].items()}
    precos[symbol] = close_prices

def stockVSnews(company, ticker, dfS, dfW, precos, name):
    """Plot monthly stock price against news sentiment and save the chart as HTML.

    The figure shows the price as a line and the median-adjusted sentiment as
    bars (green when positive, red when negative, drawn by absolute value).
    Hovering a bar lists that month's top words. The figure is displayed and
    also written to ``stockVSsenti {name}.html``.

    Args:
        company: Key used to look up this company's entries in ``dfS`` and ``dfW``.
        ticker: Key used to look up this company's prices in ``precos``.
        dfS: Indexable by company; each entry maps a date key to a list of
            sentiment values (averaged here per date).
        dfW: Indexable by company; each entry maps a date key to a list of words.
        precos: Maps a ticker to a dict of date key -> price.
        name: Suffix used in the output HTML file name.

    Raises:
        ValueError: If the sentiment and price data share no date.
    """
    top10_words = dfW[company]
    # Extract sentiment data and prices
    prices = precos[ticker]
    sentiments = {key: np.mean(value) for key, value in dfS[company].items()}

    # Restrict the plotted range to the span covered by both data sets.
    common_dates = set(sentiments) & set(prices)
    if not common_dates:
        raise ValueError(
            f"No common dates between sentiment and price data for {company} ({ticker})"
        )
    dates = generate_dates(min(common_dates), max(common_dates))

    mediana = np.median(list(sentiments.values()))  # remove bias
    offset = mediana * 0.6
    # Months without news get a zero-height bar.
    y_news = [sentiments[date] - offset if date in sentiments else 0 for date in dates]
    # Months without a price become None so the line shows a gap.
    y_prices = [prices[date] if date in prices else None for date in dates]
    dates_asdate = [datetime.strptime(date, '%Y%m') for date in dates]

    # None entries cannot be compared with floats, so they are excluded when
    # computing the axis upper bound. The range endpoints always have a price,
    # so this list is never empty.
    known_prices = [price for price in y_prices if price is not None]

    # One hover text per bar: the top words of that month, if any.
    hover_texts = []
    for date in dates:
        if date in top10_words and top10_words[date] != []:
            hover_texts.append(
                f"""<b>Top 10 Words :</b><br>{'<br>'.join(top10_words[date])}"""
            )
        else:
            hover_texts.append('News not found.')

    fig = go.Figure()

    fig.add_trace(go.Scatter(x=dates_asdate, y=y_prices, mode='lines', name="Stock Price"))
    colors = ['green' if val > 0 else 'red' if val < 0 else 'black' for val in y_news]
    fig.add_trace(go.Bar(
        x=dates_asdate,
        y=np.abs(y_news),
        name="Negative Sentiment",  # Not shown: legend entry is disabled and the hover name length is 0.
        marker_color=colors,
        opacity=0.5,
        hovertemplate=hover_texts,
        hoverlabel=dict(namelength=0),
        showlegend=False
    ))
    # Two empty traces that exist only to provide the green/red legend entries.
    fig.add_trace(go.Bar(
        x=[None], y=[None],  # Empty data
        name="Positive Sentiment",  # Legend label for positive sentiment
        marker_color='green',  # Green color
        visible="legendonly"  # Make this trace appear only in the legend
    ))
    fig.add_trace(go.Bar(
        x=[None], y=[None],  # Empty data
        name="Negative Sentiment",  # Legend label for negative sentiment
        marker_color='red',  # Red color
        visible="legendonly"  # Make this trace appear only in the legend
    ))

    # Customize layout and hover
    fig.update_layout(
        title=f"Stock Price and Sentiment Analysis: {company}",
        xaxis_title="Date",
        yaxis_title="Stock Price (€) / Sentiment (0-1)",
        legend_title="Legend",
        hovermode="x unified",  # Ensures hover shows all traces at once on x-axis
        template="plotly_white",  # Sets a clean template
        paper_bgcolor='rgba(0, 0, 0, 0)',
        plot_bgcolor='rgba(0, 0, 0, 0)',
        margin=dict(t=50, b=50, l=50, r=50),  # Add margins around the plot (top, bottom, left, right)
        xaxis=dict(
            showline=True,  # Show the axis line
            linecolor='black',  # Set the x-axis line color to black
            zeroline=False  # Remove the zero line (optional)
        ),
        yaxis=dict(
            showline=True,  # Show the axis line
            linecolor='black',  # Set the y-axis line color to black
            zeroline=False,  # Remove the zero line (optional)
            range=[0, max(known_prices) * 1.05]  # 5% headroom above the highest price
        )
    )

    fig.show()
    fig.write_html(f"stockVSsenti {name}.html")



# Render one chart per company: (company key, ticker, output file suffix).
for company, ticker, name in [
    ("Banco Comercial Português", "BCP.LS", "bcp"),
    ("Galp Energia", "GALP.LS", "galp"),
    ("EDP", "EDP.LS", "edp"),
    ("Sonae", "SON.LS", "sonae"),
    ("Mota-Engil", "EGL.LS", "motaengil"),
]:
    stockVSnews(company, ticker, dfS, dfW, precos, name)