In [None]:
import pandas as pd
import os
from transformers import pipeline

# Function for sentiment analysis
def perform_sentiment_analysis(input_file: str, output_file: str) -> None:
    """
    Classifies each row's 'description' with FinBERT and saves the result to a new Excel file.

    Two columns are added to the input table: 'Sentiment' (the label returned by the
    model) and 'Sentiment Score' (the score returned for that label). Rows whose
    'description' is missing are not sent to the model and keep empty values in
    both columns.

    Parameters:
        input_file (str): Path to the input Excel file; must contain a 'description' column.
        output_file (str): Path to the output Excel file to save the annotated data.

    Raises:
        FileNotFoundError: If input_file does not exist.
    """
    # Fail before loading the model if there is nothing to read
    if not os.path.exists(input_file):
        raise FileNotFoundError(f"Input file not found: {input_file}")

    df = pd.read_excel(input_file)

    # Empty cells are read as NaN (a float), which the pipeline cannot tokenize,
    # so only rows that actually carry text are classified
    has_text = df['description'].notna()
    texts = df.loc[has_text, 'description'].astype(str).tolist()

    # Pre-create the result columns so they exist even when no row is classified
    df['Sentiment'] = None
    df['Sentiment Score'] = float('nan')

    if texts:
        pipe = pipeline("text-classification", model="ProsusAI/finbert")
        # One batched call instead of one call per row; truncation=True keeps
        # descriptions longer than the model's maximum input length from raising
        results = pipe(texts, truncation=True)
        df.loc[has_text, 'Sentiment'] = [res['label'] for res in results]
        df.loc[has_text, 'Sentiment Score'] = [res['score'] for res in results]

    # os.path.dirname() returns '' for a bare file name, and os.makedirs('') raises
    output_dir = os.path.dirname(output_file)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    df.to_excel(output_file, index=False)
    print(f"Sentiment analysis results saved to {output_file}")


def transform_sentiment_scores(input_file: str, output_file: str) -> None:
    """
    Transforms sentiment scores into signed values and saves the result to a new Excel file.

    The 'Sentiment Score' of each row is kept as-is when 'Sentiment' is "positive",
    negated when it is "negative", and set to 0 for any other value. Only the
    'pub_date', 'description' and 'Sentiment Score' columns are written out.

    Parameters:
        input_file (str): Path to the input Excel file containing sentiment data.
        output_file (str): Path to the output Excel file to save the transformed data.

    Raises:
        FileNotFoundError: If input_file does not exist.
    """
    # Check if the input file exists
    if not os.path.exists(input_file):
        raise FileNotFoundError(f"Input file not found: {input_file}")

    # Load the Excel file
    df = pd.read_excel(input_file)

    sentiment = df["Sentiment"]
    score = df["Sentiment Score"]

    # Column-wise equivalent of the per-row rule: start from "score if positive
    # else 0", then overwrite the negative rows with the negated score
    signed_score = score.where(sentiment == "positive", 0)
    signed_score = signed_score.mask(sentiment == "negative", -score)
    df["Sentiment Score"] = signed_score

    # Select only the required columns
    new_df = df[["pub_date", "description", "Sentiment Score"]]

    # Ensure the output directory exists. os.path.dirname() returns '' for a
    # bare file name, and os.makedirs('') raises, so skip it in that case
    output_dir = os.path.dirname(output_file)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Save the resulting table to a new Excel file
    new_df.to_excel(output_file, index=False)

    print(f"Transformation complete. Output saved to: {output_file}")

def calculate_hourly_sentiment(input_file: str, output_file: str) -> None:
    """
    Aggregates sentiment scores hourly by summing them and saves the result to a new Excel file.

    Each 'pub_date' is truncated down to the start of its hour, and the
    'Sentiment Score' values sharing the same hour are summed. The output has
    two columns: 'date' (the hour) and 'Sentiment Score' (the sum for that hour).

    Parameters:
        input_file (str): Path to the input Excel file containing sentiment data.
        output_file (str): Path to the output Excel file to save the aggregated hourly sentiment scores.

    Raises:
        FileNotFoundError: If input_file does not exist.
    """
    # Check if the input file exists
    if not os.path.exists(input_file):
        raise FileNotFoundError(f"Input file not found: {input_file}")

    # Load the Excel file
    df = pd.read_excel(input_file)

    # Step 1: Truncate 'pub_date' down to the start of its hour (floor, not
    # round-to-nearest). The lowercase 'h' alias is used because the uppercase
    # 'H' alias is deprecated in recent pandas releases
    df['pub_date'] = pd.to_datetime(df['pub_date']).dt.floor('h')

    # Step 2: Aggregate sentiment scores hourly by summing them for each hour
    hourly_sentiment = df.groupby('pub_date', as_index=False)['Sentiment Score'].sum()

    # Step 3: Rename the grouping column for clarity
    hourly_sentiment = hourly_sentiment.rename(columns={'pub_date': 'date'})

    # Ensure the output directory exists. os.path.dirname() returns '' for a
    # bare file name, and os.makedirs('') raises, so skip it in that case
    output_dir = os.path.dirname(output_file)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Save the resulting table to a new Excel file
    hourly_sentiment.to_excel(output_file, index=False)

    print(f"Hourly sentiment scores calculated. Output saved to: {output_file}")