### Import libraries and define the helper function

In [1]:
# set environment
from pathlib import Path
SRC_PATH = Path('/home/ec2-user/SageMaker/david/tdm-sentiment/src/')
import sys
# Guard the append so re-running this cell does not pile duplicate entries onto sys.path.
if str(SRC_PATH) not in sys.path:
    sys.path.append(str(SRC_PATH))
# NOTE(review): the star import presumably supplies tdm_parser, CORPUSES_PATH and the
# sentiment model paths used further down -- confirm against config.py.
from config import *


# XML parser instance shared by the cells that read article files.
parser = tdm_parser.TdmXmlParser()

def _prompt_until_valid(first_prompt, retry_prompt, normalize, is_valid):
    """Ask with `first_prompt`, then `retry_prompt`, until `is_valid` accepts the normalized answer."""
    answer = normalize(input(first_prompt))
    while not is_valid(answer):
        answer = normalize(input(retry_prompt))
    return answer


def get_file_path():
    """
    Interactively ask for a newspaper name and a GOID number and build the
    path of the corresponding article XML file.

    The newspaper name is matched case-insensitively, ignoring leading,
    trailing and repeated inner whitespace. The GOID must consist of ASCII
    digits only; surrounding whitespace is ignored. Each question is repeated
    until a valid answer is given.

    Returns:
        The result of ``CORPUSES_PATH / <corpus directory> / '<GOID>.xml'``.
        The file is not checked for existence.
    """
    # Accepted newspaper names (lowercase) -> corpus directory name.
    corpus_name_mapping = {
        'new york times': 'Newyork20240203',
        'los angeles times': 'LosAngelesTimesDavid',
        'washington post': 'TheWashingtonPostDavid',
        'chicago tribune': 'ChicagoTribune',
        'usa today': 'USATodayDavid'
    }

    corpus_key = _prompt_until_valid(
        'Please enter Newspaper Name: ',
        'Please enter a Valid Newspaper Name: ',
        # split/join trims the ends and collapses repeated inner whitespace
        lambda raw: ' '.join(raw.split()).lower(),
        lambda name: name in corpus_name_mapping,
    )

    goid = _prompt_until_valid(
        'Please enter GOID number: ',
        'Please enter a Valid GOID number: ',
        str.strip,  # a pasted GOID often carries stray spaces
        # isdigit() alone also accepts characters such as superscript digits,
        # which would produce a file name that cannot exist.
        lambda value: value.isascii() and value.isdigit(),
    )

    return CORPUSES_PATH / corpus_name_mapping[corpus_key] / f'{goid}.xml'


  from .autonotebook import tqdm as notebook_tqdm


### Get the file path and extract the article attributes

In [None]:
# Ask the user which article to open (newspaper name + GOID).
xml_file_path = get_file_path()

# examples
# Newspaper: USA Today, GOID: 1027415041
# Newspaper: usa today, GOID: 40900705
# Newspaper: Chicago Tribune, GOID: 420425293
# Newspaper: chicago tribune, GOID: 1552402040
# Newspaper: los angeles times , GOID: 421073540
# Newspaper: Los Angeles times, GOID: 421073540
# Newspaper: new york times, GOID: 432877202
# Newspaper: new york times, GOID: 759991258
# Newspaper: washington post, GOID: 156229992
# Newspaper: washington post, GOID: 1672802972

#get attributes
soup = parser.get_xml_soup(xml_file_path)  # get soup
# NOTE(review): soup is presumably a BeautifulSoup tree, where find() gives None
# for a missing tag -- guard so an article without <Title> does not end in an
# opaque AttributeError.
title_tag = soup.find('Title')
if title_tag is None:
    title = ''
    print(f'Warning: no <Title> element found in {xml_file_path}')
else:
    title = title_tag.text.strip()         # extract title
texts = parser.get_art_text(soup)          # extract text

# print article attributes
print(f'Title: {title} \n')

print('Text: \n')
for text in texts:  # print text
    print(text)


### Load the sentiment model and score a sample sentence

In [6]:
from sentiment.sentiment_model import sentiment_score_old

# Sample sentence scored at the end of this cell.
text = "I love using this model because it's really effective!"

# Example usage: build the analyzer for the classic model, then keep a direct
# handle on its underlying pipeline.
sentiment_analyzer = sentiment_score_old.TextAnalysis(SENTIMENT_MODEL_PATH_CLASSIC)
pipeline_instance = sentiment_analyzer.get_pipeline()
def get_sentiment(text: str):
    """
    Run the module-level sentiment pipeline on `text`, requesting a score for
    every label rather than only the top one.

    Args:
        text (str): The text to analyze.

    Returns:
        Whatever `pipeline_instance` returns, unchanged. In the recorded run
        for a single string this is a list holding one inner list of
        dictionaries, each with a 'label' and a 'score' key.
    """
    # NOTE(review): newer transformers releases reportedly deprecate
    # `return_all_scores` in favour of `top_k` -- confirm before upgrading,
    # since the output nesting may differ.
    all_label_scores = pipeline_instance(text, return_all_scores=True)
    return all_label_scores


# Score the sample sentence and show the raw pipeline output
# (one {'label', 'score'} entry per class, as in the recorded output).
result = get_sentiment(text)
print(result)


BERT model loaded successfully from 'C:\Users\pc\Documents\work\bank of israel\financial division\yossi\tdm-sentiment\src\sentiment\sentiment_model\distilbert-base-uncased-finetuned-sst-2-english' on device -1.
[[{'label': 'NEGATIVE', 'score': 0.00012689229333773255}, {'label': 'POSITIVE', 'score': 0.9998730421066284}]]




In [None]:
# set environment
from pathlib import Path
SRC_PATH = Path('/home/ec2-user/SageMaker/david/tdm-sentiment/src/')
import sys
# Guard the append so re-running this cell does not add duplicate sys.path entries.
if str(SRC_PATH) not in sys.path:
    sys.path.append(str(SRC_PATH))
from config import *

# Two sample inputs: a short sanity-check sentence and a longer economic paragraph.
short_text = "I love using this model because it's really effective!"
long_text = "In recent years, the global economy has faced huge changes and unexpected challenges that are changing the way countries and industries operate. After the pandemic, it’s clear that flexible government spending, creative money management, and strong supply chains are essential. These shifts have exposed weak points in our system but also opened up new chances for growth and improvement."
# The sample analysed below; assign short_text instead for a quick check.
text = long_text


def _report_sentiment(model_path, heading, sample_text):
    """
    Load the model at `model_path`, print the sentiment distribution and the
    overall score for `sample_text`, and hand back what was computed.

    Args:
        model_path: Passed unchanged to `sentiment_score.TextAnalysis`.
        heading (str): Line printed above the distribution.
        sample_text (str): The text to analyze.

    Returns:
        tuple: (analyzer, sentiment distribution, overall score).
    """
    # Build the analyzer first so any loading message it prints precedes the report.
    analyzer = sentiment_score.TextAnalysis(model_path)
    distribution = analyzer.get_sentiment_dict(sample_text)

    print('\n', '-'*20)
    print(f'text: {sample_text}\n')
    print(heading)
    print(f'Sentiment distribution: {distribution}')

    # Get the computed overall sentiment score.
    overall = analyzer.txt_score(sample_text)
    print(f"Overall sentiment score: {overall}")
    print('-'*20, '\n')
    return analyzer, distribution, overall


# Example usage classic model:
sentiment_analyzer, sentiment_dict, score = _report_sentiment(
    SENTIMENT_MODEL_PATH_CLASSIC, "Raw old probabilities:", text)


# Example usage finbert model:
sentiment_analyzer, sentiment_dict, score = _report_sentiment(
    SENTIMENT_MODEL_PATH, "Raw finbert probabilities:", text)


  from .autonotebook import tqdm as notebook_tqdm


--------------------
Raw old probabilities:
BERT model loaded successfully from 'C:\Users\pc\Documents\work\bank of israel\financial division\yossi\tdm-sentiment\src\sentiment\sentiment_model\distilbert-base-uncased-finetuned-sst-2-english' on device -1.




text: I love using this model because it's really effective!
Sentiment distribution: {'negative': 0.00012689229333773255, 'positive': 0.9998730421066284}
Overall sentiment score: 0.9997462153966763
--------------------
Raw finbert probabilities:
BERT model loaded successfully from 'C:\Users\pc\Documents\work\bank of israel\financial division\yossi\tdm-sentiment\src\sentiment\sentiment_model\finbert_local' on device -1.
text: I love using this model because it's really effective!
Sentiment distribution: {'neutral': 0.00645602447912097, 'positive': 0.9934628009796143, 'negative': 8.122631697915494e-05}
Overall sentiment score: 0.999836491761316


In [None]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# What to fetch and where to put it:
#   model_name - the model ID to download (e.g., FinBERT's ID)
#   local_dir  - local directory that receives the downloaded model files
model_name = "yiyanghkust/finbert-tone"
local_dir = "./finbert_local"

# from_pretrained downloads and caches the model and the tokenizer.
model = AutoModelForSequenceClassification.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)
print("PyTorch model and tokenizer downloaded successfully.")

# Write both artifacts into the same local directory, model first.
model.save_pretrained(local_dir)
tokenizer.save_pretrained(local_dir)
print(f"Model and tokenizer have been saved to {local_dir}")