In [1]:
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

import json


In [None]:
# Checkpoint identifier handed to both from_pretrained calls below, so the
# tokenizer and the classifier always come from the same pretrained model.
# NOTE(review): the recorded run prints five probabilities per day, so this
# checkpoint presumably has five output classes - confirm against the model card.
modelName = "nlptown/bert-base-multilingual-uncased-sentiment"
# Tokenizer matching the checkpoint; read as a global by calculateDailySentiment.
tokenizer = AutoTokenizer.from_pretrained(modelName)
# Sequence-classification model for the same checkpoint; also read as a global
# by calculateDailySentiment.
model = AutoModelForSequenceClassification.from_pretrained(modelName)

In [None]:
def calculateDailySentiment(headlines):
    """Average the model's class probabilities over one day's headlines.

    Relies on the module-level `tokenizer` and `model` globals.

    Args:
        headlines: Iterable of entries that are indexed with ['heading'] to
            obtain the headline text (e.g. dicts with a 'heading' key).

    Returns:
        list[float]: The softmax probabilities of every headline, averaged
        element-wise across the headlines (one value per model output class).
        An empty list when `headlines` yields no entries, because there is
        nothing to tokenize or average.

    Raises:
        KeyError: If a dict entry has no 'heading' key.
    """
    texts = [headline['heading'] for headline in headlines]
    if not texts:
        # A day without headlines has no scores; return early rather than
        # handing an empty batch to the tokenizer and model.
        return []
    #endif

    inputs = tokenizer(texts, return_tensors="pt", truncation=True, padding=True, max_length=512, return_attention_mask=True)

    # Inference only: disabling autograd avoids building a gradient graph for
    # the whole batch, which would otherwise hold on to every activation.
    with torch.no_grad():
        logits = model(**inputs).logits
    #endwith

    # softmax over dim=1 normalises each headline's logits into probabilities;
    # mean over dim=0 then averages those probabilities across the headlines.
    scores = logits.softmax(dim=1)
    averageScore = scores.mean(dim=0).tolist()
    return averageScore
#enddef



def analyzeAndSaveSentiment(inputFile, outputFile):
    """Score every date's headlines and save the per-date averages as JSON.

    Args:
        inputFile: Path of a JSON file whose top-level object maps each date
            key to the collection of headline entries for that date.
        outputFile: Path the {date: averageScore} object is written to
            (UTF-8, non-ASCII characters kept as-is, 4-space indent).

    Returns:
        None. The scores are printed per date and written to `outputFile`.
    """
    # 'utf-8-sig' reads plain UTF-8 unchanged and also strips a leading BOM,
    # which json.load would otherwise reject.
    with open(inputFile, 'r', encoding='utf-8-sig') as file:
        data = json.load(file)
    #endwith

    result = {}

    for date, headlines in data.items():
        averageScore = calculateDailySentiment(headlines)
        print(f"{date} > {averageScore}")
        result[date] = averageScore
    #endfor

    # The save has to be indented into the function body: at column 0 it ends
    # the function early and runs at cell level, where `outputFile` and
    # `result` are not defined.
    with open(outputFile, 'w', encoding='utf-8') as file:  # Save output as UTF-8
        json.dump(result, file, ensure_ascii=False, indent=4)
    #endwith
#enddef

In [None]:

# Source headlines (JSON object iterated date by date) and the destination
# for the per-date scores; both sit in the same news2023 data folder.
inputFile, outputFile = (
    '/Users/ASUS/Documents/Stock-Prediction-using-News-Info-Sentiment-master/data/news2023/head_lines.json',
    '/Users/ASUS/Documents/Stock-Prediction-using-News-Info-Sentiment-master/data/news2023/dailyscores.json',
)

# Run the analysis over every date found in the input file.
analyzeAndSaveSentiment(inputFile=inputFile, outputFile=outputFile)

2015-01-01 > [0.3070025146007538, 0.17011728882789612, 0.1548246145248413, 0.17130403220653534, 0.19675149023532867]
2015-01-02 > [0.282503217458725, 0.15611004829406738, 0.18065740168094635, 0.19504718482494354, 0.18568213284015656]
2015-01-03 > [0.32533952593803406, 0.14950305223464966, 0.15599913895130157, 0.18876515328884125, 0.18039311468601227]
2015-01-04 > [0.32683512568473816, 0.17659184336662292, 0.17063122987747192, 0.17169800400733948, 0.15424379706382751]
2015-01-05 > [0.3772907257080078, 0.14218366146087646, 0.14909176528453827, 0.17509512603282928, 0.15633872151374817]
2015-01-06 > [0.28608688712120056, 0.14581941068172455, 0.1645268052816391, 0.19867727160453796, 0.20488962531089783]
2015-01-07 > [0.27082398533821106, 0.15743444859981537, 0.17185550928115845, 0.18497620522975922, 0.21490982174873352]
2015-01-08 > [0.34370890259742737, 0.15521517395973206, 0.14898774027824402, 0.1641693115234375, 0.18791890144348145]
2015-01-09 > [0.3578956127166748, 0.16952630877494812, 

In [None]:
import json

def analyzeAndSaveSentiment(inputFile, outputFile):
    """Score every date's headlines and save the per-date averages as JSON.

    Args:
        inputFile: Path of a JSON file whose top-level object maps each date
            key to the collection of headline entries for that date. The file
            may start with a UTF-8 byte-order mark.
        outputFile: Path the {date: averageScore} object is written to
            (UTF-8, non-ASCII characters kept as-is, 4-space indent).

    Returns:
        None. The scores are printed per date and written to `outputFile`.
    """
    # Open with 'utf-8-sig' up front instead of retrying on UnicodeDecodeError:
    # a BOM decodes fine under plain 'utf-8' and only makes json.load fail with
    # JSONDecodeError, so the retry never ran. 'utf-8-sig' handles files with
    # and without a BOM.
    with open(inputFile, 'r', encoding='utf-8-sig') as file:
        data = json.load(file)

    # Compute the real scores; saving an empty dict here would wipe any
    # previously written results at `outputFile`.
    result = {}
    for date, headlines in data.items():
        averageScore = calculateDailySentiment(headlines)
        print(f"{date} > {averageScore}")
        result[date] = averageScore

    with open(outputFile, 'w', encoding='utf-8') as file:
        json.dump(result, file, ensure_ascii=False, indent=4)

    print("Sentiment analysis complete! ✅")


In [None]:


# Headlines to read. NOTE(review): the output path below is the same
# dailyscores.json the earlier run targets, so this call overwrites that file
# with whatever the currently defined analyzeAndSaveSentiment produces.
inputJsonFile = '/Users/ASUS/Documents/Stock-Prediction-using-News-Info-Sentiment-master/data/news2023/head_lines.json'
outputJsonFile = '/Users/ASUS/Documents/Stock-Prediction-using-News-Info-Sentiment-master/data/news2023/dailyscores.json'

analyzeAndSaveSentiment(
    inputFile=inputJsonFile,
    outputFile=outputJsonFile,
)