In [1]:
import pandas as pd
import json

# Load the raw news table; the code below reads its 'Date' and 'News' columns.
df = pd.read_csv('hdfcnews.csv')

# Group headlines by date: {date string: [{"heading": headline}, ...]}.
# Insertion order follows the row order of the CSV.
data = {}
for date, headline in zip(df['Date'].tolist(), df['News'].tolist()):
    # setdefault creates the per-date list on first sight of a date, which
    # replaces the explicit "is the key already present?" branch.
    data.setdefault(str(date), []).append({"heading": headline})

# Persist the grouped headlines. The file name must match the one the
# sentiment-scoring cell reads ('hdfclstmheadlines.json'); the previous
# spelling 'hdfclstmmheadlines.json' left that cell without its input on a
# fresh Restart & Run All.
with open('hdfclstmheadlines.json', 'w', encoding='utf-8') as f:
    # ensure_ascii=False keeps non-ASCII headline text readable in the file,
    # which is why the file is opened explicitly as UTF-8.
    json.dump(data, f, ensure_ascii=False)

In [2]:
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

import json

In [3]:
# Hugging Face model identifier shared by the tokenizer and the classifier so
# the two always come from the same checkpoint.
modelName = "nlptown/bert-base-multilingual-uncased-sentiment"
# Tokenizer matching the checkpoint; used as a module-level global by
# calculateDailySentiment.
tokenizer = AutoTokenizer.from_pretrained(modelName)
# Sequence-classification model; also read as a module-level global by
# calculateDailySentiment.
# NOTE(review): the printed results show five scores per date, presumably the
# checkpoint's 1-5 star classes - confirm against the model card.
model = AutoModelForSequenceClassification.from_pretrained(modelName)

In [5]:
def calculateDailySentiment(headlines):
    """Average the classifier's class probabilities over a set of headlines.

    Uses the module-level ``tokenizer`` and ``model``.

    Args:
        headlines: Iterable of dicts, each with a 'heading' key holding the
            headline text.

    Returns:
        A list of floats, one per model output class: the mean over all
        headlines of the softmax of the classifier logits.
    """
    texts = [headline['heading'] for headline in headlines]
    # All headlines are tokenized as one padded batch, truncated to 512 tokens.
    inputs = tokenizer(texts, return_tensors="pt", truncation=True, padding=True, max_length=512, return_attention_mask=True)
    # Inference only: disabling autograd avoids building a graph and keeping
    # activations alive for every batch. The numeric results are unchanged.
    with torch.no_grad():
        outputs = model(**inputs)
    #endwith
    logits = outputs.logits
    # Softmax over the class axis turns each headline's logits into probabilities.
    scores = logits.softmax(dim=1)
    # Mean over the batch axis collapses the headlines into one distribution.
    averageScore = scores.mean(dim=0).tolist()
    return averageScore
#enddef

def analyzeAndSaveSentiment(inputFile, outputFile):
    """Score every date's headlines and write the per-date averages as JSON.

    Args:
        inputFile: Path to a JSON file mapping each date string to a list of
            headline dicts (the structure calculateDailySentiment accepts).
        outputFile: Path of the JSON file to write; it maps each date string
            to the list returned by calculateDailySentiment.

    Returns:
        None. Results are printed per date and written to ``outputFile``.
    """
    # Read explicitly as UTF-8: the headlines file is written as UTF-8 with
    # ensure_ascii=False, so relying on the platform default encoding can
    # fail or mis-decode non-ASCII headlines.
    with open(inputFile, 'r', encoding='utf-8') as file:
        data = json.load(file)
    #endwith

    result = {}

    for date, headlines in data.items():
        averageScore = calculateDailySentiment(headlines)
        # Progress trace: one line per date as it is scored.
        print(f"{date} > {averageScore}")
        result[date] = averageScore
    #endfor

    # Use a separate name for the handle so the outputFile path argument is
    # not rebound to a file object.
    with open(outputFile, 'w', encoding='utf-8') as outputHandle:
        json.dump(result, outputHandle, indent=2)
    #endwith
#enddef


In [8]:
# Paths for this run: grouped headlines in, per-date average scores out.
inputJsonFile, outputJsonFile = 'hdfclstmheadlines.json', 'hdfcdaily_scores.json'

# Score every date and write the results; one line is printed per date.
analyzeAndSaveSentiment(inputFile=inputJsonFile, outputFile=outputJsonFile)

04 Jan 2011 > [0.14239197969436646, 0.19975623488426208, 0.3116403818130493, 0.21831227838993073, 0.12789909541606903]
07 Jan 2011 > [0.181715190410614, 0.17870093882083893, 0.28740230202674866, 0.2066875547170639, 0.1454940140247345]
08 Jan 2011 > [0.1972026526927948, 0.19367334246635437, 0.29231759905815125, 0.187404066324234, 0.1294022798538208]
10 Jan 2011 > [0.06704159826040268, 0.13708177208900452, 0.3223133683204651, 0.2955230176448822, 0.1780402511358261]
11 Jan 2011 > [0.212809756398201, 0.19509904086589813, 0.2869774401187897, 0.1795252114534378, 0.1255885809659958]
12 Jan 2011 > [0.18756042420864105, 0.17427828907966614, 0.2843610942363739, 0.20473213493824005, 0.14906801283359528]
18 Jan 2011 > [0.03784029558300972, 0.05387088656425476, 0.17742109298706055, 0.38274717330932617, 0.3481205403804779]
19 Jan 2011 > [0.06700679659843445, 0.07011174410581589, 0.18485011160373688, 0.32667720317840576, 0.35135406255722046]
20 Jan 2011 > [0.18234862387180328, 0.1999097466468811, 0.2