# Step 1: Group news headlines by date and compute daily sentiment scores


In [3]:
import pandas as pd
import json

# Load the raw news table; only the 'Date' and 'News' columns are used below.
df = pd.read_csv('inputdatafiles/infosysnews.csv')

# Map each date (as a string) to its list of {"heading": ...} entries,
# keeping the headlines in the order they appear in the CSV.
data = {}
for rawDate, headline in zip(df['Date'], df['News']):
    date = str(rawDate)
    # setdefault creates the per-date list the first time a date is seen.
    data.setdefault(date, []).append({"heading": headline})

# Persist the grouped headlines as UTF-8 JSON, leaving non-ASCII text unescaped.
with open('infosyslstmheadlines.json', 'w', encoding='utf-8') as jsonFile:
    json.dump(data, jsonFile, ensure_ascii=False)

In [4]:
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

import json

In [5]:
# Checkpoint identifier; the same value is passed to both from_pretrained calls
# so the tokenizer and the classifier come from the same checkpoint.
# NOTE(review): the printed results further down show five scores per date,
# presumably this checkpoint's five sentiment classes — confirm against the model card.
modelName = "nlptown/bert-base-multilingual-uncased-sentiment"
# `tokenizer` and `model` are read as globals by calculateDailySentiment.
tokenizer = AutoTokenizer.from_pretrained(modelName)
model = AutoModelForSequenceClassification.from_pretrained(modelName)

In [6]:
def calculateDailySentiment(headlines):
    """Average the sentiment class probabilities over one day's headlines.

    Uses the module-level ``tokenizer`` and ``model``.

    Args:
        headlines: Non-empty sequence of dicts, each with a 'heading' key
            holding the headline text.

    Returns:
        list[float]: The softmax probabilities averaged over all headlines,
        one value per model output class.

    Raises:
        ValueError: If ``headlines`` is empty, since an average over zero
            headlines is undefined.
    """
    if len(headlines) == 0:
        raise ValueError("headlines must contain at least one entry")
    #endif

    texts = [headline['heading'] for headline in headlines]
    # All of the day's headlines are tokenized and scored as one padded batch.
    inputs = tokenizer(texts, return_tensors="pt", truncation=True, padding=True, max_length=512, return_attention_mask=True)

    # Inference only: disable autograd so no graph is built and no activations
    # are retained for a backward pass that never happens.
    with torch.no_grad():
        logits = model(**inputs).logits
    #endwith

    # presumably logits is (num_headlines, num_classes): softmax runs over
    # dim 1 and the mean collapses dim 0 — TODO confirm against the model.
    scores = logits.softmax(dim=1)
    return scores.mean(dim=0).tolist()
#enddef

def analyzeAndSaveSentiment(inputFile, outputFile):
    """Score every date's headlines and write the per-date averages to JSON.

    Args:
        inputFile: Path to a JSON file mapping each date string to a list of
            {"heading": ...} dicts.
        outputFile: Path of the JSON file to write; it maps each date string
            to the list returned by calculateDailySentiment for that date.

    Returns:
        None. Each date's result is printed as it is computed, and the full
        mapping is written to ``outputFile``.
    """
    # Read as UTF-8 explicitly: the headlines file is written as UTF-8 with
    # ensure_ascii=False, so relying on the platform default encoding can
    # mis-decode or fail on non-ASCII headlines.
    with open(inputFile, 'r', encoding='utf-8') as sourceFile:
        data = json.load(sourceFile)
    #endwith

    result = {}

    for date, headlines in data.items():
        averageScore = calculateDailySentiment(headlines)
        print(f"{date} > {averageScore}")
        result[date] = averageScore
    #endfor

    # Use a separate name for the handle so the outputFile path is not rebound.
    with open(outputFile, 'w', encoding='utf-8') as targetFile:
        json.dump(result, targetFile, indent=2)
    #endwith
#enddef


In [8]:
# Input: the per-date headlines JSON written by the first cell of this notebook.
inputJsonFile = 'infosyslstmheadlines.json'
# Output: destination for the per-date averaged sentiment scores.
outputJsonFile = 'infosysdaily_scores.json'

# Scores every date, printing one "<date> > <scores>" line per date, then writes the JSON.
analyzeAndSaveSentiment(inputJsonFile, outputJsonFile)

03 Jan 2011 > [0.07110046595335007, 0.08782283961772919, 0.22645041346549988, 0.32487860321998596, 0.2897476851940155]
05 Jan 2011 > [0.08019362390041351, 0.10336549580097198, 0.253666490316391, 0.29862380027770996, 0.26415058970451355]
06 Jan 2011 > [0.21877044439315796, 0.21242906153202057, 0.2974112927913666, 0.16827958822250366, 0.10310959070920944]
07 Jan 2011 > [0.1756400465965271, 0.1912253350019455, 0.29557645320892334, 0.20815785229206085, 0.1294003427028656]
08 Jan 2011 > [0.20949625968933105, 0.21130837500095367, 0.2950161397457123, 0.17468442022800446, 0.10949482023715973]
10 Jan 2011 > [0.2902967929840088, 0.24703150987625122, 0.2509922683238983, 0.13314394652843475, 0.07853545248508453]
11 Jan 2011 > [0.1147967278957367, 0.14129585027694702, 0.26578018069267273, 0.2847667634487152, 0.19336052238941193]
12 Jan 2011 > [0.15983553230762482, 0.15243439376354218, 0.28128448128700256, 0.23594224452972412, 0.17050333321094513]
13 Jan 2011 > [0.1080099418759346, 0.132168814539909