In [16]:
from nltk.tag.stanford import StanfordNERTagger
from nltk.tokenize import word_tokenize, sent_tokenize
import nltk
from transformers import pipeline
import pandas as pd


!wget 'https://nlp.stanford.edu/software/stanford-ner-2018-10-16.zip'
!unzip stanford-ner-2018-10-16.zip

nltk.download('punkt')

st = StanfordNERTagger('/content/stanford-ner-2018-10-16/classifiers/english.all.3class.distsim.crf.ser.gz',
                       '/content/stanford-ner-2018-10-16/stanford-ner.jar',
                       encoding='utf-8')

# text = 'While in France, Christine Lagarde discussed short-term stimulus efforts in a recent interview with the Wall Street Journal.'

# tokenized_text = word_tokenize(text)
# classified_text = st.tag(tokenized_text)
# print(classified_text)

# Initialize the emotion classification pipeline
emotion_classifier = pipeline('text-classification', model='j-hartmann/emotion-english-distilroberta-base')

def extract_entities(sentence):
    """Tag a sentence with the Stanford NER tagger and collect its entities.

    Args:
        sentence: Text of a single sentence.

    Returns:
        A pair ``(tagged, found)``: ``tagged`` is the tagger output as
        ``(word, tag)`` pairs, and ``found`` maps each of "PERSON",
        "ORGANIZATION" and "LOCATION" to the set of words carrying that tag.
    """
    tagged = st.tag(word_tokenize(sentence))
    found = {label: set() for label in ("PERSON", "ORGANIZATION", "LOCATION")}
    for token, label in tagged:
        # Any other tag (e.g. 'O') has no bucket and is skipped
        bucket = found.get(label)
        if bucket is not None:
            bucket.add(token)
    return tagged, found

def truncate_sentence(sentence, max_length=510):
    """Cap a sentence at ``max_length`` word tokens.

    A sentence that already fits is returned unchanged. Re-joining its tokens
    would rewrite the text in tokenised form (for example "I'm" becomes
    "I 'm"), and that altered text would end up in the stored segments.

    Args:
        sentence: Text of a single sentence.
        max_length: Maximum number of ``word_tokenize`` tokens to keep.

    Returns:
        The original sentence if it has at most ``max_length`` tokens,
        otherwise the first ``max_length`` tokens joined by single spaces.
    """
    tokens = word_tokenize(sentence)
    if len(tokens) <= max_length:
        return sentence
    return ' '.join(tokens[:max_length])

def _entities_changed(current_entities, prev_entities):
    """Return True if the sentence names entities that differ from the segment's."""
    # An empty set means the sentence says nothing about that entity type,
    # so it never forces a split on its own.
    return any(
        current_entities[kind] and current_entities[kind] != prev_entities[kind]
        for kind in ("PERSON", "ORGANIZATION", "LOCATION")
    )


def segment_poem_by_entities_and_emotion(poem):
    """Split a poem into segments of consecutive, similar sentences.

    A new segment starts whenever a sentence's emotion label differs from the
    current segment's, or the sentence mentions a non-empty PERSON /
    ORGANIZATION / LOCATION set that differs from the one recorded for the
    current segment. Per-sentence diagnostics and the final result are printed.

    Args:
        poem: Poem text. A non-string value (e.g. None, or the float NaN that
            pandas uses for a missing cell) is treated as an empty poem
            instead of raising inside the sentence tokenizer.

    Returns:
        A tuple ``(segments, segment_entities, segment_emotions)`` of three
        parallel lists: the segment text, the entity dict recorded for the
        segment, and the segment's emotion label.
    """
    sentences = sent_tokenize(poem) if isinstance(poem, str) else []

    segments = []
    segment_entities = []
    segment_emotions = []
    current_segment = []
    prev_entities = {"PERSON": set(), "ORGANIZATION": set(), "LOCATION": set()}
    prev_emotion = None

    for sentence in sentences:
        sentence = truncate_sentence(sentence)
        tagged_sentence, current_entities = extract_entities(sentence)
        # truncate_sentence caps *word* tokens, which does not bound the
        # number of sub-word tokens the model sees; let the pipeline's
        # tokenizer truncate to the model's own limit as well.
        emotion = emotion_classifier(sentence, truncation=True)[0]
        current_emotion = emotion['label']

        print(f"Sentence: {sentence}")
        print(f"NER Tags: {tagged_sentence}")
        print(f"Emotion: {current_emotion} (Score: {emotion['score']:.2f})")
        print()

        starts_new_segment = (
            current_emotion != prev_emotion
            or _entities_changed(current_entities, prev_entities)
        )
        if starts_new_segment:
            # Close the running segment (if any) before opening the next one
            if current_segment:
                segments.append(" ".join(current_segment))
                segment_entities.append(prev_entities)
                segment_emotions.append(prev_emotion)
            current_segment = [sentence]
            prev_entities = current_entities
            prev_emotion = current_emotion
        else:
            current_segment.append(sentence)

    # Flush whatever is still open after the last sentence
    if current_segment:
        segments.append(" ".join(current_segment))
        segment_entities.append(prev_entities)
        segment_emotions.append(prev_emotion)

    print(segments)
    print()
    print(segment_entities)
    print()
    print(segment_emotions)

    return segments, segment_entities, segment_emotions











# def segment_poem_by_entities_and_emotion1(poem):
#     sentences = sent_tokenize(poem)

#     segments = []
#     current_segment = []
#     current_entities = {"PERSON": set(), "ORGANIZATION": set(), "LOCATION": set()}
#     current_emotion = None

#     for sentence in sentences:
#         tagged_sentence, new_entities = extract_entities(sentence)
#         emotion = emotion_classifier(sentence)[0]
#         new_emotion = emotion['label']

#         print(f"Sentence: {sentence}")
#         print(f"NER Tags: {tagged_sentence}")
#         print(f"Emotion: {new_emotion} (Score: {emotion['score']:.2f})")
#         print()

#         if (new_entities["PERSON"] != current_entities["PERSON"] or
#             new_entities["ORGANIZATION"] != current_entities["ORGANIZATION"] or
#             new_entities["LOCATION"] != current_entities["LOCATION"] or
#             new_emotion != current_emotion):
#             if current_segment:
#                 segments.append(" ".join(current_segment))
#             current_segment = [sentence]
#             current_entities = new_entities
#             current_emotion = new_emotion
#         else:
#             current_segment.append(sentence)

#     if current_segment:
#         segments.append(" ".join(current_segment))

#     return segments

--2024-07-31 11:48:52--  https://nlp.stanford.edu/software/stanford-ner-2018-10-16.zip
Resolving nlp.stanford.edu (nlp.stanford.edu)... 171.64.67.140
Connecting to nlp.stanford.edu (nlp.stanford.edu)|171.64.67.140|:443... connected.
HTTP request sent, awaiting response... 302 FOUND
Location: https://downloads.cs.stanford.edu/nlp/software/stanford-ner-2018-10-16.zip [following]
--2024-07-31 11:48:53--  https://downloads.cs.stanford.edu/nlp/software/stanford-ner-2018-10-16.zip
Resolving downloads.cs.stanford.edu (downloads.cs.stanford.edu)... 171.64.64.22
Connecting to downloads.cs.stanford.edu (downloads.cs.stanford.edu)|171.64.64.22|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 180358328 (172M) [application/zip]
Saving to: ‘stanford-ner-2018-10-16.zip.5’


2024-07-31 11:49:24 (5.48 MB/s) - ‘stanford-ner-2018-10-16.zip.5’ saved [180358328/180358328]

Archive:  stanford-ner-2018-10-16.zip
replace stanford-ner-2018-10-16/README.txt? [y]es, [n]o, [A]ll, [N]one, 

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [16]:
# from transformers import pipeline
# emotion_classifier = pipeline("text-classification", model="j-hartmann/emotion-english-distilroberta-base", return_all_scores=False)

# sentence = 'While in France, Christine Lagarde discussed short-term stimulus efforts in a recent interview with the Wall Street Journal.'
# emotion = emotion_classifier(sentence)[0]
# print(emotion)
# new_emotion = emotion['label']


{'label': 'neutral', 'score': 0.8091278672218323}


In [2]:
# poem = """
# "Can someone make my simple wish come true?
# Male biker seeks female for touring fun.
# Do you live in North London? Is it you?
# Gay vegetarian whose friends are few,
# I'm into music, Shakespeare and the sun.
# Can someone make my simple wish come true?
# Executive in search of something new—
# Perhaps bisexual woman, arty, young.
# Do you live in North London? Is it you?
# Successful, straight and solvent? I am too—
# Attractive Jewish lady with a son.
# Can someone make my simple wish come true?
# I'm Libran, inexperienced and blue—
# Need slim, non-smoker, under twenty-one.
# Do you live in North London? Is it you?
# Please write (with photo) to Box 152.
# Who knows where it may lead once we've begun?
# Can someone make my simple wish come true?
# Do you live in North London? Is it you?"
# """

# segments = segment_poem_by_entities_and_emotion(poem)

# #printing the segments

# for i, segment in enumerate(segments):
#     emotion = emotion_classifier(segment)[0]
#     tokenized_segment = word_tokenize(segment)
#     tagged_segment = st.tag(tokenized_segment)

#     ner_tags = {(tag, word) for word, tag in tagged_segment if tag != 'O'}

#     print(f"Segment {i+1}:")
#     print(segment)
#     print(f"NER Tags: {ner_tags}")
#     print(f"Emotion: {emotion['label']} (Score: {emotion['score']:.2f})")
#     print()

Sentence: 
"Can someone make my simple wish come true?
NER Tags: [("''", 'O'), ('Can', 'O'), ('someone', 'O'), ('make', 'O'), ('my', 'O'), ('simple', 'O'), ('wish', 'O'), ('come', 'O'), ('true', 'O'), ('?', 'O')]
Emotion: neutral (Score: 0.52)

Sentence: Male biker seeks female for touring fun.
NER Tags: [('Male', 'O'), ('biker', 'O'), ('seeks', 'O'), ('female', 'O'), ('for', 'O'), ('touring', 'O'), ('fun', 'O'), ('.', 'O')]
Emotion: neutral (Score: 0.90)

Sentence: Do you live in North London?
NER Tags: [('Do', 'O'), ('you', 'O'), ('live', 'O'), ('in', 'O'), ('North', 'LOCATION'), ('London', 'LOCATION'), ('?', 'O')]
Emotion: neutral (Score: 0.84)

Sentence: Is it you?
NER Tags: [('Is', 'O'), ('it', 'O'), ('you', 'O'), ('?', 'O')]
Emotion: surprise (Score: 0.77)

Sentence: Gay vegetarian whose friends are few,
I'm into music, Shakespeare and the sun.
NER Tags: [('Gay', 'O'), ('vegetarian', 'O'), ('whose', 'O'), ('friends', 'O'), ('are', 'O'), ('few', 'O'), (',', 'O'), ('I', 'O'), ("'m"

TypeError: expected string or bytes-like object

In [13]:
import pandas as pd

# Start the output file from scratch: an empty table that carries only the
# column headers (the poem columns plus Segments / NER Tags / Emotions).
df = pd.DataFrame(
    columns=[
        'Title', 'Poet', 'text', 'ctext', 'Poem Link', 'our_summary',
        'Segments', 'NER Tags', 'Emotions',
    ],
)

# Write just the header row (no index column) to the CSV file
df.to_csv(path_or_buf='modified_poems.csv', index=False)


In [None]:
import csv
import pandas as pd
import os

def append_to_csv(file_path, data):
    """Append one row to a CSV file, creating the file if it does not exist.

    Args:
        file_path: Path of the CSV file to append to.
        data: Iterable of cell values for the new row; non-string values are
            written via their string form by ``csv.writer``.
    """
    # Explicit UTF-8 keeps the file consistent with the pandas-written header
    # and avoids locale-dependent failures on non-ASCII poem text.
    # newline='' lets the csv module control line endings itself.
    with open(file_path, 'a', newline='', encoding='utf-8') as f:
        csv.writer(f).writerow(data)

def get_existing_row_count(file_path):
    """Count the data rows (everything after the header) in a CSV file.

    Args:
        file_path: Path of the CSV file to inspect.

    Returns:
        The number of CSV records minus one for the header row. Returns 0
        when the file is missing or completely empty; it never goes negative.
    """
    if not os.path.exists(file_path):
        return 0
    # newline='' is what the csv module expects so that quoted fields with
    # embedded line breaks are parsed as a single record.
    with open(file_path, 'r', newline='', encoding='utf-8') as f:
        row_count = sum(1 for _ in csv.reader(f))
    # An existing but empty file has no header row to subtract
    return max(row_count - 1, 0)

# Load the source poems; each row's 'ctext' column is the text to segment
df = pd.read_csv('/content/IITP_Poems - emnlp3000.csv')

# Number of rows already processed, used to resume an interrupted run.
# NOTE(review): the count is taken from 'modified_poems (8).csv' while new
# rows are appended to 'modified_poems.csv' below - confirm this is intended.
existing_row_count = get_existing_row_count('/content/modified_poems (8).csv')

for index, row in df.iterrows():
    # Only handle rows beyond those counted as already done
    if index >= existing_row_count:
        poem = row['ctext']
        segments, ner_tags, emotions = segment_poem_by_entities_and_emotion(poem)

        # Original columns first, then the three derived columns
        data_to_append = [
            row[column]
            for column in ('Title', 'Poet', 'text', 'ctext', 'Poem Link', 'our_summary')
        ]
        data_to_append += [segments, ner_tags, emotions]

        append_to_csv('modified_poems.csv', data_to_append)


Sentence: I love you because the Earth turns round the sun because the North wind blows north sometimes because the Pope is Catholic and most Rabbis Jewish because the winters flow into springs and the air clears after a storm because only my love for you despite the charms of gravity keeps me from falling off this Earth into another dimension I love you because it is the natural order of things I love you like the habit I picked up in college of sleeping through lectures or saying I ’ m sorry when I get stopped for speeding because I drink a glass of water in the morning and chain-smoke cigarettes all through the day because I take my coffee Black and my milk with chocolate because you keep my feet warm though my life a mess I love you because I don ’ t want it any other way I am helpless in my love for you It makes me so happy to hear you call my name I am amazed you can resist locking me in an echo chamber where your voice reverberates through the four walls sending me into spasmati