# Libraries

In [1]:
import pandas as pd
import random
import re
import math
import csv
import spacy
from heapq import nlargest

# Dataset Loading and Tokenizing

In [2]:
path=r"/content/poems.csv"

with open(path,'r',encoding='utf-8-sig') as file:
    r=csv.reader(file)
    text=[row[0] for row in r if row]

nlp=spacy.blank('ur')
tokens=[]
for i in text:
    d=nlp(i)
    tokens.extend([token.text for token in d if not re.match(r'[a-zA-Z0-9/\s\.\'\"\+\-\,\’\?\!\@\؟\#\$\%\^\&\*\۔\(\)\[\]\{\}\;\<\>\?\:\|\\]',token.text)])
print(len(tokens))
tokens=set(tokens) #keep unique tokens /////*****OPTIMIZATION*****/////
print(len(tokens))
tokens=list(tokens) #as using list onward

21289
3009


# Functions to be used

In [3]:
#to generate a simple ngram model
def generateNgramModel(tokens, n):
    nGramModel = {}
    for i in range(len(tokens)-n):
        ngram = tuple(tokens[i:i+n])
        nextWord = tokens[i+n]
        if ngram in nGramModel:
            nGramModel[ngram].append(nextWord)
        else:
            nGramModel[ngram] = [nextWord]
    return nGramModel

#to generate an ngram model using backward method
def generateBackwardNgramModel(tokens, n):
    nGramModel = {}
    for i in range(n, len(tokens)):  # Iterate starting from n-th word
        ngram = tuple(tokens[i-n:i])  # Build ngram from right context
        previousWord = tokens[i-n-1]  # Word preceding the ngram
        if ngram in nGramModel:
            nGramModel[ngram].append(previousWord)
        else:
            nGramModel[ngram] = [previousWord]
    return nGramModel


#to generate a single verse using the specified model
def generateVerse(model, verseLength, previousEndingWord=None):
    #randomly select the starting word
    startingWord = random.choice(list(model.keys()))
    verse = list(startingWord)
    for i in range(verseLength - len(startingWord)):
        #predict the next word based on the model
        if tuple(verse[-(len(startingWord)):]) in model:
            nextWord = random.choice(model[tuple(verse[-(len(startingWord)):])])
        else:
            nextWord = random.choice(tokens)  #randomly choose if not found
        verse.append(nextWord)
    if previousEndingWord:
        #ensure the last word rhymes with the previous ending word /////*****BONUS*****/////
        endingWord = findRhymingWord(verse[-1], previousEndingWord, tokens)
        verse[-1] = endingWord
    return " ".join(verse), verse[-1]  #return verse and last word

#to find a rhyming word
def findRhymingWord(word, previousWord, tokens, n=3):
    rhymes = set()
    for token in tokens:
        if (token != word and token.endswith(word[-n:])) or (token != previousWord and token.endswith(previousWord[-n:])):
            rhymes.add(token)
    #keep only the top 3 rhyming words
    rhymes = list(rhymes)
    topRhymes = nlargest(5, rhymes, key=rhymes.count)
    if topRhymes:
        return random.choice(topRhymes)
    else:
        return word

#function to generate a stanza
def generateStanza(model, verseLength, numOfVerses):
    stanza = []
    previousEndingWord = None
    for _ in range(numOfVerses):
        verse, previousEndingWord = generateVerse(model, verseLength, previousEndingWord)
        stanza.append(verse)
    return stanza

# Generate unigram, bigram, and trigram models

In [4]:
unigramModel = generateBackwardNgramModel(tokens, 1)
bigramModel = generateBackwardNgramModel(tokens, 2)
trigramModel = generateBackwardNgramModel(tokens, 3)

# Generate Poetry

In [9]:
numOfStanzas = 4
verseLength = random.randint(7, 10)

for _ in range(numOfStanzas):
    #generate stanza using bigram model
    stanza = generateStanza(bigramModel, verseLength, 4)
    for verse in stanza:
        print(verse)
    print()  # Empty line after each stanza


فاصلہ دھلے ہوجاتے بھرا وفا دیں نصابوں چپ دھتکار
ملن کو تنہائیاں پوشاک جیتا قیصرؔ خوش دروازے دورازکار
ہمیشہ ہوں!کیا جلتی نہایت پائے چپکے تھکن معینہ سرکار
غالبؔ ترک سنتی قمرؔ تیوہار دکھتا مے حامل افکار

حویلی ڈبہ انورؔ خواں دیدۂ لحظہ ممتاز جانا تیرتے
نگہبان چرچا روگ امتحان آتا ریل زلفوں کیسے ہوسکتی
مٹائے شخصیت چھایا ناکافی ڈور دیوالی انھوں دیواروں سکتی
جانو خلوت رہگذر کتابیں بدلی کوئی محروم سینا ہوسکتی

ہوکر جشن گھسیٹتے پھونک دھتکار حب بندھے پھونکتے صلاحیتیں
فرش آنکھ سامعین گار گریاں مولا بیٹیاں یہاں ضرورتیں
جاگنے رحمت تعریف آخری پھیلاؤ نگاہ مخل لکھنے آیتیں
شعلوں بند طفلان شکوے حادثہ طرز بنے ایماں شکایتیں

تیرے پہچانتی دیئے حالانکہ وہیں سراب بن معینہ نواحی
فانیؔ دھیرے پیچیدہ ہوتے نعمت نقطے جگر شے دوالی
پیسہ پیارا داریاں مہیا چھت سنا سوا حنائی خیالی
گھنا تیشے ؎ سانسو عکس جاؤں قصہ جاتا گلالی



# Comparison of bigram and trigram models

In [6]:
numStanzas = 4
verseLength = random.randint(7, 10)
bigramStanzas = []
trigramStanzas = []

#generate stanzas using the bigram and trigram models for comparison
for _ in range(numStanzas):
    bigramStanza = generateStanza(bigramModel, verseLength, 4)
    bigramStanzas.append(bigramStanza)
    trigramStanza = generateStanza(trigramModel, verseLength, 4)
    trigramStanzas.append(trigramStanza)

def calculateApproximatePerplexity(model, text):
    logLikelihood = 0.0
    numWords = 0

    for i in range(len(text) - 2):
        if model.get(tuple(text[i:i+2])):
            nextWordProb = model[tuple(text[i:i+2])][text[i + 2]]  #get probability of next word
            logLikelihood += math.log(nextWordProb)  #update log likelihood
    numWords += 1

    print(logLikelihood) #for debugging
    return math.exp(-logLikelihood / max(numWords, 1))  #calculate perplexity

for stanza in bigramStanzas:
    bigramText = ' '.join([verse for verse in stanza])
    bigramApproxPerplexity = calculateApproximatePerplexity(bigramModel, bigramText)
    print("Bigram Stanza Perplexity:", bigramApproxPerplexity)

for stanza in trigramStanzas:
    trigramText = ' '.join([verse for verse in stanza])
    trigramApproxPerplexity = calculateApproximatePerplexity(trigramModel, trigramText)
    print("Trigram Stanza Perplexity:", trigramApproxPerplexity)


0.0
Bigram Stanza Perplexity: 1.0
0.0
Bigram Stanza Perplexity: 1.0
0.0
Bigram Stanza Perplexity: 1.0
0.0
Bigram Stanza Perplexity: 1.0
0.0
Trigram Stanza Perplexity: 1.0
0.0
Trigram Stanza Perplexity: 1.0
0.0
Trigram Stanza Perplexity: 1.0
0.0
Trigram Stanza Perplexity: 1.0


---

**Note:** I have tried my best to provide accurate results in this notebook. However, these results may not be entirely accurate, and contributions or corrections are encouraged. Thank you!
