In [7]:
import random
from nltk import ngrams

# Read both poets' files and collect every line of verse into one list
poetry_list = []
for poet_file in ('iqbal.txt', 'ghalib.txt'):
    with open(poet_file, 'r', encoding='utf-8') as file:
        poetry_list.extend(file.read().splitlines())

# Flatten the lines into one stream of whitespace-separated tokens
words = [token for verse_line in poetry_list for token in verse_line.split()]

# Map each word to the list of words that directly follow it in the stream.
# Repeated successors are kept, and because the stream is flat the pairs
# also run across line and file boundaries.
bigram_words = {}
for first, second in ngrams(words, 2):
    bigram_words.setdefault(first, []).append(second)

def generate_ghazal_verse(model, length_range):
    """Build one verse as a list of words by walking the bigram table.

    Args:
        model: mapping from a word to the list of words that may follow it.
        length_range: pair (low, high); the target word count is drawn
            uniformly from this inclusive range.

    Returns:
        The list of chosen words. It is shorter than the drawn target when
        the walk reaches a word that has no followers in `model`.
    """
    low, high = length_range[0], length_range[1]
    target = random.randint(low, high)

    verse = []
    for _ in range(target):
        if verse:
            # Continue from the previous word; stop early on a dead end
            candidates = model.get(verse[-1], [])
            if not candidates:
                break
            verse.append(random.choice(candidates))
        else:
            # Opening word comes from the module-level token list
            verse.append(random.choice(words))

    return verse

def last_words_rhyme(verses):
    """Report whether all verses end in words sharing their last two characters.

    "Rhyme" here is purely textual: the final two characters of each verse's
    last word are compared, including any punctuation attached to the word.

    Args:
        verses: list of verses, each verse being a list of word strings.

    Returns:
        True when every verse's last word has the same two-character tail
        (trivially True for an empty list or a single verse). False when the
        tails differ or when any verse has no words at all.
    """
    # Nothing to compare: previously this raised IndexError on verses[0]
    if not verses:
        return True

    # A verse without words has no last word, so it cannot rhyme
    if any(not verse for verse in verses):
        return False

    rhyme = verses[0][-1][-2:]
    return all(verse[-1][-2:] == rhyme for verse in verses[1:])

def generate_complete_ghazal(stanza_count, verses_per_stanza, length_range=(7, 10)):
    """Print a ghazal made of stanzas whose verses all share an ending.

    Each stanza is produced by drawing `verses_per_stanza` verses from the
    module-level `bigram_words` table and redrawing the whole stanza until
    `last_words_rhyme` accepts it. There is no retry limit, so the call runs
    for as long as it takes to hit a rhyming combination.

    Args:
        stanza_count: number of stanzas to print.
        verses_per_stanza: number of verses in each stanza.
        length_range: inclusive (low, high) word-count range passed to
            `generate_ghazal_verse`; defaults to the original (7, 10).

    Returns:
        None. The verses are written to standard output, one per line, with
        a blank line after every stanza.
    """
    for _ in range(stanza_count):
        while True:
            # Draw a fresh candidate stanza on every attempt
            rhyming_verses = [
                generate_ghazal_verse(bigram_words, length_range)
                for _ in range(verses_per_stanza)
            ]
            if last_words_rhyme(rhyming_verses):
                break

        for verse in rhyming_verses:
            print(" ".join(verse))

        # Blank line separates consecutive stanzas
        print()

# Print a ghazal of three stanzas with four verses each
generate_complete_ghazal(stanza_count=3, verses_per_stanza=4)


نہ پُوچھ شگفتگی ہے اتراتا وگرنہ میں
ھے کس سے آپ ہی سے ھیں
کئے ہوئے ہیں گرفتارِ وفا، زنداں میں
گرم طواف اولیٰ بہت ستمگر کو پردے میں

کے اپنی کیا ہے جس کو دعائیں
تو دیتا ہے بجلی سے ملتیں جب سر پر پڑیں
تیرے آئے ہیں فاش رموز قلندری میری چینِ جبیں
نہ دوں غالب اس کی پرواز میں

خودی کی موت وہ نہیں کرتے ہیں
کی یا رب خداوندان مکتب کی جزا کہیں
ہوگا تمہاری تہذیب اپنے کو‘ کبھی تو غربت میں
دی پرہیز بچھائی ہے وہی اول دیتے ہیں



In [3]:
import nltk
from nltk.corpus import stopwords

# Languages scored by classify_language, in the order they appear in its result
_STOPWORD_LANGUAGES = (
    'arabic',
    'azerbaijani',
    'basque',
    'bengali',
    'catalan',
    'chinese',
    'danish',
    'dutch',
    'english',
    'finnish',
    'french',
    'german',
    'greek',
    'hebrew',
    'hinglish',
    'hungarian',
    'indonesian',
    'italian',
    'kazakh',
    'nepali',
    'norwegian',
    'portuguese',
    'romanian',
    'russian',
    'slovene',
    'spanish',
    'swedish',
    'tajik',
    'turkish',
)

# Stopword sets keyed by language; filled on first use so the corpus files
# are read once instead of on every classify_language call
_STOPWORD_SETS = {}


def _stopword_sets():
    """Return the per-language stopword sets, loading them on the first call."""
    if not _STOPWORD_SETS:
        # Build into a local dict first so a failed load never leaves a
        # partially filled cache behind
        loaded = {
            language: set(stopwords.words(language))
            for language in _STOPWORD_LANGUAGES
        }
        _STOPWORD_SETS.update(loaded)
    return _STOPWORD_SETS


def classify_language(text):
    """Count, per language, how many tokens of `text` are stopwords.

    The text is lowercased and tokenized with nltk.wordpunct_tokenize, which
    splits punctuation into separate tokens rather than removing it. Every
    token is then checked against the stopword set of each language, so a
    token that is a stopword in several languages is counted for all of them.

    Args:
        text: the string to analyse.

    Returns:
        A new dict mapping every language in `_STOPWORD_LANGUAGES` to its
        stopword hit count (0 when no token matched).

    Raises:
        LookupError: raised by nltk when the stopwords corpus has not been
            downloaded.
    """
    language_stopwords = _stopword_sets()
    language_counts = {language: 0 for language in _STOPWORD_LANGUAGES}

    words = nltk.wordpunct_tokenize(text.lower())

    for word in words:
        for language, stopword_set in language_stopwords.items():
            if word in stopword_set:
                language_counts[language] += 1

    return language_counts

# Sample sentence that mixes words from several languages
test = "An article is qualunque member van un class of dedicated words naquele estão used with noun phrases per mark the identifiability of the referents of the noun phrases"

# Score the sample and show the per-language stopword counts
result = classify_language(text=test)
print(result)


{'arabic': 0, 'azerbaijani': 3, 'basque': 0, 'bengali': 0, 'catalan': 3, 'chinese': 0, 'danish': 0, 'dutch': 5, 'english': 9, 'finnish': 0, 'french': 1, 'german': 1, 'greek': 0, 'hebrew': 0, 'hinglish': 12, 'hungarian': 1, 'indonesian': 1, 'italian': 2, 'kazakh': 0, 'nepali': 0, 'norwegian': 0, 'portuguese': 1, 'romanian': 1, 'russian': 0, 'slovene': 0, 'spanish': 1, 'swedish': 0, 'tajik': 0, 'turkish': 0}


In [4]:
import random
from nltk import ngrams

# Gather the verse lines of both poets, Iqbal first and then Ghalib
poetry_list = []
for source_path in ['iqbal.txt', 'ghalib.txt']:
    with open(source_path, 'r', encoding='utf-8') as file:
        poetry_list += file.read().splitlines()

# Split every line on whitespace and keep the tokens in reading order
words = []
for line in poetry_list:
    words.extend(line.split())

# Bigram model: word -> every word observed immediately after it
# (one entry per occurrence, so repeats are preserved)
bigram_model = {}
for head, follower in ngrams(words, 2):
    if head in bigram_model:
        bigram_model[head].append(follower)
    else:
        bigram_model[head] = [follower]

def generate_ghazal_verse(model, length_range):
    """Generate a single verse as a list of words from a bigram model.

    Args:
        model: dict-like mapping of a word to its list of possible successors.
        length_range: (low, high) pair; the number of words aimed for is a
            random integer in this inclusive range.

    Returns:
        A list of words. Generation ends early, giving a shorter verse, when
        the latest word has no successors in `model`.
    """
    remaining = random.randint(length_range[0], length_range[1])
    if remaining <= 0:
        return []

    # The first word is drawn from the module-level corpus token list
    verse = [random.choice(words)]
    remaining -= 1

    # Each further word is drawn from the successors of the previous one
    while remaining > 0:
        followers = model.get(verse[-1], [])
        if not followers:
            break
        verse.append(random.choice(followers))
        remaining -= 1

    return verse

def last_words_rhyme(verses):
    """Check that the last words of all verses end in the same two characters.

    The comparison is on raw text: the final two characters of each verse's
    last word, with any attached punctuation included.

    Args:
        verses: list of verses; each verse is a list of word strings.

    Returns:
        True if all verses share one two-character ending, which includes the
        cases of no verses and of a single verse. False if the endings differ
        or if some verse contains no words.
    """
    endings = set()
    for verse in verses:
        # An empty verse used to raise IndexError; it simply cannot rhyme
        if not verse:
            return False
        endings.add(verse[-1][-2:])

    # Zero endings (no verses) or one distinct ending both count as rhyming
    return len(endings) <= 1

def generate_complete_ghazal(stanza_count, verses_per_stanza, length_range=(7, 10)):
    """Generate and print a ghazal whose stanzas each end on a common rhyme.

    For every stanza, `verses_per_stanza` verses are generated from the
    module-level `bigram_model`; the whole stanza is regenerated until
    `last_words_rhyme` returns True. The retry loop is unbounded.

    Args:
        stanza_count: how many stanzas to print.
        verses_per_stanza: how many verses each stanza holds.
        length_range: inclusive (low, high) bounds on words per verse,
            forwarded to `generate_ghazal_verse`. Defaults to (7, 10), the
            value that was previously fixed inside the function.

    Returns:
        None. Output goes to standard output: one verse per line and an
        empty line after each stanza.
    """
    for _stanza in range(stanza_count):
        rhyming_verses = []
        found = False
        while not found:
            rhyming_verses = [
                generate_ghazal_verse(bigram_model, length_range)
                for _verse in range(verses_per_stanza)
            ]
            found = last_words_rhyme(rhyming_verses)

        for verse in rhyming_verses:
            print(" ".join(verse))

        # Empty line between stanzas
        print()

# Three stanzas, four verses in each
generate_complete_ghazal(stanza_count=3, verses_per_stanza=4)


گیا اب کوئی اداس بیٹھا ستم ایجاد! نہیں
رباب میں آرایش لباسِ نظم میں تمھیں پندارِ خدائی میں
باد ء رسوا ہوئے جاتا ہوں آتش ناک میں
کرے ہے چراغِ محفل اٹھ گئی ہے ہاں بھلا نہیں

کہ میں زہر ہلاہل کو کیا ہے
تھا کہ ’’ جاؤں کدھر کو صوفی میں ہے
ہے آسایشِ اربابِ غفلت میں ہیں چراغ محفل ہے
ادراک اک لفظ کہ یک گوشہ بساط ہے

کہتے ہیں ہزاروں بنوں میں لگی صدا میں
جز مرگ کا دیکھتا ہوں میں نقشِ قدم میں ہیں
کہو کیا غالب! کہ ملے شیخ حرم میں
کا کیا فائدہ یاروں کو دو ملت میں



In [None]:
import random
from nltk import ngrams
from collections import defaultdict

# Collect the lines of iqbal.txt followed by the lines of ghalib.txt
poetry_list = []
for corpus_file in ('iqbal.txt', 'ghalib.txt'):
    with open(corpus_file, 'r', encoding='utf-8') as file:
        corpus_lines = file.read().splitlines()
    poetry_list.extend(corpus_lines)

# Whitespace-tokenize every line into one flat word list
words = [word for line in poetry_list for word in line.split()]

# Successor table: each word maps to the words seen right after it;
# the defaultdict creates the list on a word's first appearance
bigram_model = defaultdict(list)
for current, following in ngrams(words, 2):
    successors = bigram_model[current]
    successors.append(following)

def generate_ghazal_verse(model, length_range):
    """Generate one verse, as a list of words, by following the bigram model.

    Args:
        model: mapping (dict or defaultdict) from a word to the list of words
            that can follow it.
        length_range: (low, high) pair; the target number of words is drawn
            uniformly from this inclusive range.

    Returns:
        The list of generated words. The verse is cut short when the most
        recent word has no recorded successor.
    """
    verse = []
    length = random.randint(length_range[0], length_range[1])

    while length > 0:
        if not verse:
            # Start with a random word from the module-level corpus tokens
            current_word = random.choice(words)
        else:
            # Look up with .get() rather than model[...]: indexing a
            # defaultdict inserts an empty list for every dead-end word, and
            # random.choice() on that empty list raises IndexError.
            next_words = model.get(verse[-1])
            if not next_words:
                break
            current_word = random.choice(next_words)

        verse.append(current_word)
        length -= 1

    return verse

def last_words_rhyme(verses):
    """Tell whether the verses' final words all share a two-character ending.

    Only the raw last two characters of each verse's last word are compared;
    punctuation attached to a word is part of the comparison.

    Args:
        verses: list of verses, where a verse is a list of word strings.

    Returns:
        True when every ending matches the first verse's ending (also for an
        empty list or a single verse); False when any ending differs or any
        verse has no words.
    """
    try:
        endings = [verse[-1][-2:] for verse in verses]
    except IndexError:
        # Some verse is empty and therefore has no last word to compare
        return False

    # With no verses the generator is empty, so endings[0] is never touched
    return all(ending == endings[0] for ending in endings[1:])

def generate_complete_ghazal(stanza_count, verses_per_stanza, length_range=(7, 10)):
    """Print a multi-stanza ghazal in which each stanza's verses rhyme.

    A stanza is a list of `verses_per_stanza` verses generated from the
    module-level `bigram_model`. Candidate stanzas are drawn repeatedly, with
    no upper bound on attempts, until `last_words_rhyme` approves one.

    Args:
        stanza_count: number of stanzas to produce.
        verses_per_stanza: number of verses per stanza.
        length_range: inclusive (low, high) range for the word count of each
            verse, handed to `generate_ghazal_verse`. The default (7, 10)
            matches the range that used to be hard-coded here.

    Returns:
        None. Each verse is printed on its own line and every stanza is
        followed by a blank line.
    """
    for _ in range(stanza_count):
        while True:
            # The comprehension has its own scope, so its loop variable no
            # longer rebinds the outer `_`
            rhyming_verses = [
                generate_ghazal_verse(bigram_model, length_range)
                for _ in range(verses_per_stanza)
            ]
            if last_words_rhyme(rhyming_verses):
                break

        print("\n".join(" ".join(verse) for verse in rhyming_verses)) if rhyming_verses else None

        # Blank line after the stanza
        print()


# Produce three stanzas of four verses
generate_complete_ghazal(stanza_count=3, verses_per_stanza=4)
