
## Prodigy Infotech Task 3
##### Text Generation with Markov chains

In [5]:
import random
import numpy as np

class MarkovChain:
    def __init__(self, n=1):
        self.n = n  # Order of the Markov chain
        self.model = {}

    def add_sentence(self, sentence):
        """Adds a sentence to the model."""
        # Split the sentence into words
        words = sentence.split()
        for i in range(len(words) - self.n):
            key = tuple(words[i:i + self.n])  # n-grams
            next_word = words[i + self.n]  # Word that follows
            if key not in self.model:
                self.model[key] = []
            self.model[key].append(next_word)

    def generate_text(self, start=None, length=10):
        """Generates text using the Markov chain."""
        if start is None:
            start = random.choice(list(self.model.keys()))  # Random starting point
        else:
            start = tuple(start.split())

        result = list(start)
        for _ in range(length):
            key = tuple(result[-self.n:])  # Get the last n-grams
            if key in self.model:
                next_word = random.choice(self.model[key])
                result.append(next_word)
            else:
                break  # Stop if there are no further words
        return ' '.join(result)

# Example usage
if __name__ == "__main__":
    # Create an instance of the MarkovChain class
    markov_chain = MarkovChain(n=2)  # Using bigrams

    # Sample sentences to train the model
    sentences = [
        "I love programming in Python",
        "Python is a great programming language",
        "I enjoy learning new things",
        "Programming is fun and rewarding",
        "Learning Python is easy"
    ]

    # Train the model with sentences
    for sentence in sentences:
        markov_chain.add_sentence(sentence)

    # Generate text
    generated_text = markov_chain.generate_text(start="Python is", length=10)
    print("Generated Text:", generated_text)


Generated Text: Python is a great programming language


In [6]:
class CharacterMarkovChain:
    def __init__(self):
        self.model = {}

    def add_text(self, text):
        """Adds text to the model."""
        for i in range(len(text) - 1):
            char = text[i]
            next_char = text[i + 1]
            if char not in self.model:
                self.model[char] = []
            self.model[char].append(next_char)

    def generate_text(self, start=None, length=10):
        """Generates text using the Markov chain."""
        if start is None:
            start = random.choice(list(self.model.keys()))  # Random starting character
        result = start
        for _ in range(length):
            if start in self.model:
                next_char = random.choice(self.model[start])
                result += next_char
                start = next_char  # Update the current character
            else:
                break  # Stop if there are no further characters
        return result

# Example usage for Character Markov Chain
if __name__ == "__main__":
    # Create an instance of the CharacterMarkovChain class
    char_markov_chain = CharacterMarkovChain()

    # Sample text to train the model
    text = "I love programming in Python. Python is great!"

    # Train the model with text
    char_markov_chain.add_text(text)

    # Generate text
    generated_text = char_markov_chain.generate_text(start="P", length=50)
    print("Generated Character Text:", generated_text)


Generated Character Text: Pythog g Pyt!


In [7]:
class NGramMarkovChain:
    def __init__(self, n=1):
        self.n = n
        self.model = {}

    def add_sentence(self, sentence):
        words = sentence.split()
        for i in range(len(words) - self.n):
            key = tuple(words[i:i + self.n])
            next_word = words[i + self.n]
            if key not in self.model:
                self.model[key] = []
            self.model[key].append(next_word)

    def generate_text(self, start=None, length=10):
        if start is None:
            start = random.choice(list(self.model.keys()))
        else:
            start = tuple(start.split())

        result = list(start)
        for _ in range(length):
            key = tuple(result[-self.n:])
            if key in self.model:
                next_word = random.choice(self.model[key])
                result.append(next_word)
            else:
                break
        return ' '.join(result)

    def get_next_word_distribution(self, key):
        """Returns a distribution of possible next words for a given key."""
        return self.model.get(tuple(key), [])

# Example usage for N-Gram Markov Chain
if __name__ == "__main__":
    ngram_markov_chain = NGramMarkovChain(n=2)

    sentences = [
        "I love programming in Python",
        "Python is a great programming language",
        "I enjoy learning new things",
        "Programming is fun and rewarding",
        "Learning Python is easy"
    ]

    for sentence in sentences:
        ngram_markov_chain.add_sentence(sentence)

    generated_text = ngram_markov_chain.generate_text(start="Python is", length=10)
    print("Generated N-Gram Text:", generated_text)

    # Example of getting the next word distribution
    next_words = ngram_markov_chain.get_next_word_distribution(["Python", "is"])
    print("Possible next words after 'Python is':", next_words)


Generated N-Gram Text: Python is a great programming language
Possible next words after 'Python is': ['a', 'easy']


In [8]:
class ConditionalMarkovChain:
    def __init__(self):
        self.model = {}

    def add_sentence(self, sentence):
        words = sentence.split()
        for i in range(len(words) - 1):
            key = words[i]
            next_word = words[i + 1]
            if key not in self.model:
                self.model[key] = []
            self.model[key].append(next_word)

    def generate_text(self, start_word, length=10):
        result = start_word
        current_word = start_word
        for _ in range(length):
            if current_word in self.model:
                next_word = random.choice(self.model[current_word])
                result += ' ' + next_word
                current_word = next_word
            else:
                break
        return result

# Example usage for Conditional Markov Chain
if __name__ == "__main__":
    conditional_markov_chain = ConditionalMarkovChain()

    sentences = [
        "I love programming in Python",
        "Python is a great programming language",
        "I enjoy learning new things",
        "Programming is fun and rewarding",
        "Learning Python is easy"
    ]

    for sentence in sentences:
        conditional_markov_chain.add_sentence(sentence)

    generated_text = conditional_markov_chain.generate_text(start_word="Python", length=10)
    print("Generated Conditional Text:", generated_text)


Generated Conditional Text: Python is fun and rewarding


In [9]:
from collections import Counter

class TextSummarizer:
    def __init__(self):
        self.sentences = []

    def add_text(self, text):
        """Adds text to the summarizer."""
        self.sentences = text.split('. ')

    def summarize(self, num_sentences=2):
        """Returns a summary consisting of the most frequent sentences."""
        word_count = Counter()
        for sentence in self.sentences:
            words = sentence.split()
            word_count.update(words)

        # Sort sentences based on the sum of the frequencies of their words
        ranked_sentences = sorted(
            self.sentences,
            key=lambda s: sum(word_count[word] for word in s.split()),
            reverse=True
        )

        # Return the top n sentences
        return '. '.join(ranked_sentences[:num_sentences]) + '.'

# Example usage for Text Summarizer
if __name__ == "__main__":
    summarizer = TextSummarizer()

    text = """
    Python is a high-level programming language. 
    It is widely used for web development, data analysis, and artificial intelligence. 
    The language is known for its simplicity and readability. 
    Python has a large community and many libraries that help developers. 
    Learning Python is a great choice for new programmers.
    """

    summarizer.add_text(text)
    summary = summarizer.summarize(num_sentences=2)
    print("Summary:", summary)


Summary: 
    It is widely used for web development, data analysis, and artificial intelligence. 
    Learning Python is a great choice for new programmers.
    .


In [10]:
class StateBasedMarkovChain:
    def __init__(self):
        self.model = {}

    def add_sentence(self, sentence, state):
        words = sentence.split()
        for i in range(len(words) - 1):
            key = (state, words[i])  # State + current word as key
            next_word = words[i + 1]
            if key not in self.model:
                self.model[key] = []
            self.model[key].append(next_word)

    def generate_text(self, start_word, state, length=10):
        result = start_word
        current_word = start_word
        for _ in range(length):
            key = (state, current_word)
            if key in self.model:
                next_word = random.choice(self.model[key])
                result += ' ' + next_word
                current_word = next_word
            else:
                break
        return result

# Example usage for State-Based Markov Chain
if __name__ == "__main__":
    state_based_markov_chain = StateBasedMarkovChain()

    sentences = [
        "I am feeling happy today",
        "The weather is sunny and bright",
        "I am sad because it is raining",
        "The day is gloomy and dark",
        "I feel great when the sun shines"
    ]

    # Adding sentences with their respective states
    for sentence in sentences:
        if "happy" in sentence or "sunny" in sentence:
            state_based_markov_chain.add_sentence(sentence, "happy")
        else:
            state_based_markov_chain.add_sentence(sentence, "sad")

    generated_text = state_based_markov_chain.generate_text(start_word="I", state="happy", length=10)
    print("Generated State-Based Text:", generated_text)


Generated State-Based Text: I am feeling happy today


In [11]:
class FlexibleMarkovChain:
    def __init__(self, n=1, use_characters=False):
        self.n = n
        self.use_characters = use_characters
        self.model = {}

    def add_text(self, text):
        """Adds text to the model (character or word based)."""
        if self.use_characters:
            for i in range(len(text) - self.n):
                key = text[i:i + self.n]
                next_char = text[i + self.n]
                if key not in self.model:
                    self.model[key] = []
                self.model[key].append(next_char)
        else:
            words = text.split()
            for i in range(len(words) - self.n):
                key = tuple(words[i:i + self.n])
                next_word = words[i + self.n]
                if key not in self.model:
                    self.model[key] = []
                self.model[key].append(next_word)

    def generate_text(self, start=None, length=10):
        """Generates text using the Markov chain."""
        if self.use_characters:
            if start is None:
                start = random.choice(list(self.model.keys()))
            result = start
            current_key = start
            for _ in range(length):
                if current_key in self.model:
                    next_char = random.choice(self.model[current_key])
                    result += next_char
                    current_key = result[-self.n:]  # Update the key
                else:
                    break
        else:
            if start is None:
                start = random.choice(list(self.model.keys()))
            else:
                start = tuple(start.split())

            result = list(start)
            for _ in range(length):
                key = tuple(result[-self.n:])
                if key in self.model:
                    next_word = random.choice(self.model[key])
                    result.append(next_word)
                else:
                    break
        return result if self.use_characters else ' '.join(result)

# Example usage for Flexible Markov Chain
if __name__ == "__main__":
    flexible_markov_chain = FlexibleMarkovChain(n=2, use_characters=False)

    sentences = [
        "I love programming in Python",
        "Python is a great programming language",
        "I enjoy learning new things",
        "Programming is fun and rewarding",
        "Learning Python is easy"
    ]

    for sentence in sentences:
        flexible_markov_chain.add_text(sentence)

    generated_text = flexible_markov_chain.generate_text(start="Python is", length=10)
    print("Generated Flexible Text:", generated_text)

    # Using character-based model
    char_markov_chain = FlexibleMarkovChain(n=2, use_characters=True)
    char_markov_chain.add_text("I love programming in Python.")
    char_generated_text = char_markov_chain.generate_text(start="Py", length=50)
    print("Generated Character Text:", char_generated_text)


Generated Flexible Text: Python is a great programming language
Generated Character Text: Python.
