In [1]:
# Importing the libraries
from sklearn.feature_extraction.text import TfidfVectorizer
import numpy as np
import spacy
import pulp

In [2]:
# Function to extract keywords from a single document
def extract_keywords(doc, top_n=3):
    tfidf = TfidfVectorizer(stop_words='english', max_features=top_n)
    response = tfidf.fit_transform([doc])
    feature_names = np.array(tfidf.get_feature_names_out())
    tfidf_scores = response.toarray()[0]
    top_indices = tfidf_scores.argsort()[-top_n:][::-1]
    return feature_names[top_indices]

In [3]:
def generate_slide_deck_title(bullet_points, top_n=3):
    # Combine all bullet points into a single document
    combined_text = ' '.join(bullet_points)

    nlp = spacy.load('en_core_web_lg')
    
    doc = nlp(combined_text)
    
    # Extract key phrases using spaCy noun chunks
    key_phrases = [chunk.text for chunk in doc.noun_chunks]
    
    # Extract keywords using TF-IDF
    keywords = extract_keywords(combined_text, top_n)
    
    # Combine key phrases and keywords, removing duplicates
    combined_phrases = list(dict.fromkeys(key_phrases + list(keywords)))
    
    # Calculate TF-IDF scores for combined phrases
    tfidf = TfidfVectorizer(stop_words='english')
    tfidf_matrix = tfidf.fit_transform(combined_phrases)
    feature_names = np.array(tfidf.get_feature_names_out())
    tfidf_scores = tfidf_matrix.toarray().sum(axis=0)
    
    # Create a dictionary of phrase scores
    phrase_scores = {phrase: tfidf_scores[i] for i, phrase in enumerate(combined_phrases)}
    
    # Create ILP model
    model = pulp.LpProblem("Title_Generation", pulp.LpMaximize)
    
    # Create binary variables for each phrase
    phrase_vars = {phrase: pulp.LpVariable(f"phrase_{i}", cat='Binary') for i, phrase in enumerate(combined_phrases)}
    
    # Objective function: maximize the sum of the TF-IDF scores of the selected phrases
    model += pulp.lpSum([phrase_vars[phrase] * phrase_scores[phrase] for phrase in combined_phrases])
    
    # Constraint: select exactly top_n phrases
    model += pulp.lpSum([phrase_vars[phrase] for phrase in combined_phrases]) == top_n
    
    # Solve the ILP model
    model.solve()
    
    # Get the selected phrases
    selected_phrases = [phrase for phrase in combined_phrases if pulp.value(phrase_vars[phrase]) == 1]
    
    # Capitalize each word in each phrase
    capitalized_phrases = [' '.join(word.capitalize() for word in phrase.split()) for phrase in selected_phrases]
    
    # Generate a title by joining the top N key phrases
    title = ' '.join(capitalized_phrases)
    
    return title

In [8]:
# Example bullet points
bullet_points = [
    "Math is all around us.", 
    "Math helps us understand the world and solve problems.", 
    "This text will explain what math is, why it is important, and how we use it in our daily lives.", 
    "There are many different areas of math, such as addition, subtraction, multiplication, division, geometry, and algebra.", 
    "Another important part of math is geometry.", 
    "Algebra is another area of math that is very important.", 
    "Math is important for many reasons.", 
    "When we use math, we learn to think logically and critically.", 
    "Second, math is important for many jobs.", 
    "Even jobs that do not seem related to math often require basic math skills.", 
    "For example, chefs use math to measure ingredients, and cashiers use math to give the correct change.", 
    "Third, math helps us understand the world.", 
    "We also use math in our daily lives in many ways.", 
    "For example, when we cook, we use math to measure ingredients and follow recipes.", 
    "In conclusion, math is a very important subject that helps us in many ways.", 
    "We use math every day, often without even thinking about it.", 
    "Math is a valuable tool that helps us in many areas of our lives."

]

# Generate a slide deck title
title = generate_slide_deck_title(bullet_points, top_n=3)
print(title)


IndexError: index 33 is out of bounds for axis 0 with size 33