In [12]:
# Requires the python-pptx package. If it is missing, run: %pip install python-pptx

In [13]:
import os
import requests
from dotenv import load_dotenv
from bs4 import BeautifulSoup
from IPython.display import Markdown, display
from openai import OpenAI

In [14]:
# Load environment variables from a file called .env

load_dotenv(override=True)
api_key = os.getenv('OPENAI_API_KEY')

# Check the key.
# Surrounding whitespace is tested BEFORE the prefix: a key with leading
# whitespace also fails the "sk-proj-" prefix test, and the whitespace message
# is the one that tells the user what to fix.

if not api_key:
    print("No API key was found - please head over to the troubleshooting notebook in this folder to identify & fix!")
elif api_key.strip() != api_key:
    print("An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them - see troubleshooting notebook")
elif not api_key.startswith("sk-proj-"):
    print("An API key was found, but it doesn't start sk-proj-; please check you're using the right key - see troubleshooting notebook")
else:
    print("API key found and looks good so far!")

API key found and looks good so far!


In [15]:
# Create the OpenAI client that the later cells use for chat-completion requests.
# No key is passed explicitly - presumably the SDK reads OPENAI_API_KEY from the
# environment; confirm against the openai package documentation.
openai = OpenAI()

In [16]:
from pptx import Presentation

class PowerPoint:
    """
    Text view of a PowerPoint file: its file name plus the text found on its slides.
    """

    def __init__(self, ppt):
        """
        Creates a PowerPoint object, with name and text.

        ppt: path of the presentation file. The file is opened immediately,
             because the text is extracted as part of construction.
        """
        self.ppt = ppt
        self.title = os.path.basename(ppt)
        self.text = self.extract_text()

    def extract_text(self):
        """
        Extracts text from powerpoint.

        Returns one string: the text of every slide, in slide order, with the
        individual pieces joined by newlines. Besides shapes that expose
        `.text` directly, text inside grouped shapes and table cells is
        included as well.
        """
        prs = Presentation(self.ppt)
        return "\n".join(
            text
            for slide in prs.slides
            for text in self._shape_texts(slide.shapes)
        )

    @staticmethod
    def _shape_texts(shapes):
        """
        Yields the text of each shape in `shapes`, descending into containers.

        Shapes are duck-typed so that only attributes actually present are used:
        - a shape with `.text` yields that text (empty strings included);
        - a shape with `.shapes` is treated as a group and walked recursively;
        - a shape whose `has_table` is true yields the text of every cell,
          row by row;
        - anything else contributes nothing.
        """
        for shape in shapes:
            if hasattr(shape, "text"):
                yield shape.text
            elif hasattr(shape, "shapes"):
                yield from PowerPoint._shape_texts(shape.shapes)
            elif getattr(shape, "has_table", False):
                for row in shape.table.rows:
                    for cell in row.cells:
                        yield cell.text

In [17]:
# System prompt for the quiz generator.
# Adjacent string literals are used instead of backslash continuations inside
# one literal, so the spacing between the sentences is visible and explicit.
system_prompt = (
    "You are an assistant to a predictive analytics instructor that analyzes the contents "
    "of a PowerPoint lecture presentation, and generates an MCQ type quiz. "
    "You are to ignore text that might be navigation-related "
    "and respond in Markdown."
)

In [18]:
# A function that writes a User Prompt that asks for quiz questions on a PowerPoint:

def user_prompt_for(powerpoint, num_questions=10):
    """
    Builds the user message asking for a multiple-choice quiz on one lecture.

    powerpoint: object exposing `.title` and `.text` (both are inserted verbatim).
    num_questions: how many questions to request; the default of 10 reproduces
                   the original fixed prompt exactly.

    Returns the prompt string: a title line, the instruction, a blank line,
    then the full slide text.
    """
    user_prompt = f"You are looking at a lecture titled {powerpoint.title}"
    user_prompt += (
        "\nThe contents of this powerpoint are as follows; "
        f"Please provide {num_questions} multiple-choice quiz questions out of it. \n\n"
    )
    user_prompt += powerpoint.text
    return user_prompt

In [19]:
def messages_for(powerpoint):
    """
    Assembles the chat payload for one presentation.

    Returns a two-element list of role/content dicts: the shared system prompt
    first, then the user prompt generated for `powerpoint`.
    """
    system_message = {"role": "system", "content": system_prompt}
    user_message = {"role": "user", "content": user_prompt_for(powerpoint)}
    return [system_message, user_message]

In [20]:
# And now: call the OpenAI API.

def summarize(powerpoint_path, model="gpt-4o-mini"):
    """
    Sends one presentation to the chat-completions API and returns the reply text.

    Despite the name, the prompts built by messages_for ask for a quiz, so the
    returned Markdown is a quiz rather than a summary.

    powerpoint_path: path of the presentation file to load.
    model: model name passed to the API; the default keeps the original
           hard-coded choice.

    Returns the content of the first choice in the response.
    """
    powerpoint = PowerPoint(powerpoint_path)
    response = openai.chat.completions.create(
        model=model,
        messages=messages_for(powerpoint),
    )
    return response.choices[0].message.content

In [21]:
def display_summary(url):
    """
    Generates the model output for a presentation and renders it as Markdown.

    url: passed straight to summarize(); in this notebook it is a file path
         to a presentation rather than a web address.
    """
    display(Markdown(summarize(url)))

In [22]:
# Generate and render the quiz for one lecture deck.
# The file name is relative - presumably the deck must sit in the notebook's
# working directory; verify before running.
ppt_file = "BUSI 650 - clustering.pptx" 
display_summary(ppt_file)

Here's a quiz based on the contents of the lecture titled **BUSI 650 - Clustering**:

### Quiz on Cluster Analysis

1. **What is cluster analysis primarily used for in marketing?**
   - A) Developing new technologies
   - B) Segmenting markets into smaller groups with shared characteristics
   - C) Analyzing financial performance
   - D) Predicting stock prices  
   **Answer: B**

2. **Which of the following is a characteristic of cluster analysis?**
   - A) It requires a specified dependent variable.
   - B) It organizes data into two or more similar groups.
   - C) It is considered a supervised learning technique.
   - D) It cannot be used for numerical data.  
   **Answer: B**

3. **What is a common method for evaluating clusters visually?**
   - A) Box Plot
   - B) Line Chart
   - C) Elbow Chart
   - D) Histogram  
   **Answer: C**

4. **Which clustering method treats each observation as a separate cluster initially?**
   - A) K-Means Clustering
   - B) Agglomerative Clustering
   - C) Divisive Clustering
   - D) Hierarchical Clustering  
   **Answer: B**

5. **What does the silhouette score measure?**
   - A) The total number of clusters formed
   - B) The distance between each observation and the cluster centroid
   - C) The overall error in the clustering process
   - D) The mean value of all observations  
   **Answer: B**

6. **Which clustering approach combines individual observations into subgroups based on distance?**
   - A) K-Means Clustering
   - B) Hierarchical Clustering
   - C) DBSCAN Clustering
   - D) Ensemble Clustering  
   **Answer: B**

7. **What type of data is K-Means clustering applicable to?**
   - A) Categorical data only
   - B) Numerical data only
   - C) Text data
   - D) Any type of data  
   **Answer: B**

8. **In Ward's method of hierarchical clustering, clusters are combined to minimize what?**
   - A) The total number of clusters
   - B) The within-cluster sum of squares
   - C) The distance between clusters
   - D) The average distance from the centroid  
   **Answer: B**

9. **Which method measures the straight-line distance between two points in hierarchical clustering?**
   - A) Manhattan Method
   - B) Euclidean Method
   - C) Jaccard’s Method
   - D) Matching Method  
   **Answer: B**

10. **In a case study about online perfume sales, what was the main goal of using cluster analysis?**
    - A) To determine optimal pricing strategies
    - B) To segment the market for better customer engagement
    - C) To analyze supply chain efficiencies
    - D) To improve product quality  
    **Answer: B**

Feel free to use or modify these questions as needed!