# KateiKyoushi


# Imports


In [None]:
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

import nltk
import json
from collections import Counter
from pathlib import Path
from PyPDF2 import PdfReader

In [None]:
from openai import OpenAI

# Module-level OpenAI client; get_response() sends its chat requests through it.
# NOTE(review): no credentials are passed here, so the API key is presumably
# picked up from the environment (e.g. OPENAI_API_KEY) — confirm.
client = OpenAI()

In [None]:
# On first run, uncomment the lines below to download the required NLTK data.

# nltk.download("punkt")
# nltk.download("wordnet")
# nltk.download("stopwords")

# Utils


In [None]:
def create_file(file_name, text):
    """Write ``text`` to ``file_name``, replacing whatever the file held before.

    The file is created if it does not exist. Nothing is returned.
    """
    with open(file_name, mode="w+") as out_file:
        out_file.write(text)

In [None]:
class PDFParser:
    """Extract the text of a PDF file and derive its most frequent keywords.

    Attributes set on construction:
        path: Location of the PDF, handed unchanged to ``PdfReader``.
        text: Text extracted from every page (see ``parse_text``).
    """

    def __init__(self, path) -> None:
        self.path = path
        # Parsing happens eagerly so ``text`` is available right away.
        self.text = self.parse_text()

    def parse_text(self):
        """Return the extracted text of all pages, separated by newlines."""
        pdf_reader = PdfReader(self.path)
        # Join pages with a newline so the last word of one page is not fused
        # with the first word of the next (which would corrupt tokenization).
        # ``or ""`` guards against a page that yields no text at all.
        return "\n".join(page.extract_text() or "" for page in pdf_reader.pages)

    def get_keywords(self, num):
        """Return the ``num`` most frequent content words as one string.

        The text is tokenized, non-alphanumeric tokens (punctuation) and
        English stop words are dropped, the remaining words are lemmatized,
        and the most common lemmas are reported.

        Args:
            num: How many keywords to report.

        Returns:
            A string of the form ``"Top-{num} keywords: a, b, c"``.
        """
        stop_words = set(stopwords.words("english"))
        lemmatizer = WordNetLemmatizer()

        # Lower-case before filtering and counting so "Model" and "model" are
        # tallied as one keyword instead of competing with each other.
        # NOTE(review): WordNet lookups appear to be case-sensitive, so this
        # should also let capitalised plurals be lemmatized — confirm.
        tokens = (
            word.lower() for word in word_tokenize(self.text) if word.isalnum()
        )
        lemmas = [
            lemmatizer.lemmatize(token)
            for token in tokens
            if token not in stop_words
        ]

        # Get top-{num} most common words
        top_words = Counter(lemmas).most_common(num)
        str_keywords = ", ".join(word for word, _count in top_words)

        return f"Top-{num} keywords: {str_keywords}"

In [None]:
# Smoke test: parse the sample paper, compute its top-5 keywords, and show the
# extracted text. In a notebook cell only the last expression is echoed, so the
# keyword string is computed here but not displayed.
test = PDFParser("paper/Attention Is All You Need.pdf")
test.get_keywords(5)
test.text

# Prompt Engineering


In [None]:
def compile_prompt(content, keywords):
    """Build the prompt that asks the model for a Markdown course outline.

    Args:
        content: Source material text the course must cover.
        keywords: Keyword summary to steer the outline (presumably the string
            returned by ``PDFParser.get_keywords`` — verify against caller).

    Returns:
        The full prompt: role instructions, the keywords, the materials, and
        the output-format requirements, in that order.
    """
    role = """
    You are a highly skilled instructor tasked with creating a structured course outline based on provided keywords.
    Using these keywords, design a comprehensive course that encompasses all key concepts in all materials.
    """

    requirements = """
    Output Requirement: Generate a course outline formatted in Markdown, strictly adhering to the example format provided below.
    Each lesson should include a lesson name (lesson_name) and a concise description (lesson_abstract).
    Use different levels of headings, bold and italic format to highlight important topics.

    Example Format:
    # lesson_name
    lesson_abstract
    
    ## important topic 1
    - important topic 1 breakdown
    explanation of the topic
    
    ### key concept explanation
    explanation of the concept
    
    ## important topic 2
    explanation of the topic
    
    ### important topic 2 breakdown
    - important topic 2 breakdown
    explanation of the topic
    ...

    Begin designing the course now."""

    # Put the keywords and the materials on separate lines; without the
    # separator the last keyword runs straight into the "Materials:" label.
    return f"{role}Keywords: {keywords}\n    Materials: {content}{requirements}"

In [None]:
def get_response(prompt, model="gpt-3.5-turbo"):
    """Send ``prompt`` as a single user message and return the reply text.

    Uses the module-level ``client``.

    Args:
        prompt: Text placed in the ``content`` of the one user message.
        model: Name of the chat model to query. Defaults to the previously
            hard-coded ``"gpt-3.5-turbo"``, so existing calls are unaffected.

    Returns:
        The ``message.content`` of the first choice in the completion.
    """
    completion = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
    )
    return completion.choices[0].message.content

In [None]:
def generate_questions(content):
    """Build the prompt asking the model to refine and elaborate on an outline.

    NOTE(review): despite the function name, the prompt text requests
    elaboration of a course outline rather than questions; the name is kept
    for existing callers.

    Args:
        content: Text inserted after the ``Materials:`` label.

    Returns:
        The instruction text followed by ``Materials: {content}``.
    """
    instructions = (
        "\n"
        "    Use provided Course Outline, refine and elaborate on each topic. \n"
        "    Output your response combining the given Course Outline"
        " and your generated content.\n"
        "    "
    )
    return f"{instructions}Materials: {content}"

In [None]:
def generate_content(outline, time):
    """Build the prompt asking the model for a time-boxed study plan.

    Args:
        outline: Course outline the plan is based on.
        time: Time frame for the plan, e.g. ``"3 days"``.

    Returns:
        The instruction text followed by the outline and the time frame, each
        on its own labelled line.
    """
    instructions = (
        "\n"
        "    Using the provided course outline, create a study plan"
        " within the specified time frame. \n"
        "    Detail the key topics and suggested study sessions per hours"
        " based on the material's structure and complexity. \n"
        "    Ensure the plan is tailored to your specific learning preferences"
        " and available study hours per week. \n"
        "    "
    )
    # Keep the two labelled sections on separate lines; previously the end of
    # the outline ran straight into the "Time Frame:" label.
    return f"{instructions}Course Outline: {outline}\n    Time Frame: {time}"

In [None]:
def elaborate(schedule):
    """Build the prompt asking the model to expand every concept in a study plan.

    Args:
        schedule: Study plan text inserted after the ``Study Plan:`` label.

    Returns:
        The instruction text followed by ``Study Plan: {schedule}``.
    """
    instructions = (
        "\n"
        "    Using the provided study plan, elaborate on each concept mentioned. \n"
        "    Include detailed explanations, relevant examples, and practical"
        " applications to ensure a comprehensive understanding of each topic. \n"
        "    This expanded content should assist in preparing for in-depth"
        " discussions, exams, or practical implementations related to the"
        " course material.\n"
        "    "
    )
    return f"{instructions}Study Plan: {schedule}"

# Test


1. 上传文件（解析成文本）
2. 给出大纲和题目数
3. 解释一个概念，给出一个问题，二（多）选一
4. 判断用户是否答对，如果答对，继续下一题；如果答错，解释错误原因
5.


In [None]:
# keywords = test.get_keywords(15)
# prompt = compile_prompt(test.text, keywords)
# response = get_response(prompt)
# create_file("1.md", response)
# schedule = get_response(generate_content(response, "3 days"))
# create_file("2.md", schedule)

In [None]:
# Load the previously generated study plan. Read the file verbatim: joining
# readlines() with " " would prefix every line after the first with a space
# and distort the Markdown structure.
with open("2.md", "r") as f:
    schedule = f.read()

In [None]:
# Wrap the loaded study plan in the elaboration prompt and query the model.
response = get_response(elaborate(schedule))

In [None]:
# Save the model's elaborated reply to 3.md.
create_file("3.md", response)