In [None]:
#|default_exp llm

In [None]:
#|export
from traitlets import HasTraits, Unicode, List
from langchain_openai import ChatOpenAI
import os
from langchain.docstore.document import Document
from langchain_community.document_loaders import UnstructuredMarkdownLoader
from langchain_openai import OpenAIEmbeddings
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings

In [None]:
#|export
class LLM(HasTraits):
    """Base model that configures the OpenAI API key and owns a chat client.

    On construction the key is read from a text file, exported to the process
    environment as ``OPENAI_API_KEY``, and a ``ChatOpenAI`` instance is stored
    on ``self.llm``.
    """

    def __init__(self, filepath='OPENAI_API_KEY', model_name="gpt-3.5-turbo"):
        """Read the API key from ``filepath`` and create the chat client.

        Args:
            filepath: Path to a text file whose content is the OpenAI API key;
                surrounding whitespace is stripped.
            model_name: Model identifier forwarded to ``ChatOpenAI``.

        Raises:
            OSError: If ``filepath`` cannot be opened for reading.
            ValueError: If the file is empty or contains only whitespace.
        """
        super().__init__()

        with open(filepath, 'r') as file:
            openai_api_key = file.read().strip()
        if not openai_api_key:
            # Fail here with a clear message rather than exporting an empty key.
            raise ValueError(f"No API key found in {filepath!r}")

        # ChatOpenAI is created without an explicit key below, so it presumably
        # picks the key up from this environment variable.
        os.environ['OPENAI_API_KEY'] = openai_api_key
        self.llm = ChatOpenAI(model_name=model_name)

In [None]:
# Build the base model from the key file (path relative to the working
# directory) and display the resulting object as the cell output.
llm_model = LLM(filepath='OPENAI_API_KEY')
llm_model

<__main__.LLM>

In [None]:
#|export
class FileModel(LLM):
    """LLM model that indexes course material into a FAISS vector store.

    Text, PDF and Markdown content is embedded with ``OpenAIEmbeddings`` and
    accumulated in ``self.db``; the first load creates the store and later
    loads are merged into it. Paths of loaded files are tracked in ``files``.
    """
    # Define a Unicode string trait
    select = Unicode()
    # Paths of the files that have been loaded into the vector store
    files = List()

    def __init__(self, course_file_dir='course_files/', filepath='OPENAI_API_KEY'):
        """Set up the API key, the embedding client and an empty store.

        Args:
            course_file_dir: Directory that uploaded files are written to.
            filepath: Path of the API-key file, forwarded to ``LLM.__init__``.
        """
        super().__init__(filepath)
        self.course_file_dir = course_file_dir
        self.embeddings = OpenAIEmbeddings()
        self.db = None

    def _add_documents(self, documents):
        """Embed ``documents`` and fold them into ``self.db``."""
        db = FAISS.from_documents(documents, self.embeddings)
        if self.db is None:
            self.db = db
        else:
            self.db.merge_from(db)

    def _register_file(self, filepath):
        """Record ``filepath`` in the ``files`` trait."""
        # Assign a new list instead of appending in place: traitlets does not
        # detect in-place mutation of container traits, so observers of
        # ``files`` would otherwise never be notified.
        self.files = [*self.files, filepath]

    def save_content_from_upload(self, values):
        """Write uploaded files into ``self.course_file_dir``.

        Args:
            values: Iterable of mappings, each with a ``'name'`` (file name)
                and a ``'content'`` (bytes-like payload) entry. Presumably the
                value of an upload widget -- confirm against the caller.

        Returns:
            List of the paths that were written, in input order.
        """
        if self.course_file_dir:
            os.makedirs(self.course_file_dir, exist_ok=True)
        saved_paths = []
        for value in values:
            # Keep only the final path component so an uploaded name cannot
            # place the file outside the course directory.
            target = os.path.join(self.course_file_dir, os.path.basename(value['name']))
            with open(target, "wb") as fp:
                fp.write(value['content'])
            saved_paths.append(target)
        return saved_paths

    def load_text_to_db(self, text):
        """Embed a raw text string and add it to the vector store."""
        doc = Document(page_content=text)
        # from_documents expects a sequence of documents, not a single one.
        self._add_documents([doc])

    def load_pdf_to_db(self, filepath):
        """Load the PDF at ``filepath``, split it and add it to the store."""
        loader = PyPDFLoader(filepath)
        pages = loader.load_and_split()
        self._add_documents(pages)
        self._register_file(filepath)

    def load_markdown_to_db(self, filepath):
        """Load the Markdown file at ``filepath`` and add it to the store."""
        loader = UnstructuredMarkdownLoader(filepath, mode="elements") #mode=elements breaks up the text into chunks
        doc = loader.load()
        self._add_documents(doc)
        self._register_file(filepath)

In [None]:
# Create a FileModel with its default settings, then load one PDF (given by
# its path) into the model's vector store.
file_model = FileModel()
file_model.load_pdf_to_db(filepath="course_files/STP 420 spring 2024 course syllabus.pdf")

In [None]:
#|hide
# Export the cells marked for export to the library module.
import nbdev
nbdev.nbdev_export()

Note nbdev2 no longer supports nbdev1 syntax. Run `nbdev_migrate` to upgrade.
See https://nbdev.fast.ai/getting_started.html for more information.
  warn(f"Notebook '{nbname}' uses `#|export` without `#|default_exp` cell.\n"
