In [None]:
!pip install smartsheet-python-sdk langchain pandas pinecone-client gradio

In [13]:
import os
import re
from datetime import date

import gradio as gr
import pandas
import pinecone
import smartsheet
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import TextLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Pinecone

#### Pinecone Config

In [6]:
# Prefer credentials supplied through the environment so they do not have to
# live in the notebook. The literals are kept only as fallbacks so existing
# runs keep working.
# SECURITY: the fallback key below has been committed in plain text; it should
# be rotated and this fallback removed.
PINECONE_API_KEY = os.environ.get(
    "PINECONE_API_KEY", "488f39e6-74bd-40e7-ad4e-2a48c66590f0"
)
PINECONE_ENV = os.environ.get("PINECONE_ENV", "us-east4-gcp")

## Build Knowledge Base

In [7]:
def contains_phase_number(text):
    """Tell whether ``text`` mentions "phase" followed by a whitespace character.

    The text is lower-cased before matching, so the check is case-insensitive.
    Despite the function name, no digit is required after the word.
    """
    lowered = text.lower()
    return re.search(r"phase\s", lowered) is not None

In [8]:
def preprocess_all_conent(access_token, sheet_id):
    """Export a Smartsheet sheet to plain-text files under ``processed_files/``.

    Every row is rendered as "<column title>: <value>" lines (cells without a
    value are skipped). A row whose 'Task Name' is a string accepted by
    ``contains_phase_number`` opens a new phase section.

    Files written:
      * ``phases.txt``      - one " <n>. <task name>" entry per phase row.
      * ``all_content.txt`` - every row, with a numbered banner before each phase.
      * ``pre.txt``         - the rows that precede the first phase row, written
                              only when that text is longer than 1000 characters.
      * ``phase<i>.txt``    - the rows belonging to the i-th phase (1-based).

    Args:
        access_token: Smartsheet API access token.
        sheet_id: ID of the sheet to export.

    Returns:
        bool: True when ``pre.txt`` was written, otherwise False.

    Raises:
        ValueError: if the sheet has no column titled 'Task Name'.
    """
    smart_client = smartsheet.Smartsheet(access_token=access_token)
    current_sheet = smart_client.Sheets.get_sheet(sheet_id)
    columns = smart_client.Sheets.get_columns(sheet_id).data

    # Titles and ids are taken pairwise from the same column objects, so no
    # title -> id lookup is needed.
    headers = [c.title for c in columns]
    column_ids = [c.id for c in columns]
    task_name_idx = headers.index('Task Name')

    raw_text = ""
    temp_text = ""
    phase_id = 1

    phases = []
    individual_phases = []

    pre_available = False

    for row in current_sheet.rows:
        # A column with no cell in this row is treated like an empty cell.
        value_by_column = {cell.column_id: cell.value for cell in row.cells}
        values = [value_by_column.get(col) for col in column_ids]

        task_name = values[task_name_idx]
        if isinstance(task_name, str) and contains_phase_number(task_name):
            raw_text += '\n\n' + '-' * 30 + f' {phase_id}. ' + task_name + ' ' + '-' * 30 + '\n'
            phases.append(f' {phase_id}. ' + task_name)
            phase_id += 1

            # Close the section collected so far and start the new phase's text.
            individual_phases.append(temp_text)
            temp_text = '\n\n' + '-' * 30 + ' ' + task_name + ' ' + '-' * 30 + '\n'

        for header, value in zip(headers, values):
            if value is not None:
                line = header + ": " + str(value) + "\n"
                raw_text += line
                temp_text += line
        raw_text += "\n"
        temp_text += '\n'

    individual_phases.append(temp_text)

    # The output directory does not exist on a fresh checkout/kernel.
    os.makedirs("processed_files", exist_ok=True)

    with open("processed_files/phases.txt", "w") as phase_file:
        phase_file.write('\n'.join(phases))

    with open("processed_files/all_content.txt", "w") as raw_file:
        raw_file.write(raw_text)

    # individual_phases[0] holds whatever came before the first phase row; it is
    # only kept as a separate document when it is longer than 1000 characters.
    if len(individual_phases[0]) > 1000:
        pre_available = True
        with open("processed_files/pre.txt", "w") as pre_file:
            pre_file.write(individual_phases[0])

    for i in range(1, len(individual_phases)):
        with open(f"processed_files/phase{i}.txt", "w") as file:
            file.write(individual_phases[i])

    print('Preprocess Done!!!')

    return pre_available

In [14]:
def delete_index():
    """Delete every index reported by ``pinecone.list_indexes()``, then log it."""
    existing_indexes = pinecone.list_indexes()
    for name in existing_indexes:
        pinecone.delete_index(name)

    print('Deleted All Indexes')

In [15]:
def build_pinecone_index(filename, index_name, chunck_size, openai_key):
    """Create a Pinecone index from ``processed_files/<filename>.txt``.

    Nothing is built when an index called ``index_name`` already exists; a
    message is printed instead so the skip is visible.

    Args:
        filename: Base name (without ``.txt``) of the file in ``processed_files/``.
        index_name: Name of the Pinecone index to create.
        chunck_size: ``chunk_size`` passed to ``CharacterTextSplitter``.
        openai_key: OpenAI API key used for the embeddings.
    """
    # Check first: loading, splitting and preparing embeddings is wasted work
    # when the index is not going to be created.
    if index_name in pinecone.list_indexes():
        print('Index already exists, skipped:', index_name)
        return

    # Load and split before creating the index, so an unreadable file does not
    # leave an empty index behind.
    loader = TextLoader(f"processed_files/{filename}.txt")
    documents = loader.load()
    text_splitter = CharacterTextSplitter(chunk_size=chunck_size, chunk_overlap=0)
    docs = text_splitter.split_documents(documents)

    embeddings = OpenAIEmbeddings(openai_api_key=openai_key)

    # NOTE(review): 1536 is presumably the dimensionality of the OpenAI
    # embedding model in use - confirm if the embedding model changes.
    pinecone.create_index(name=index_name, metric="dotproduct", dimension=1536)
    Pinecone.from_documents(docs, embedding=embeddings, index_name=index_name)

    print('Created Index:', index_name)

In [16]:
def build_knowledge_base(access_token, sheet_id, openai_key, model_name):
    """Export the sheet and rebuild all Pinecone indexes from it.

    Steps: preprocess the sheet into ``processed_files/``, initialise Pinecone,
    delete every existing index, then create the 'all' index, the optional
    'baseline' index (when ``pre.txt`` was produced) and one ``phase<n>`` index
    per phase. Finally ``phases.txt`` is rewritten with the index labels
    prepended.

    Args:
        access_token: Smartsheet API access token.
        sheet_id: ID of the sheet to export.
        openai_key: OpenAI API key used for the embeddings.
        model_name: Not used by this function; kept in the signature for callers.

    Returns:
        list[str]: ``['all', ('baseline',) <phase labels>...]`` - 'baseline' is
        present only when the pre-phase document was written.
    """
    chunk_size = 450  # characters per chunk, shared by every index

    pre_available = preprocess_all_conent(access_token, sheet_id)

    pinecone.init(api_key=PINECONE_API_KEY, environment=PINECONE_ENV)

    delete_index()

    build_pinecone_index("all_content", 'all', chunk_size, openai_key)

    if pre_available:
        build_pinecone_index('pre', 'baseline', chunk_size, openai_key)

    # Use a context manager so the file handle is closed deterministically.
    with open('processed_files/phases.txt', 'r') as phase_file:
        phases = [line.strip() for line in phase_file.readlines()]

    for phase_number in range(1, len(phases) + 1):
        build_pinecone_index(
            f'phase{phase_number}', f'phase{phase_number}', chunk_size, openai_key
        )

    if pre_available:
        phases = ['all', 'baseline'] + phases
    else:
        phases = ['all'] + phases

    with open("processed_files/phases.txt", "w") as phase_file:
        phase_file.write('\n'.join(phases))

    return phases

## Smartsheet Analysis

In [17]:
# Initialise the Pinecone client at notebook level. The query helpers below call
# `pinecone.Index(...)` directly and do not call `pinecone.init` themselves
# (presumably they rely on this cell having run - confirm).
pinecone.init(api_key=PINECONE_API_KEY, environment=PINECONE_ENV)

In [18]:
def generate_analysis(openai_key, model_name, phase):
    """Ask the chat model for a project-manager style analysis of ``phase``.

    Retrieval always runs against the Pinecone index named "all"; ``phase`` is
    only interpolated into the prompt text (together with today's date).

    Args:
        openai_key: OpenAI API key used for both embeddings and the chat model.
        model_name: Chat model name passed to ``ChatOpenAI``.
        phase: Phase label to analyse; it is also printed before the query runs.

    Returns:
        The value returned by ``RetrievalQA.run`` for the prompt.
    """
    embedder = OpenAIEmbeddings(openai_api_key=openai_key)
    store = Pinecone(
        index=pinecone.Index(index_name="all"),
        embedding=embedder,
        text_key="text",
    )

    llm = ChatOpenAI(openai_api_key=openai_key, temperature=0, model_name=model_name)
    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=store.as_retriever(),
    )

    today = date.today()

    prompt = f"""
    % Who you are:
    - You are a professional excellent Project Manager.

    % What you should know:
    - What you know is all about the one project.
    - Today is {today}.
    - Mustn't consider weekends when calculating date. So You MUST KNOW that a week has 5 workdays when calculating date.

    % What you do:
    - Write your detailed analysis about {phase}.
    - Write current progress of the phase: what to do in the phase, what have been done and what should do in the phase
    - Write what the observation of the project is.
    - Discover potential risk
    - Extract insight
    - Recommendation for future
    """

    print(phase)

    return qa_chain.run(prompt)

In [19]:
def generate_answer(openai_key, model_name, phase, query):
    """Answer ``query`` using the Pinecone index that corresponds to ``phase``.

    A label of the form "<n>. <name>" selects the index ``phase<n>``; any other
    label (for example 'all' or 'baseline') is used as the index name as-is.

    Args:
        openai_key: OpenAI API key used for both embeddings and the chat model.
        model_name: Chat model name passed to ``ChatOpenAI``.
        phase: Phase label or index name chosen by the user.
        query: The user's question.

    Returns:
        The value returned by ``RetrievalQA.run`` for the prompt.
    """
    # Tolerate surrounding whitespace in the numeric prefix (" 1. Phase ...").
    phase_id = phase.split('.')[0].strip()

    index_name = 'phase' + phase_id if phase_id.isdigit() else phase

    embeddings = OpenAIEmbeddings(openai_api_key=openai_key)

    pinecone_index = pinecone.Index(index_name=index_name)
    vectorstore = Pinecone(
        index=pinecone_index, embedding=embeddings, text_key="text"
    )
    model = ChatOpenAI(
        openai_api_key=openai_key, temperature=0, model_name=model_name
    )
    chain = RetrievalQA.from_chain_type(
        llm=model, chain_type="stuff", retriever=vectorstore.as_retriever()
    )

    prompt = f"""
    You are a professional project manager.
    Answer the following question as best and detailed as possible.

    Question: {query}
    """

    response = chain.run(prompt)

    print("Query: ", query)
    print('Answer: ', response)
    return response

## User Interface

In [21]:
# Model names offered by the dropdown in both tabs.
MODEL_CHOICES = [
    "gpt-4-0314",
    "gpt-4-0613",
    "gpt-4",
    "gpt-3.5-turbo-0613",
    "gpt-3.5-turbo-0301",
    "gpt-3.5-turbo-16k-0613",
]

# ---- Tab 1: build the knowledge base from a Smartsheet sheet ----
with gr.Blocks() as pre_ui:
    with gr.Column():
        gr.Markdown(
            """
            # Build Knowledge Base
            """
        )
        with gr.Row():
            smart_access_token = gr.Textbox(label="SmartSheet Access Token")
            sheet_id = gr.Textbox(label="Sheet ID")
        with gr.Row():
            _opeanai_key = gr.Textbox(show_label=False, placeholder="OpenAI API Key")
            _model_name = gr.Dropdown(list(MODEL_CHOICES), show_label=False)
        with gr.Row():
            result = gr.Textbox(label="Result")
            b_build = gr.Button("Build")

    # A `with` block does not open a new scope, so a function defined here is a
    # module-level name. The click handler therefore must NOT be called
    # `build_knowledge_base`: that would replace the real builder and make the
    # handler call itself recursively.
    def on_build_click(access_token, sheet_id, openai_key, model_name):
        """Build the knowledge base and return updates for the three outputs.

        Returns a status string, the dropdown of phase labels (without the
        'all' / 'baseline' entries) and the dropdown of every index label.
        """
        phases = build_knowledge_base(access_token, sheet_id, openai_key, model_name)
        # phases always starts with 'all', optionally followed by 'baseline'.
        has_baseline = len(phases) > 1 and phases[1] == 'baseline'
        _phases = phases[2:] if has_baseline else phases[1:]
        return 'Success!', gr.Dropdown(_phases), gr.Dropdown(phases)


# ---- Tab 2: query the knowledge base ----
with gr.Blocks() as run_ui:
    gr.Markdown(
        """
        # Project Schedule Analysis
        """
    )

    with gr.Row():
        opeanai_key = gr.Textbox(show_label=False, placeholder="OpenAI API Key")
        model_name = gr.Dropdown(list(MODEL_CHOICES), show_label=False)


    with gr.Row():
        with gr.Column():
            analysis_text = gr.TextArea(label="Detailed Analysis", lines=15, max_lines=15)
            with gr.Row():
                choose_phase = gr.Dropdown([])
                b_phase = gr.Button("Get Analysis")
        with gr.Column():
            choice = gr.Dropdown(
                [],
                label="Pinecone Index",
            )
            query = gr.Textbox(label="Query:")
            banswer = gr.Button("Generate Answer")
            answer = gr.TextArea(label="Answer:", lines=6, max_lines=10)


    def set_analysis(opeanai_key, model_name, phase_text):
        """Return the generated analysis for the phase chosen in the dropdown."""
        # An unselected dropdown yields an empty value; do not query with it.
        if not phase_text:
            return "Choose Phase!!!"
        return generate_analysis(opeanai_key, model_name, phase_text)

    def set_answer(opeanai_key, model_name, index_name, query):
        """Return the answer to ``query`` from the chosen index, or a hint if input is missing."""
        # `not x` covers both "" and the None produced by an unselected dropdown.
        if not index_name or not query:
            return "Choose Index and Input QUERY!!!"
        return generate_answer(opeanai_key, model_name, index_name, query)

    # The build button lives in the first tab but refreshes the two dropdowns
    # of this tab, so it is wired here where those components exist.
    b_build.click(on_build_click, inputs=[smart_access_token, sheet_id, _opeanai_key, _model_name], outputs=[result, choose_phase, choice])

    b_phase.click(set_analysis, [opeanai_key, model_name, choose_phase], outputs=analysis_text)
    banswer.click(set_answer, [opeanai_key, model_name, choice, query], outputs=answer)


# Combine the two Blocks UIs into a single app with one tab each, then start it
# with request queuing enabled. `share=True` requests a public share link (the
# recorded output of this cell shows a public gradio.live URL), so the app is
# reachable from outside this machine while it runs.
demo = gr.TabbedInterface([pre_ui, run_ui], ["Build Knowledge-Base", "Run System"])
demo.queue().launch(share=True)


Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://67cd927db5c843c147.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


