# MakerSpace Jam

In [None]:
# Jupyter already runs an asyncio event loop; nest_asyncio patches asyncio so
# that library code which starts its own loop can still run inside the notebook.
import nest_asyncio

nest_asyncio.apply()

import logging
import sys

# Send INFO-and-above log records to stdout so they show up in the cell output.
# basicConfig already installs a stdout StreamHandler on the root logger, so no
# second handler is added here (a second one would print every record twice).
# force=True replaces handlers left over from an earlier run of this cell or
# installed by another library, which keeps the cell idempotent on re-run.
logging.basicConfig(stream=sys.stdout, level=logging.INFO, force=True)

## Primary Dependencies and Context Setting

In [None]:
!pip3 install -U -q openai==0.27.8 llama-index==0.8.6 nltk==3.8.1 python-dotenv

### Load the OpenAI API key

In [None]:
import os
import openai
from dotenv import load_dotenv

# Pull variables from a local .env file (if present) into the process environment.
load_dotenv()
# Indexing os.environ raises KeyError when OPENAI_API_KEY is not set, so a
# missing key fails here rather than at the first API call.
openai.api_key = os.environ["OPENAI_API_KEY"]

## Data Loading

### Webpage

Using the Trafilatura Web Reader.

In [None]:
!pip3 install trafilatura

In [None]:
# Pages whose text will be fetched and indexed.
webpages = ["https://www.basketball-reference.com/"]

In [None]:
from llama_index import TrafilaturaWebReader

# Fetch every page listed in `webpages` (not only the first one) so that adding
# a URL to the list is enough to have it loaded.
web_docs = TrafilaturaWebReader().load_data(webpages)
# Preview only the start of the first document instead of dumping the whole page.
web_docs[0].text[:500]

## Load Online Workbooks

In [None]:
!pip3 install -q -U pandas

In [None]:
import pandas as pd
from urllib.parse import quote


def get_df_from_workbook(sheet_name,
                         workbook_id = '1MB1ZsQul4AB262AsaY4fHtGW4HWp2-56zB-E5xTbs2A'):
    """Load one worksheet of a Google Sheets workbook into a DataFrame.

    The sheet is requested as CSV through the workbook's ``gviz/tq`` endpoint
    and parsed with ``pandas.read_csv``.

    Args:
        sheet_name: Name of the worksheet (tab) to download.
        workbook_id: Identifier of the Google Sheets workbook, i.e. the part of
            the document URL after ``/spreadsheets/d/``.

    Returns:
        Whatever ``pandas.read_csv`` returns for the sheet's CSV export.
    """
    # Percent-encode the sheet name: it lands in the query string, where spaces,
    # '&', '#', parentheses, etc. would otherwise corrupt or truncate the URL.
    encoded_sheet = quote(str(sheet_name), safe='')
    url = f'https://docs.google.com/spreadsheets/d/{workbook_id}/gviz/tq?tqx=out:csv&sheet={encoded_sheet}'
    return pd.read_csv(url)

In [None]:
# Worksheets to pull from the workbook; each one becomes a DataFrame.
sheet_names = [
    'Project_Metadata',
    'Teams',
    'Players_2023-24',
    'Schedule_2023-24',
    'Player_Stats_2022-23_(Playoffs)',
    'Player_Stats_2022-23_(Regular_Season)',
]
# Map every sheet name to the DataFrame loaded for it, in list order.
dict_of_dfs = dict(zip(sheet_names, map(get_df_from_workbook, sheet_names)))
# Show the first rows of the metadata sheet.
dict_of_dfs['Project_Metadata'].head()

## Indexing

### Context setting

In [None]:
from llama_index import ServiceContext
from llama_index.llms import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding

# Chat model used by the pipeline: gpt-3.5-turbo at temperature 0, streaming on.
llm = OpenAI(model="gpt-3.5-turbo", temperature=0, streaming=True)

# Embedding model, created with its default settings.
embed_model = OpenAIEmbedding()

# Chunk size handed to the service context below.
chunk_size = 1000

# Bundle the LLM, embedding model and chunk size into one shared context.
service_context = ServiceContext.from_defaults(
    embed_model=embed_model,
    chunk_size=chunk_size,
    llm=llm,
)

### Vector Store

This is intended to be a global vector store into which the nodes from all data sources are inserted.

In [None]:
!pip3 install -U -q chromadb==0.4.6 tiktoken==0.4.0 sentence-transformers==2.2.2 pydantic==1.10.11

In [None]:
from llama_index import VectorStoreIndex
from llama_index.vector_stores import ChromaVectorStore
from llama_index.storage.storage_context import StorageContext
import chromadb

In [None]:
# Chroma client created with default settings (no persistence path is configured here).
chroma_client = chromadb.Client()
# get_or_create_collection keeps this cell re-runnable: create_collection raises
# once a collection named "all_data" already exists on the client.
chroma_collection = chroma_client.get_or_create_collection("all_data")
# Wrap the collection so it can back the index's storage.
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
storage_context = StorageContext.from_defaults(vector_store=vector_store)
# Start from an index built with no nodes; nodes are inserted in a later cell.
vector_index = VectorStoreIndex([], storage_context=storage_context, service_context=service_context)

### Construct Nodes & Populate Index

Parse nodes for each loaded data source and insert them into the vector store.

In [78]:
# Reuse the node parser held by the service context so this cell does not
# depend on a `node_parser` variable left over from an earlier kernel session.
node_parser = service_context.node_parser

# Parse nodes from the loaded web pages and insert them into the vector index.
# NOTE(review): pairing by position assumes load_data returned one document per
# URL, in the same order as `webpages` — confirm.
# NOTE(review): the workbook DataFrames in dict_of_dfs are not indexed by this cell.
for url, w_doc in zip(webpages, web_docs):
    nodes = node_parser.get_nodes_from_documents([w_doc])
    for node in nodes:
        # Replace the node's metadata wholesale with the web page description.
        node.metadata = {'title': 'Basketball Stats and History',
                         'type': 'webpage',
                         'url': url,
                         'description': 'Basketball Stats and History',
                         'accessibility': 'public'
                        }

    # Insert once per document, after all of its nodes have been tagged.
    vector_index.insert_nodes(nodes)

All that's left to do is attach the tool to an OpenAIAgent and let it rip!

Source Code Here:
- [`OpenAIAgent`](https://github.com/jerryjliu/llama_index/blob/d24767b0812ac56104497d8f59095eccbe9f2b08/llama_index/agent/openai_agent.py#L361)

In [None]:
from llama_index.agent import OpenAIAgent

# Build the agent around a single tool, passing the notebook's `llm` object.
# NOTE(review): `auto_retrieve_tool` is not defined in any visible cell —
# confirm it is created before this cell runs on a fresh kernel.
agent = OpenAIAgent.from_tools([auto_retrieve_tool], llm=llm, verbose=True)

In [None]:
# Ask the agent one question and print the textual form of its reply
# (print() applies str() to its argument).
response = agent.chat("Who is the current 'Trending Player' in the NBA?")
print(response)