In [5]:
import getpass
import json
import os

import groq
import openai
from groq import Groq
from llama_index.core import SimpleDirectoryReader


In [6]:
# Never store API keys in the notebook: anything typed here is saved with the
# file and ends up in version control and shared copies.
# Use the key already exported in the environment; otherwise prompt for it
# without echoing, so the secret never lands in the cell source or its output.
# NOTE: any key that was ever committed in this cell should be treated as
# compromised and rotated.
if not os.environ.get("GROQ_API_KEY"):
    os.environ["GROQ_API_KEY"] = getpass.getpass("GROQ_API_KEY: ")

# Summarize Files


In [8]:
# Read the files that sit directly in the current working directory.
reader = SimpleDirectoryReader(input_dir=".")
documents = reader.load_data()

# Flatten each document into a plain dict: its text under "content", followed
# by the document's metadata entries (a metadata key named "content" would
# take precedence over the text).
# NOTE(review): the reader appears to return several documents for some files,
# so one file can show up here more than once -- confirm.
doc_dicts = [
    {"content": document.text, **document.metadata}
    for document in documents
]

In [9]:
# Show the loaded records (text plus metadata for every document).
# NOTE(review): this echoes full file contents into the saved cell output;
# consider displaying a slice instead.
doc_dicts

[{'content': 'MIT License\n\nCopyright (c) 2024 Ajay Arasanipalai\n\nPermission is hereby granted, free of charge, to any person obtaining a copy\nof this software and associated documentation files (the "Software"), to deal\nin the Software without restriction, including without limitation the rights\nto use, copy, modify, merge, publish, distribute, sublicense, and/or sell\ncopies of the Software, and to permit persons to whom the Software is\nfurnished to do so, subject to the following conditions:\n\nThe above copyright notice and this permission notice shall be included in all\ncopies or substantial portions of the Software.\n\nTHE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\nIMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\nFITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\nAUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\nLIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWI

In [10]:
# Prompt for the summarisation request. The reply is parsed as a JSON object
# and its "files" entry is read, so the wording asks for an object rather than
# a list, matching the schema shown to the model.
# Doubled braces ({{ }}) are literal braces inside the f-string; {doc_dicts}
# is the only interpolated value.
PROMPT = f"""
The following is a list of file contents, along with their metadata. For each file, provide a summary of the contents.

{doc_dicts}

Return a JSON object with the following schema:

```json
{{
  "files": [
    {{
      "filename": "name of the file",
      "summary": "summary of the content"
    }}
  ]
}}
```
""".strip()

In [11]:
# Groq client authenticated with the key held in the GROQ_API_KEY env var.
client = Groq(api_key=os.getenv("GROQ_API_KEY"))

# Send the summarisation prompt with JSON-object mode switched on.
chat_completion = client.chat.completions.create(
    model="llama3-70b-8192",
    response_format={"type": "json_object"},
    messages=[
        dict(
            role="system",
            content="Always return JSON. Do not include any other text or formatting characters.",
        ),
        dict(role="user", content=PROMPT),
    ],
)

# Decode the first choice and keep only the list stored under "files".
summaries = json.loads(
    chat_completion.choices[0].message.content
)["files"]

In [12]:
# Show the per-file summaries parsed from the model reply.
summaries

[{'filename': 'LICENSE', 'summary': 'MIT license information'},
 {'filename': 'loader.py',
  'summary': 'Function definition for getting document summaries'},
 {'filename': 'requirements.txt',
  'summary': 'List of dependencies for the project'},
 {'filename': 'scratch.ipynb',
  'summary': 'Code for interacting with LLaMA and Groq'},
 {'filename': 'scratch.ipynb',
  'summary': 'Code for interacting with LLaMA and Groq'},
 {'filename': 'scratch.ipynb',
  'summary': 'Code for interacting with LLaMA and Groq'}]

# Create File Tree


In [13]:
# Prompt for the reorganisation request. The summaries are interpolated above
# the instructions, and the reply is later parsed as a JSON list whose items
# each carry a "path" that is joined onto a base directory, so the wording
# points at the list above, asks for relative paths and states the reply shape.
PROMPT = f"""
The following is a list of files and a summary of their contents. Read them carefully, then propose a directory structure that optimally organizes the files using known conventions and best practices.

{summaries}

You will solve this task by adding a `path` key to each JSON object in the list above. The value of the `path` key should be the relative path where that file belongs in your proposed directory structure. Return only the updated JSON list.
""".strip()

In [14]:
# Groq client authenticated with the key held in the GROQ_API_KEY env var.
client = Groq(api_key=os.getenv("GROQ_API_KEY"))

# Send the reorganisation prompt. JSON-object mode is deliberately not
# requested for this call.
# NOTE(review): presumably because the reply is consumed as a top-level list
# rather than an object -- confirm.
chat_completion = client.chat.completions.create(
    model="llama3-70b-8192",
    messages=[
        dict(
            role="system",
            content="Always return JSON. Do not include any other text or formatting characters.",
        ),
        dict(role="user", content=PROMPT),
    ],
)

# Decode the first choice as-is; there is no wrapper key to unwrap here.
file_tree = json.loads(
    chat_completion.choices[0].message.content
)

In [16]:
import pathlib

In [24]:
# Materialise the proposed layout as empty placeholder files inside a sandbox
# directory, so the structure can be inspected without touching real files.
BASE_DIR = pathlib.Path("test_dir")
BASE_DIR.mkdir(exist_ok=True)
sandbox_root = BASE_DIR.resolve()

for file in file_tree:
    # Entries keep their "path" converted to a Path object, as before.
    file["path"] = pathlib.Path(file["path"])
    target = (BASE_DIR / file["path"]).resolve()

    # The paths come from model output. Joining an absolute path discards
    # BASE_DIR entirely, and ".." segments can climb out of it; writing there
    # would create or truncate a real file outside the sandbox.
    if sandbox_root not in target.parents:
        raise ValueError(
            f"Refusing to create {file['path']} outside of {BASE_DIR}"
        )

    # Create intermediate directories, then create the file (or empty it if it
    # already exists).
    target.parent.mkdir(parents=True, exist_ok=True)
    target.write_text("")

In [9]:
# Scratch check of JSON parsing on a sample of per-file summary records.
# Several comma-separated objects are only valid JSON as elements of a list,
# so the records are wrapped in [...]; without the brackets json.loads fails
# with an "Extra data" error after the first object.
sample_summaries = json.loads(
    """
    [
        {
            "file_path": "/Users/iyaja/Git/llama-fs/LICENSE",
            "summary": "MIT License"
        },
        {
            "file_path": "/Users/iyaja/Git/llama-fs/main.py",
            "summary": "Python script"
        },
        {
            "file_path": "/Users/iyaja/Git/llama-fs/requirements.txt",
            "summary": "List of dependencies"
        },
        {
            "file_path": "/Users/iyaja/Git/llama-fs/scratch.ipynb",
            "summary": "Jupyter notebook"
        }
    ]
    """
)
sample_summaries

NameError: name 'json' is not defined

# Test Server

In [33]:
import requests

In [39]:
# POST the directory path to the /batch endpoint of the server listening on
# localhost:8000 and display the decoded JSON reply.
requests.post(
    url="http://127.0.0.1:8000/batch",
    json=dict(path="/Users/iyaja/Git/llama-fs"),
).json()

[{'src_path': 'electron-react-app/CHANGELOG.md',
  'dst_path': '/Users/iyaja/Git/llama-fs/electron-react-app/docs/CHANGELOG.md',
  'summary': 'The Electron React Boilerplate is a set of tools and configurations to help you build a React application with Electron. It includes features such as hot reloading, debugging, and testing. The boilerplate is designed to be flexible and customizable, allowing you to easily integrate your own dependencies and configurations.'},
 {'src_path': 'electron-react-app/CODE_OF_CONDUCT.md',
  'dst_path': '/Users/iyaja/Git/llama-fs/electron-react-app/docs/CODE_OF_CONDUCT.md',
  'summary': 'A code of conduct for contributors and maintainers of a project, outlining expected behavior and consequences for unacceptable behavior, adapted from the Contributor Covenant.'},
 {'src_path': 'electron-react-app/README.md',
  'dst_path': '/Users/iyaja/Git/llama-fs/electron-react-app/docs/README.md',
  'summary': 'README file for Electron React Boilerplate, containing ins

# Fix the mess that is Llama Index

In [29]:
from llama_index.core import SimpleDirectoryReader
from llama_index.core import Document
from llama_index.core.node_parser import TokenTextSplitter

In [30]:
# Token-based splitter producing chunks of at most 6144 tokens.
# NOTE(review): presumably sized to leave headroom under the 8192-token
# context of the "llama3-70b-8192" model used earlier -- confirm.
splitter = TokenTextSplitter(chunk_size=6144)

In [32]:
# Walk the current directory recursively and collect one Document per file.
reader = SimpleDirectoryReader(input_dir=".", recursive=True)
all_docs = []
for file_docs in reader.iter_data():
    if len(file_docs) <= 1:
        # Nothing to merge: keep whatever was loaded for this file.
        all_docs.extend(file_docs)
        continue

    # The file was loaded as several documents: join their texts with
    # newlines, keep only the first chunk the splitter produces, and reuse the
    # metadata of the first document.
    merged_text = "\n".join(part.text for part in file_docs)
    first_chunk = splitter.split_text(merged_text)[0]
    all_docs.append(Document(text=first_chunk, metadata=file_docs[0].metadata))

[Document(id_='6bb86b93-10b2-4dbe-b646-ba0020a84d70', embedding=None, metadata={'file_path': '/Users/iyaja/Git/llama-fs/LICENSE', 'file_name': 'LICENSE', 'file_size': 1074, 'creation_date': '2024-05-11', 'last_modified_date': '2024-05-11'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={}, text='MIT License\n\nCopyright (c) 2024 Ajay Arasanipalai\n\nPermission is hereby granted, free of charge, to any person obtaining a copy\nof this software and associated documentation files (the "Software"), to deal\nin the Software without restriction, including without limitation the rights\nto use, copy, modify, merge, publish, distribute, sublicense, and/or sell\ncopies of the Software, and to permit persons to whom the Software is\nfurnished to do so, subject to the foll