Merge pull request #4 from NavinKumarMNK/mnk-models-support
[Model Support] Now supports 4 models [Nous-Capybara-34B, Qwen-32B, Mistral-7B, C4AI-35B].
NavinKumarMNK committed Apr 11, 2024
2 parents a13330e + c65b4e4 commit aff8dff
Showing 27 changed files with 417 additions and 444 deletions.
7 changes: 6 additions & 1 deletion .readthedocs.yaml
@@ -4,7 +4,12 @@ version: 2
mkdocs:
configuration: mkdocs.yml

build:
os: ubuntu-22.04
tools:
python: "3.10"

# Optionally declare the Python requirements required to build your docs
python:
install:
- requirements: docs/requirements.txt
- requirements: docs/requirements.txt
4 changes: 2 additions & 2 deletions README.md
@@ -1,10 +1,10 @@
# AI-Learning Platform

## Documentation
## Documentation [![Documentation Status](https://readthedocs.org/projects/ai-learning-platform/badge/?version=latest)](https://ai-learning-platform.readthedocs.io/en/latest/?badge=latest)
- This project uses `mkdocs` as the documentation service
- Serve the documentation:

```bash
pip install mkdocs
mkdocs serve
```
```
2 changes: 2 additions & 0 deletions ml_service/.gitattributes
@@ -0,0 +1,2 @@
*.html binary
*.ipynb binary
4 changes: 3 additions & 1 deletion ml_service/.gitignore
@@ -6,4 +6,6 @@ __pycache__
.env
cudnn.tar.xz
*.tar.gz
core.*
core.*
.env
.history
2 changes: 2 additions & 0 deletions ml_service/Dockerfile
@@ -10,4 +10,6 @@ RUN conda install -y rocketce::onnxruntime
# COPY . /app
ENV RAY_DEDUP_LOGS=1

RUN printenv | awk -F= '{print "export " $1 "=\"" $2 "\""}' >> ~/.bashrc

WORKDIR /app/
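The new `printenv` line persists the image's environment variables into `~/.bashrc`, so later interactive shells (for example via `ray attach`) see the same values. As a hedged illustration, here is the transformation it applies, sketched in Python — `RAY_DEDUP_LOGS` comes from this diff, while `ROOT_PATH=/app` is only an assumed example:

```python
# Sketch of the Dockerfile's `printenv | awk` transform: every NAME=value
# pair becomes an `export NAME="value"` line appended to ~/.bashrc.
# Note the awk version splits on '=', so a value containing a second '='
# would be truncated; this sketch keeps the whole value.
for line in ["RAY_DEDUP_LOGS=1", "ROOT_PATH=/app"]:  # ROOT_PATH is illustrative
    name, _, value = line.partition("=")
    print(f'export {name}="{value}"')
# -> export RAY_DEDUP_LOGS="1"
#    export ROOT_PATH="/app"
```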
40 changes: 20 additions & 20 deletions ml_service/Makefile
@@ -29,53 +29,53 @@ run-vllm-ray:
docker run -it --runtime=nvidia --gpus all -v /data/navin-kumar-m:/data --ipc=host --privileged ray-vllm

ray-up:
ray up -y config/ray-cluster.yaml --no-config-cache -v
ray up -y config/ray/ray-cluster.yaml --no-config-cache -v

ray-down:
ray down -y config/ray-cluster.yaml
ray down -y config/ray/ray-cluster.yaml

ray-attach:
ray attach config/ray-cluster.yaml
ray attach config/ray/ray-cluster.yaml

RAY_BASH_CODE ?= 'python -c "import ray; ray.init()"' # Default Python code
ray-exec:
ray exec config/ray-cluster.yaml $(RAY_BASH_CODE)
ray exec config/ray/ray-cluster.yaml $(RAY_BASH_CODE)

ray-serve-deploy:
serve deploy config/ray-serve.yaml -v
serve deploy config/ray/ray-serve.yaml -v

ray-serve-run:
serve run config/ray-serve.yaml
serve run config/ray/ray-serve.yaml

ray-serve-status:
serve status

ray-log:
ray logs config/ray-cluster.yaml
ray logs config/ray/ray-cluster.yaml

ray-rsync-up:
ray rsync-up config/ray-cluster.yaml -v
ray rsync-up config/ray/ray-cluster.yaml -v

ray-rsync-down:
ray rsync-down config/ray-cluster.yaml
ray rsync-down config/ray/ray-cluster.yaml

ray-up-dev:
ray up -y config/ray-cluster.dev.yaml --no-config-cache -v
ray up -y config/ray/ray-cluster.dev.yaml --no-config-cache -v

ray-down-dev:
ray down -y config/ray-cluster.dev.yaml
ray down -y config/ray/ray-cluster.dev.yaml

ray-attach-dev:
ray attach config/ray-cluster.dev.yaml
ray attach config/ray/ray-cluster.dev.yaml


ray-dev-ssh-keygen:
if [ -z "$(IP)" ]; then echo "IP is not set"; exit 1; fi
if [ -z "$(PORT)" ]; then echo "PORT is not set"; exit 1; fi
ssh-keygen -f "/root/.ssh/known_hosts" -R "[$(IP)]:$(PORT)"
ray-dev-ssh-rmkey:
ssh-keygen -f "$(KH_PATH)" -R "[$(IP)]:$(PORT)"

ray-dev-ssh:
if [ -z "$(IP)" ]; then echo "IP is not set"; exit 1; fi
if [ -z "$(PORT)" ]; then echo "PORT is not set"; exit 1; fi

ssh -p $(PORT) root@$(IP)

rm-pycache:
find . -type d -name __pycache__ -exec rm -r {} +

llm_chat:
python3 ./test/llm_client_http.py
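For context, the `ray-exec` target's default `RAY_BASH_CODE` is the one-liner `python -c "import ray; ray.init()"` — in effect a connectivity smoke test against the cluster brought up from the YAML config. Expanded, with an extra illustrative print that is not part of the default:

```python
# The Makefile's default RAY_BASH_CODE, expanded: attach to the running
# Ray cluster (or start a local instance if none is found).
import ray

ray.init()
print(ray.cluster_resources())  # illustrative addition: list what the cluster exposes
```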
43 changes: 16 additions & 27 deletions ml_service/README.md
@@ -1,40 +1,29 @@
# AI Learning Management System

- The code is written to run on a local Linux PC in the lab.

## Plan
- [ ] Full website of Video Tutorials
- [ ] Deployed Website
- [ ] Deployed LLM Model
- [ ] LLM Model SFT, DPO
- [ ] Dataset
- [ ] PDF parsing
- [ ] Speech-to-text (transcript)
- [ ] LLM chat integration with website

## Stack
### ML
- Python
- PyTorch
- Ray

### Backend
- Go
- PostgreSQL

### Frontend
- React.js

## Setup
> **Note:** Look at the `Makefile` for available commands.
Fill in the configuration in `.env`:
```env
ROOT_PATH=
```

For development
- Before building the image, download the cuDNN archive and place it in the main directory; this avoids authentication when downloading the library during the build.
- Rename the `*.tar.xz` archive to `cudnn.tar.xz` so the Dockerfile can pick it up while building the image.

```bash
docker build . -t <image_name>
```

Run the container
```bash
docker run -it --runtime=nvidia --gpus all --ipc=host --privileged llm_serve
```
docker run -it --runtime=nvidia --gpus all --ipc=host --privileged ml_service
```

Setup Using Ray:
```bash
make ray-up          # starts the cluster (use the -dev targets for a dev environment)
make ray-attach      # attach to the container shell
make ray-serve-run   # start the Ray deployments
```
15 changes: 9 additions & 6 deletions ml_service/config.yaml
@@ -2,19 +2,22 @@ app:
name: vit-ray

llm:
model_name: C4AI-35B # supported = [Nous-Capybara-34B, Qwen-32B, Mistral-7B, C4AI-35B]
time_consecutive_res: 0.5
serve_config:
model: /data/c4ai-35b # name / path
download_dir: null # path to model
load_format: safetensors # format of model {auto, pt, dummy, safetensors}
model: /data/c4ai-35b # supported - [/data/nous-34b, /data/qwen-32b, /data/mistral-7b, /data/c4ai-35b]
download_dir: null # download model dir
load_format: auto #safetensors # format of model {auto, pt, dummy, safetensors}
dtype: float16 # data type {auto, float32, float16, bfloat16}
max_model_len: 8192 # max length of model
max_model_len: 16384 # max length of model
worker_use_ray: false # use ray for worker
engine_use_ray: false # use ray for engine
pipeline_parallel_size: 1 # size of pipeline parallel
# pipeline_parallel_size: 1 # size of pipeline parallel
tensor_parallel_size: 4 # size of tensor parallel
# gpu_memory_utilization: 0.95 # gpu memory utilization
# gpu_memory_utilization: 0.9 # gpu memory utilization
enforce_eager: true
disable_custom_all_reduce: True
# trust_remote_code: true # for cohere models comment it

emb:
serve_config:
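The `model_name` key now selects one of the four per-model prompt-format files added under `ml_service/config/model/`. A minimal sketch of how that lookup might work — the lower-casing convention that maps `C4AI-35B` to `c4ai-35b.yaml` is an assumption, since the repo's actual loader is not part of this diff:

```python
# Sketch (assumption): resolve llm.model_name from config.yaml to its
# prompt-format file under config/model/. File names follow the files
# added in this PR; the project's real loader may differ.
import os
import yaml  # PyYAML

with open("config.yaml") as fh:
    cfg = yaml.safe_load(fh)

model_name = cfg["llm"]["model_name"]  # e.g. "C4AI-35B"
model_cfg = os.path.join("config", "model", f"{model_name.lower()}.yaml")
with open(model_cfg) as fh:
    prompt_format = yaml.safe_load(fh)["prompt_format"]
```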
40 changes: 40 additions & 0 deletions ml_service/config/model/c4ai-35b.yaml
@@ -0,0 +1,40 @@
prompt_format:
system: "<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|># Safety Preamble
The instructions in this section override those in the task description and style guide sections. Don't answer questions that are harmful or immoral.
# System Preamble
## Basic Rules
You are a powerful conversational AI trained by Cohere to help people. You are augmented by a number of tools, and your job is to use and consume the output of these tools to best help the user. You will see a conversation history between yourself and a user, ending with an utterance from the user. You will then see a specific instruction instructing you what kind of response to generate. When you answer the user's requests, you cite your sources in your answers, according to those instructions.
# User Preamble
## Task and Context
You help people answer their questions and other requests interactively. You will be asked a very wide array of requests on all kinds of topics. You will be equipped with a wide range of search engines or similar tools to help you, which you use to research your answer. You should focus on serving the user's needs as best you can, which will be wide-ranging.
## Style Guide
Unless the user asks for a different style of answer, you should answer in full sentences, using proper grammar and spelling.
## Available Tools
Here is a list of tools that you have available to you:
```python
def internet_search(query: str) -> List[Dict]:
    '''Returns a list of relevant document snippets for a textual query retrieved from the internet
    Args:
        query (str): Query to search the internet with
    '''
    pass
```
```python
def directly_answer() -> List[Dict]:
    '''Calls a standard (un-augmented) AI chatbot to generate a response given the conversation history
    '''
    pass
```<|END_OF_TURN_TOKEN|>"
assistant: "<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>{instruction}"
trailing_assistant: ""
user: "<|START_OF_TURN_TOKEN|><|USER_TOKEN|>{instruction}<|END_OF_TURN_TOKEN|>"
system_in_user: false
accept_sys_from_req: false
recursive_sys_prompt: true
10 changes: 10 additions & 0 deletions ml_service/config/model/mistral-7b.yaml
@@ -0,0 +1,10 @@
prompt_format:
system: "Your name is MegAcad and you are an AI Assitant helps in tutoring & guiding undergraduate students \
Think carefully through the topic, step by step in a systematic manner, and allow each step to logically build on the previous one. \
Dont answer to any questions other than studies. Everyone one of your \
response should be in english. use other languages only if its necessary.\n"
assistant: "{instruction}"
trailing_assistant: ""
user: " [INST] {system} {instruction} [/INST]"
system_in_user: true
accept_sys_from_req: false
10 changes: 10 additions & 0 deletions ml_service/config/model/nous-capybara-34b.yaml
@@ -0,0 +1,10 @@
prompt_format:
system: "Your name is MegAcad and you are an AI Assitant helps in tutoring & guiding undergraduate students \
Think carefully through the topic, step by step in a systematic manner, and allow each step to logically build on the previous one. \
Dont answer to any questions other than studies. Everyone one of your \
response should be in english. use other languages only if its necessary.\n"
assistant: "{instruction}"
trailing_assistant: ""
user: "USER: {system} {instruction} ASSISTANT:"
system_in_user: true
accept_sys_from_req: false
12 changes: 12 additions & 0 deletions ml_service/config/model/qwen-32b.yaml
@@ -0,0 +1,12 @@
prompt_format:
system: "<|im_start|>system
Your name is MegAcad and you are an AI Assistant that helps in tutoring & guiding undergraduate students. \
Think carefully through the topic, step by step in a systematic manner, and allow each step to logically build on the previous one. \
Don't answer any questions other than studies. Every one of your \
responses should be in English. Don't use other languages unless necessary.<|im_end|>
assistant: "<|im_start|>assistant{instruction}"
trailing_assistant: ""
user: "<|im_start|>user
{instruction}<|im_end|>"
system_in_user: false
accept_sys_from_req: false
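All four model files share the same `prompt_format` keys; `system_in_user` controls whether the system text is spliced into the user turn (Mistral, Nous-Capybara) or emitted as its own turn (Qwen, C4AI). A hedged sketch of how these templates might be combined for a single turn — the repo's actual formatter is not shown in this diff:

```python
# Sketch (assumption): build a single-turn prompt from a prompt_format
# mapping like the YAML files above; multi-turn history and the
# accept_sys_from_req / recursive_sys_prompt flags are ignored here.
def build_prompt(fmt: dict, system_text: str, user_text: str) -> str:
    if fmt.get("system_in_user"):
        # Mistral / Nous-Capybara style: system text inside the user turn
        prompt = fmt["user"].format(system=system_text, instruction=user_text)
    else:
        # Qwen / C4AI style: the system block is its own turn
        prompt = fmt["system"] + fmt["user"].format(instruction=user_text)
    # open the assistant turn so generation continues from there
    return prompt + fmt["assistant"].format(instruction="") + fmt["trailing_assistant"]
```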
File renamed without changes.
File renamed without changes.
ml_service/config/ray/ray-serve.yaml
@@ -37,18 +37,7 @@ applications:
# num_cpus: 32
# num_gpus: 4

# - name: stt
# route_prefix: /stt
# import_path: stt_serve:app
#
# deployments:
# - name: STTDeployment
# num_replicas: 1
# max_concurrent_queries: 8
# ray_actor_options:
# num_gpus: 1

- name: embedding
- name: emb
route_prefix: /api/v1/embedder
import_path: emb_serve:main
args:
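With the app renamed from `embedding` to `emb`, the embedder stays reachable at `route_prefix: /api/v1/embedder`. A hedged example of calling it once `serve run` is up — the port is Ray Serve's default (8000) and the `{"text": ...}` payload shape is an assumption, since `generate_embedding`'s request schema is outside this diff:

```python
# Sketch (assumptions: default Serve port 8000, JSON body {"text": ...}).
import json
import urllib.request

req = urllib.request.Request(
    "http://127.0.0.1:8000/api/v1/embedder",
    data=json.dumps({"text": "hello world"}).encode(),
    headers={"Content-Type": "application/json"},
)
with urllib.request.urlopen(req) as resp:
    print(json.load(resp))  # JSONResponse body from the deployment
```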
4 changes: 2 additions & 2 deletions ml_service/emb_serve.py
@@ -84,8 +84,8 @@ async def generate_embedding(self, request: Request) -> JSONResponse:
def main(args: Dict[str, str]) -> Application:
# load env
load_env()
EMB_PATH = os.getcwd()
CONFIG_FILE = os.path.join(EMB_PATH, "config.yaml")
ROOT_PATH = os.environ.get("ROOT_PATH", None)
CONFIG_FILE = os.path.join(ROOT_PATH, "config.yaml")
if not os.path.exists(CONFIG_FILE):  # note: os.path.exists() returns a bool, so the old `is None` check could never fire
    raise ConfigFileMissingError(
        "config.yaml not found under ROOT_PATH."
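The change above stops deriving the config path from the working directory and reads `ROOT_PATH` from the environment instead, matching the `ROOT_PATH=` entry the README asks for in `.env`. The project's `load_env` helper is not included in this diff; a minimal sketch of what it presumably does:

```python
# Sketch (assumption): a load_env that copies KEY=VALUE pairs from a
# .env file into os.environ without overwriting existing variables.
import os

def load_env(path: str = ".env") -> None:
    with open(path) as fh:
        for raw in fh:
            line = raw.strip()
            if not line or line.startswith("#") or "=" not in line:
                continue
            key, _, value = line.partition("=")
            os.environ.setdefault(key.strip(), value.strip())
```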