[AddVer] fix bug of iqSub and update version to V2.0.0
leofang3 committed Aug 17, 2023
1 parent 720acec commit 99156da
Showing 9 changed files with 94 additions and 139 deletions.
5 changes: 3 additions & 2 deletions README.md
@@ -79,13 +79,14 @@ chmod +x ./bin/test_thinker

## Capabilities
* [thinker API](thinker/docs/tutorial/thinker_api.md)
* [Supported quantized OP list](https://github.com/LISTENAI/linger/blob/main/doc/tutorial/support_quant_ops.md)[Model structure restrictions](thinker/docs/tutorial/restrain_of_model.md)
* [Supported quantized OP list](https://github.com/LISTENAI/linger/blob/main/doc/tutorial/support_quant_ops.md)
* [Model structure restrictions](thinker/docs/tutorial/restrain_of_model.md)

## Application Examples
* Snoring detection: [https://github.com/mywang44/snoring_net]

## Release Notes
- See [RELEASE](doc/tutorial/release.md)
- See [RELEASE](thinker/docs/tutorial/release.md)

## Communication and Feedback
- You are welcome to submit bugs and suggestions through GitHub Issues
2 changes: 1 addition & 1 deletion setup.py
@@ -2,7 +2,7 @@

setup(
name="pythinker",
version="1.1.0",
version="2.0.0",
description="A DeepLearning inference framework for venus",
author="listenai",
author_email="lingerthinker@listenai.com",
6 changes: 3 additions & 3 deletions thinker/docs/tutorial/install.md
@@ -39,7 +39,7 @@ cd thinker && sh ./script/x86_linux.sh

### Installation via the pip package
``` shell
pip install pythinker==1.1.0
pip install pythinker
```

### Installation via the Docker image
@@ -73,12 +73,12 @@ $ sudo systemctl start docker # usage of the systemctl command
3. Pull and load the image
1) Pull the image
```shell
docker pull listenai/thinker:1.1.0
docker pull listenai/thinker:2.0.0
```

2) Run the container
```shell
docker container run -it listenai/thinker:1.1.0 /bin/bash
docker container run -it listenai/thinker:2.0.0 /bin/bash
```

If everything is working, running the command above will return a command-line prompt.
1 change: 1 addition & 0 deletions thinker/docs/tutorial/release.md
@@ -1,2 +1,3 @@
v2.0.0 2023.8.17 Fixed the offline memory allocation issue of iqAdd and iqSub; the packing tool must stay consistent with the engine code, so the major version is bumped;
v1.1.0 2023.8.15 Added constant folding and the ability to specify data placement to the packing tool, and improved generation of the memory analysis report. Added several common interfaces to the engine executor to improve generality; refined existing operators and added support for more new operators;
v1.0.0 2022.10.24 Initial release
10 changes: 5 additions & 5 deletions thinker/docs/tutorial/restrain_of_model.md
@@ -3,16 +3,16 @@
- Total available PSRAM space is 8 MB, and available built-in FLASH space is 8 MB. A single model must not exceed 8 MB overall.
## II. Restrictions on individual operators
### 1. Common restrictions for the conv1dint/conv2dint/deconv2dint/pool operators
- kernel_size = {1,2,3,4,5}; kernel_h ≠ kernel_w is supported (conv1dint supports kernel_size > 5)
- stride_size = {1,2,4}; stride_h ≠ stride_w is supported
- pad_size = {0,1,2,3}; padding can be set independently for all four directions
- kernel_size = (1,2,3,4,5); kernel_h ≠ kernel_w is supported (conv1dint supports kernel_size > 5)
- stride_size = (1,2,4); stride_h ≠ stride_w is supported
- pad_size = (0,1,2,3); padding can be set independently for all four directions
- in_w >= weight_w, and in_h >= weight_h
- weight_w >= stride_w, and weight_h >= stride_h
- pad_h_up < weight_h, and pad_h_down < weight_h
- pad_w_right < weight_w, and pad_w_left < weight_w
### 2. Restrictions on deconv
* when stride_h(stride_w) = 2, kernel_h(kernel_w) = {2,3,4,5}
* when stride_h(stride_w) = 4, kernel_h(kernel_w) = {4,5}
* when stride_h(stride_w) = 2, kernel_h(kernel_w) = (2,3,4,5)
* when stride_h(stride_w) = 4, kernel_h(kernel_w) = (4,5)
### 3. Restrictions on linearInt/BmmInt
* The left input matrix (M*N) must not exceed 64 KB after alignment
* Alignment by data type:
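
For reference, the shared conv/pool restrictions above can be expressed as a quick pre-check. The sketch below is illustrative only; `ConvParams` and its field names are hypothetical and not part of thinker's API, and weight_h/weight_w are treated as the kernel dimensions:

```c
#include <stdbool.h>

/* Hypothetical parameter bundle for illustration; not a thinker structure. */
typedef struct {
  int kernel_h, kernel_w;
  int stride_h, stride_w;
  int pad_up, pad_down, pad_left, pad_right;
  int in_h, in_w;
} ConvParams;

static bool value_in(int v, const int *set, int n) {
  for (int i = 0; i < n; ++i)
    if (set[i] == v) return true;
  return false;
}

/* Checks the shared conv2dint/pool restrictions listed above
 * (conv1dint additionally allows kernel_size > 5). */
static bool check_conv2d_restrictions(const ConvParams *p) {
  const int kernels[] = {1, 2, 3, 4, 5};
  const int strides[] = {1, 2, 4};
  if (!value_in(p->kernel_h, kernels, 5) || !value_in(p->kernel_w, kernels, 5)) return false;
  if (!value_in(p->stride_h, strides, 3) || !value_in(p->stride_w, strides, 3)) return false;
  if (p->pad_up > 3 || p->pad_down > 3 || p->pad_left > 3 || p->pad_right > 3) return false;
  if (p->in_w < p->kernel_w || p->in_h < p->kernel_h) return false;      /* in >= weight */
  if (p->kernel_w < p->stride_w || p->kernel_h < p->stride_h) return false; /* weight >= stride */
  if (p->pad_up >= p->kernel_h || p->pad_down >= p->kernel_h) return false; /* pad_h < weight_h */
  if (p->pad_right >= p->kernel_w || p->pad_left >= p->kernel_w) return false; /* pad_w < weight_w */
  return true;
}
```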
4 changes: 2 additions & 2 deletions thinker/executor/c_api/thinker_define.h
@@ -20,8 +20,8 @@

#define STR_IMP(x) #x
#define STR(x) STR_IMP(x)
#define THINKER_VERSION_MAJOR 1
#define THINKER_VERSION_MINOR 1
#define THINKER_VERSION_MAJOR 2
#define THINKER_VERSION_MINOR 0
#define THINKER_VERSION_PATCH 0
#define THINKER_VERSION \
STR(THINKER_VERSION_MAJOR) \
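
The `THINKER_VERSION` macro (truncated above) stringifies these numeric components via the two-level `STR`/`STR_IMP` pattern. A minimal standalone sketch, assuming the hidden continuation joins major, minor, and patch with dots:

```c
#include <stdio.h>

/* Two-level expansion so macro arguments are expanded before stringification. */
#define STR_IMP(x) #x
#define STR(x) STR_IMP(x)

#define THINKER_VERSION_MAJOR 2
#define THINKER_VERSION_MINOR 0
#define THINKER_VERSION_PATCH 0

/* Assumed continuation of the macro: "2" "." "0" "." "0" -> "2.0.0" */
#define THINKER_VERSION          \
  STR(THINKER_VERSION_MAJOR) "." \
  STR(THINKER_VERSION_MINOR) "." \
  STR(THINKER_VERSION_PATCH)

int main(void) {
  printf("thinker version %s\n", THINKER_VERSION); /* prints "thinker version 2.0.0" */
  return 0;
}
```

With the values bumped in this commit, the macro expands to the string "2.0.0".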
117 changes: 16 additions & 101 deletions thinker/executor/core/ops/venus/iqadd.h
@@ -16,19 +16,17 @@ int32_t iqadd_luna(tTensor *X1, tTensor *X2, tTensor *Temp, tTensor *Y) {
void *src1 = (void *)X1->dptr_;
void *src2 = (void *)X2->dptr_;
void *dst = (void *)Y->dptr_;
int8_t *tmp_buf1 = NULL;
int8_t *tmp_buf2 = NULL;
size_t size = getTensorSize(X1);

if ((x1_q < y_q) || x2_q < y_q) {
return ret;
}

int32_t x1_is_psram = 0;
int32_t x2_is_psram = 0;
int32_t y_is_psram = 0;
int32_t used_tmp_size = 0;

if (Temp)
{
tmp_buf1 = (int8_t *)Temp->dptr_;
}

if ((1 == X1->mem_.type_ || 3 == X1->mem_.type_) &&
(Temp)) // need copy psram to share
{
@@ -48,55 +46,28 @@ int32_t iqadd_luna(tTensor *X1, tTensor *X2, tTensor *Temp, tTensor *Y) {
}

if (equalShape(&X1->shape_, &X2->shape_) && (X1->dtype_ == X2->dtype_)) {
int32_t scale1 = 1;
int32_t scale2 = 1;
int32_t shift1 = 0;
int32_t shift2 = 0;

if (x1_q > y_q)
{
shift1 = x1_q - y_q;
}
else
{
scale1 = (1 << (y_q - x1_q));
}
if (x2_q > y_q)
{
shift2 = x2_q - y_q;
}
else
{
scale2 = (1 << (y_q - x2_q));
}
int32_t shift1 = x1_q - y_q;
int32_t shift2 = x2_q - y_q;
switch (X1->dtype_) {
case Int8: {
if (x1_is_psram){
src1 = (int8_t *)Temp->dptr_;
memcpy(src1, (void *)X1->dptr_, size * sizeof(int8_t));
if (x1_q != y_q){
ret = luna_scale_q7_int8((const q7_t *)src1, (scale1), (int8_t *)src1, size, shift1);
}
}
else{
if (x1_q != y_q){
src1 = (int8_t *)Temp->dptr_;
ret = luna_scale_q7_int8((const q7_t *)X1->dptr_, (scale1), (int8_t *)src1, size, shift1);
}

if (x1_q != y_q){
ret = luna_scale_q7_int8((const q7_t *)src1, (1), (int8_t *)Temp->dptr_, size, shift1);
src1 = (int8_t *)Temp->dptr_;
}

if (x2_is_psram){
src2 = (int8_t *)Temp->dptr_ + (x1_is_psram) * size;
src2 = (int8_t *)Temp->dptr_ + ((x1_is_psram) || (x1_q != y_q)) * size;
memcpy(src2, (void *)X2->dptr_, size * sizeof(int8_t));
}
if (x2_q != y_q){
ret = luna_scale_q7_int8((const q7_t *)src2, (scale2), (int8_t *)src2, size, shift2);
ret = luna_scale_q7_int8((const q7_t *)src2, (1), (int8_t *)Temp->dptr_ + ((x1_is_psram) || (x1_q != y_q)) * size, size, shift2);
src2 = (int8_t *)Temp->dptr_ + ((x1_is_psram) || (x1_q != y_q)) * size;
}
}
else{
if (x2_q != y_q){
src2 = (int8_t *)Temp->dptr_ + ((x1_is_psram) || (x1_q != y_q)) * size;
ret = luna_scale_q7_int8((const q7_t *)X2->dptr_, (scale2), (int8_t *)src2, size, shift2);
}
}

if (y_is_psram){
dst = (int8_t *)Temp->dptr_;
@@ -107,62 +78,6 @@ int32_t iqadd_luna(tTensor *X1, tTensor *X2, tTensor *Temp, tTensor *Y) {
if (y_is_psram){
memcpy((void *)Y->dptr_, dst, size * sizeof(int8_t));
}

// if (Temp)
// {
// tmp_buf2 = tmp_buf1 + size * sizeof(int8_t);
// if (x1_is_psram)
// {
// memcpy(tmp_buf1, src1, size * sizeof(int8_t));
// src1 = tmp_buf1;
// used_tmp_size += size * sizeof(int8_t);
// }
// if (x2_is_psram)
// {
// memcpy(tmp_buf2, src2, size * sizeof(int8_t));
// src2 = tmp_buf2;
// used_tmp_size += size * sizeof(int8_t);
// }
// if (y_is_psram)
// {
// dst = tmp_buf1;
// }
// ret = luna_scale_q7_int8((const q7_t *)src1, (scale1), (int8_t *)tmp_buf1, size,
// shift1);
// ret = luna_scale_q7_int8((const q7_t *)src2, (scale2), (int8_t *)tmp_buf2, size,
// shift2);
// ret = luna_add_q7_int8((const q7_t *)tmp_buf1, (q7_t *)tmp_buf2, (int8_t *)dst,
// size, 0);
// }
// else
// {
// ret = luna_scale_q7_int8((const q7_t *)src1, (scale1), (int8_t *)dst, size,
// shift1);
// ret = luna_scale_q7_int8((const q7_t *)src2, (scale2), (int8_t *)src2, size,
// shift2);
// ret = luna_add_q7_int8((const q7_t *)dst, (q7_t *)src2, (int8_t *)dst,
// size, 0);
// }
// if (y_is_psram)
// {
// memcpy((void *)Y->dptr_, dst, size * sizeof(int8_t));
// }
} break;
case Int16: {
ret = luna_scale_q15_int16((const q15_t *)src1, (scale1), (int16_t *)dst,
size, shift1);
ret = luna_scale_q15_int16((const q15_t *)src2, (scale2), (int16_t *)src2,
size, shift2);
ret = luna_add_q15_int16((const q15_t *)dst, (q15_t *)src2,
(int16_t *)dst, size, 0);
} break;
case Int32: {
ret = luna_scale_q31_int32((const q31_t *)src1, (scale1), (int32_t *)dst,
size, shift1);
ret = luna_scale_q31_int32((const q31_t *)src2, (scale2), (int32_t *)src2,
size, shift2);
ret = luna_add_q31_int32((const q31_t *)dst, (q31_t *)src2,
(int32_t *)dst, size, 0);
} break;
default:
break;
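
The reworked Int8 path stages PSRAM operands into the shared `Temp` buffer and requantizes each input from its own Q format to the output Q format before the element-wise add. A simplified sketch of that requantize-then-add idea in plain C, assuming `luna_scale_q7_int8(src, scale, dst, n, shift)` behaves roughly like `dst[i] = (src[i] * scale) >> shift` (an assumption about the LUNA kernel, not a documented contract):

```c
#include <stdint.h>
#include <stddef.h>

/* Requantize both int8 operands from Qx1/Qx2 to Qy by an arithmetic right
 * shift (the operator rejects x_q < y_q, so the shifts are non-negative),
 * then add element-wise with clamping to the int8 range. */
static void iqadd_int8_sketch(const int8_t *x1, int x1_q,
                              const int8_t *x2, int x2_q,
                              int8_t *y, int y_q, size_t n) {
  const int shift1 = x1_q - y_q;
  const int shift2 = x2_q - y_q;
  for (size_t i = 0; i < n; ++i) {
    int32_t a = x1[i] >> shift1;  /* requantize operand 1 */
    int32_t b = x2[i] >> shift2;  /* requantize operand 2 */
    int32_t s = a + b;
    if (s > 127) s = 127;         /* saturate to int8 */
    if (s < -128) s = -128;
    y[i] = (int8_t)s;
  }
}
```

In the real operator the requantized copies live in `Temp` (shared memory) whenever an input sits in PSRAM or its Q format differs from the output's, which is exactly the allocation this commit fixes.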
64 changes: 40 additions & 24 deletions thinker/executor/core/ops/venus/iqsub.h
@@ -22,47 +22,63 @@ int32_t iqsub_luna(tTensor *X1, tTensor *X2, tTensor *Temp, tTensor *Y) {
return ret;
}

int32_t x1_is_psram = 0;
int32_t x2_is_psram = 0;
int32_t y_is_psram = 0;
int32_t used_tmp_size = 0;

if ((1 == X1->mem_.type_ || 3 == X1->mem_.type_) &&
(Temp)) // need copy psram to share
{
src1 = (void *)Temp->dptr_;
memcpy(src1, (void *)X1->dptr_, size * X1->byte_);
x1_is_psram = 1;
}

if ((1 == X2->mem_.type_ || 3 == X2->mem_.type_) &&
(Temp)) // need copy psram to share
{
src2 = (void *)Temp->dptr_;
memcpy(src2, (void *)X2->dptr_, size * X2->byte_);
x2_is_psram = 1;
}

if ((1 == Y->mem_.type_ || 3 == Y->mem_.type_) &&
(Temp)) // need copy psram to share
{
y_is_psram = 1;
}

if (equalShape(&X1->shape_, &X2->shape_) && (X1->dtype_ == X2->dtype_)) {
int32_t shift1 = x1_q - y_q;
int32_t shift2 = x2_q - y_q;
switch (X1->dtype_) {
case Int8: {
ret = luna_scale_q7_int8((const q7_t *)src1, (1), (int8_t *)dst, size,
shift1);
ret = luna_scale_q7_int8((const q7_t *)src2, (1), (int8_t *)src2, size,
shift2);
if (x1_is_psram){
src1 = (int8_t *)Temp->dptr_;
memcpy(src1, (void *)X1->dptr_, size * sizeof(int8_t));
}
if (x1_q != y_q){
ret = luna_scale_q7_int8((const q7_t *)src1, (1), (int8_t *)Temp->dptr_, size, shift1);
src1 = (int8_t *)Temp->dptr_;
}

if (x2_is_psram){
src2 = (int8_t *)Temp->dptr_ + ((x1_is_psram) || (x1_q != y_q)) * size;
memcpy(src2, (void *)X2->dptr_, size * sizeof(int8_t));
}
if (x2_q != y_q){
ret = luna_scale_q7_int8((const q7_t *)src2, (1), (int8_t *)Temp->dptr_ + ((x1_is_psram) || (x1_q != y_q)) * size, size, shift2);
src2 = (int8_t *)Temp->dptr_ + ((x1_is_psram) || (x1_q != y_q)) * size;
}

if (y_is_psram){
dst = (int8_t *)Temp->dptr_;
}

ret = luna_sub_q7_int8((const q7_t *)dst, (q7_t *)src2, (int8_t *)dst,
size, 0);
} break;
case Int16: {
ret = luna_scale_q15_int16((const q15_t *)src1, (1), (int16_t *)dst,
size, shift1);
ret = luna_scale_q15_int16((const q15_t *)src2, (1), (int16_t *)src2,
size, shift2);
ret = luna_sub_q15_int16((const q15_t *)dst, (q15_t *)src2,
(int16_t *)dst, size, 0);
} break;
case Int32: {
ret = luna_scale_q31_int32((const q31_t *)src1, (1), (int32_t *)dst,
size, shift1);
ret = luna_scale_q31_int32((const q31_t *)src2, (1), (int32_t *)src2,
size, shift2);
ret = luna_sub_q31_int32((const q31_t *)dst, (q31_t *)src2,
(int32_t *)dst, size, 0);

if (y_is_psram){
memcpy((void *)Y->dptr_, dst, size * sizeof(int8_t));
}

} break;
default:
break;
24 changes: 23 additions & 1 deletion thinker/resource_packer/ops/iqSub.py
@@ -1,11 +1,33 @@
import math
import numpy as np
from typing import List

from ...graph import Tensor
from ...enum_defines import DevType, MemType
from .base import iqBinaryOperator, register_op


@register_op
class iqSub(iqBinaryOperator):
    pass
    def get_workspace(self, dev_type: DevType) -> List[Tensor]:
        x1 = self.inputs[0]
        x2 = self.inputs[1]
        size = x1.nbytes
        Y = self.outputs[0]

        scale_x = self.attrs["scale_x"]
        scale_y = self.attrs["scale_y"]
        scale_o = self.attrs["scale_o"]

        workspace_size = 0
        if (scale_x != scale_o) or x1.mem_type != MemType.SHARE_MEM:
            workspace_size += size
        if (scale_y != scale_o) or x2.mem_type != MemType.SHARE_MEM:
            workspace_size += size
        if Y.mem_type != MemType.SHARE_MEM:
            workspace_size = max(workspace_size, size)

        max_workspace = Tensor.from_shape([workspace_size], np.int8, MemType.SHARE_MEM)
        return [max_workspace]

__all__ = ["iqSub"]
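
The `get_workspace` method above reserves one `size`-byte slot per input that either lives outside shared memory or needs requantization, plus room for staging the output when it is not in shared memory — mirroring how the C operator partitions `Temp` in iqsub.h. A hedged sketch of that sizing rule (names are illustrative, not the packer's API):

```c
#include <stdbool.h>
#include <stddef.h>

/* Illustrative mirror of the packer-side sizing rule in iqSub.get_workspace. */
static size_t iqsub_workspace_bytes(size_t size,
                                    bool x1_in_share, bool x1_needs_rescale,
                                    bool x2_in_share, bool x2_needs_rescale,
                                    bool y_in_share) {
  size_t ws = 0;
  if (!x1_in_share || x1_needs_rescale) ws += size; /* slot 0: staged/rescaled X1 */
  if (!x2_in_share || x2_needs_rescale) ws += size; /* slot 1: staged/rescaled X2 */
  if (!y_in_share && ws < size) ws = size;          /* result staged before copy-out */
  return ws;
}
```

The output slot can reuse slot 0, which is why the Python code takes `max(workspace_size, size)` instead of adding another `size`.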
