# elk-springboot

1. The elk-tcp module's tests live under `src/test`.

2. Environment setup for Elasticsearch-based CRUD:

```bash
docker run -e ES_JAVA_OPTS="-Xms256m -Xmx256m" -d -p 9200:9200 -p 9300:9300 \
  -e "discovery.type=single-node" \
  -v /home/software/elasticsearch/plugins:/usr/share/elasticsearch/plugins \
  -v /home/software/elasticsearch/data:/usr/share/elasticsearch/data \
  -v /home/software/elasticsearch/config/es-single.yml:/usr/share/elasticsearch/config/elasticsearch.yml \
  --name elasticsearch-single elasticsearch:7.9.3
```

Note: download the plugins you need and place them in `/home/software/elasticsearch/plugins`; the most commonly used ones are the Chinese (ik) analyzer and the pinyin analyzer, as shown below.
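For example, a sketch of installing both analyzers into the mounted plugins directory before starting the container (assuming the standard GitHub release URLs; the plugin version must match the Elasticsearch version, 7.9.3 here):

```bash
# ik Chinese analyzer: unpack the release matching ES 7.9.3 into its own plugin folder
mkdir -p /home/software/elasticsearch/plugins/ik
wget https://github.com/medcl/elasticsearch-analysis-ik/releases/download/v7.9.3/elasticsearch-analysis-ik-7.9.3.zip
unzip elasticsearch-analysis-ik-7.9.3.zip -d /home/software/elasticsearch/plugins/ik

# pinyin analyzer: same layout
mkdir -p /home/software/elasticsearch/plugins/pinyin
wget https://github.com/medcl/elasticsearch-analysis-pinyin/releases/download/v7.9.3/elasticsearch-analysis-pinyin-7.9.3.zip
unzip elasticsearch-analysis-pinyin-7.9.3.zip -d /home/software/elasticsearch/plugins/pinyin
```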

Based on Spring Boot 2.3.3:

```xml
<parent>
    <groupId>org.springframework.boot</groupId>
    <artifactId>spring-boot-starter-parent</artifactId>
    <version>2.3.3.RELEASE</version>
</parent>
```

and Elasticsearch 7.9.3:

```xml
<properties>
    <java.version>1.8</java.version>
    <elasticsearch.version>7.9.3</elasticsearch.version>
</properties>
```

Add the corresponding dependency:

```xml
<dependency>
    <groupId>org.springframework.boot</groupId>
    <artifactId>spring-boot-starter-data-elasticsearch</artifactId>
</dependency>
```

Spring Boot configuration, `ElasticSearchConfig.java`:

```java
import org.elasticsearch.client.RestHighLevelClient;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.data.elasticsearch.client.ClientConfiguration;
import org.springframework.data.elasticsearch.client.RestClients;
import org.springframework.data.elasticsearch.config.AbstractElasticsearchConfiguration;
import org.springframework.data.elasticsearch.core.ElasticsearchRestTemplate;
import org.springframework.http.HttpHeaders;

import java.time.Duration;
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;

/**
 * Elasticsearch configuration.
 */
@Configuration
public class ElasticSearchConfig extends AbstractElasticsearchConfiguration {

    /**
     * On recent versions you must declare the RestHighLevelClient and the
     * ElasticsearchRestTemplate yourself, and register the template under the
     * bean names "elasticsearchOperations" and "elasticsearchTemplate".
     */
    @Bean(name = { "elasticsearchOperations", "elasticsearchTemplate" })
    public ElasticsearchRestTemplate elasticsearchRestTemplate() {
        return new ElasticsearchRestTemplate(elasticsearchClient());
    }

    @Override
    public RestHighLevelClient elasticsearchClient() {
        ClientConfiguration clientConfiguration = ClientConfiguration.builder()
                .connectedTo("47.99.200.71:9200")
                .withHeaders(() -> {
                    // The supplier is re-evaluated per request; here a timestamp header is attached
                    HttpHeaders headers = new HttpHeaders();
                    headers.add("currentTime", LocalDateTime.now().format(DateTimeFormatter.ISO_LOCAL_DATE_TIME));
                    return headers;
                })
                .withConnectTimeout(Duration.ofSeconds(30))
                .withSocketTimeout(Duration.ofSeconds(15))
                .build();
        return RestClients.create(clientConfiguration).rest();
    }
}
```

Create the `User` entity:

```java
import org.springframework.data.annotation.Id;
import org.springframework.data.elasticsearch.annotations.Document;
import org.springframework.data.elasticsearch.annotations.Field;
import org.springframework.data.elasticsearch.annotations.FieldType;

import java.io.Serializable;

/**
 * Marks this class as an Elasticsearch document. Mapping types were removed in
 * ES 7, so Spring Data Elasticsearch 4.x no longer takes a "type" attribute here.
 */
@Document(indexName = "test-use")
public class User implements Serializable {

    /**
     * The document id; required.
     */
    @Id
    private Long id;

    /**
     * Analyzed text field: indexed with ik_smart, searched with ik_max_word.
     */
    @Field(name = "user_name", type = FieldType.Text, analyzer = "ik_smart", searchAnalyzer = "ik_max_word")
    private String userName;

    /**
     * Analyzed text field.
     */
    @Field(name = "desc", type = FieldType.Text, analyzer = "ik_smart", searchAnalyzer = "ik_max_word")
    private String desc;

    /**
     * Field type detected automatically.
     */
    @Field(type = FieldType.Auto)
    private Integer age;

    public Long getId() {
        return id;
    }

    public void setId(Long id) {
        this.id = id;
    }

    public String getUserName() {
        return userName;
    }

    public void setUserName(String userName) {
        this.userName = userName;
    }

    public String getDesc() {
        return desc;
    }

    public void setDesc(String desc) {
        this.desc = desc;
    }

    public Integer getAge() {
        return age;
    }

    public void setAge(Integer age) {
        this.age = age;
    }
}
```
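For reference, the mapping this entity creates looks roughly like the sketch below (`age` has no upfront mapping and is mapped dynamically, typically to `long`, once the first document is indexed; check the real output with `GET /test-use/_mapping`):

```json
{
  "test-use": {
    "mappings": {
      "properties": {
        "user_name": { "type": "text", "analyzer": "ik_smart", "search_analyzer": "ik_max_word" },
        "desc":      { "type": "text", "analyzer": "ik_smart", "search_analyzer": "ik_max_word" },
        "age":       { "type": "long" }
      }
    }
  }
}
```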

Create the corresponding DAO:

```java
import com.elk.elktcp.entity.User;
import org.springframework.data.elasticsearch.annotations.Highlight;
import org.springframework.data.elasticsearch.annotations.HighlightField;
import org.springframework.data.elasticsearch.annotations.HighlightParameters;
import org.springframework.data.elasticsearch.core.SearchHit;
import org.springframework.data.elasticsearch.repository.ElasticsearchRepository;

import java.util.List;

public interface UserDao extends ElasticsearchRepository<User, Long> {

    @Highlight(fields = {
            @HighlightField(name = "userName", parameters = @HighlightParameters(
                    preTags = "<strong>",
                    postTags = "</strong>",
                    fragmentSize = 500,
                    numberOfFragments = 3))
    })
    List<SearchHit<User>> findByUserNameLike(String userName);

    @Highlight(fields = {
            @HighlightField(name = "desc", parameters = @HighlightParameters(
                    preTags = "<strong>",
                    postTags = "</strong>",
                    fragmentSize = 500,
                    numberOfFragments = 3))
    })
    List<SearchHit<User>> findByDescLike(String desc);
}
```
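A minimal usage sketch (the demo class and sample values are illustrative; each `SearchHit` carries the matched entity plus its highlight fragments):

```java
import java.util.List;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.data.elasticsearch.core.SearchHit;
import org.springframework.stereotype.Service;

import com.elk.elktcp.entity.User;

@Service
public class UserSearchDemo {

    @Autowired
    private UserDao userDao;

    public void demo() {
        // Index a document; the entity id becomes the ES _id
        User user = new User();
        user.setId(1L);
        user.setUserName("张三丰");
        user.setDesc("武当山道士");
        user.setAge(100);
        userDao.save(user);

        // Derived query with highlighting
        for (SearchHit<User> hit : userDao.findByUserNameLike("张三")) {
            List<String> fragments = hit.getHighlightField("userName");
            System.out.println(hit.getContent().getUserName() + " -> " + fragments);
        }
    }
}
```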

Querying with `ElasticsearchRestTemplate`:

```java
    /**
     * boolQuery() is the default compound query for combining leaf or compound clauses:
     *   must      - logical AND
     *   must_not  - logical NOT
     *   should    - logical OR
     *   filter    - filtering without scoring
     */

    @Autowired
    private ElasticsearchRestTemplate elasticsearchRestTemplate;

    @RequestMapping("member_search")
    public Response memberSearch() {
        // BoolQueryBuilder assembles the query conditions
        BoolQueryBuilder queryBuilder = QueryBuilders.boolQuery();
        // Age condition: 10 <= age <= 100
        RangeQueryBuilder ageRange = QueryBuilders.rangeQuery("age").gte(10).lte(100);
        // Time condition: "2019-09-10 11:40:27" <= create_time <= "2020-09-20 11:40:43"
        RangeQueryBuilder timeRange = QueryBuilders.rangeQuery("create_time")
                .format("yyyy-MM-dd HH:mm:ss")
                .gte("2019-09-10 11:40:27")
                .lte("2020-09-20 11:40:43");
        // Combine the conditions with must()
        BoolQueryBuilder resultQuery = queryBuilder
                .must(ageRange)
                .must(timeRange);
        // NativeSearchQueryBuilder adds sorting, paging, etc.
        NativeSearchQuery nativeSearchQuery = new NativeSearchQueryBuilder()
                .withFilter(resultQuery)             // attach the conditions as a filter
                .withPageable(PageRequest.of(0, 10)) // paging: first page, 10 per page
                .withSort(SortBuilders.fieldSort("create_time.keyword").order(SortOrder.ASC))
                .build();
        SearchHits<MemberInfo> memberInfoSearchHits =
                elasticsearchRestTemplate.search(nativeSearchQuery, MemberInfo.class, IndexCoordinates.of("t_member"));
        // Extract the matched entities
        List<MemberInfo> memberInfos = memberInfoSearchHits.stream()
                .map(SearchHit::getContent)
                .collect(Collectors.toList());
        PageInfo<MemberInfo> pageInfo = new PageInfo<>(memberInfos);
        pageInfo.setTotal(memberInfoSearchHits.getTotalHits());
        return Response.success(pageInfo);
    }
```
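Roughly, the request body this builds looks like the sketch below. Note that conditions set via `withFilter` are applied as a `post_filter` (filtering hits after aggregations are computed), while `withQuery` would put them under `query`; the exact generated JSON may differ in detail:

```
GET /t_member/_search
{
  "from": 0,
  "size": 10,
  "post_filter": {
    "bool": {
      "must": [
        { "range": { "age": { "gte": 10, "lte": 100 } } },
        { "range": { "create_time": { "gte": "2019-09-10 11:40:27",
                                      "lte": "2020-09-20 11:40:43",
                                      "format": "yyyy-MM-dd HH:mm:ss" } } }
      ]
    }
  },
  "sort": [ { "create_time.keyword": { "order": "asc" } } ]
}
```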

Appendix: basic API. (In ES 7 mapping types are deprecated; where `type` appears in the paths below, use `_doc`, e.g. `PUT /index/_doc/id`.)

1. Create an index: `PUT /index`

2. Delete an index: `DELETE /index`

3. Create (index) a document:

```
PUT /index/type/id
{
    JSON body
}
```

4. Retrieve a document: `GET /index/type/id`

5. Modify a document.

5.1 Replace (overwrite): a PUT with the same id overwrites the stored document. You must send every field; otherwise only the fields you send are stored and the rest are lost.

```
PUT /index/type/id
{
    JSON body with all fields
}
```

5.2 Partial update (only the fields inside "doc" are changed):

```
POST /index/type/id/_update
{
    "doc": { fields to change }
}
```

6. Delete a document: `DELETE /index/type/id`

7. Query all documents under index/type: `GET /index/type/_search`

8. Query documents whose field a contains xxx, sorted by field c descending: `GET /index/type/_search?q=a:xxx&sort=c:desc`

DSL (Domain Specific Language)

1. Query all documents under index/type:

```
GET /index/type/_search
{
    "query": { "match_all": {} }
}
```

2. Query documents whose field a contains xxx, sorted by field c descending:

```
GET /index/type/_search
{
    "query": {
        "match": { "a": "xxx" }
    },
    "sort": [
        { "c": "desc" }
    ]
}
```

3. Paged query over all documents (`from` is the 0-based offset of the first hit):

```
GET /index/type/_search
{
    "query": { "match_all": {} },
    "from": 1,
    "size": 10
}
```

4. Query all documents under index/type but return only the fields [a, b, c]:

```
GET /index/type/_search
{
    "query": { "match_all": {} },
    "_source": ["a", "b", "c"]
}
```

5. Full-text search for documents whose field a contains xxx or yyy (a `match` on multiple terms is OR-ed by default; documents containing both terms score higher):

```
GET /index/type/_search
{
    "query": {
        "bool": {
            "must": {
                "match": { "a": "xxx yyy" }
            }
        }
    }
}
```

6. Search for documents whose field a contains xxx and whose field b is greater than 100 (the `filter` clause belongs inside `bool`):

```
GET /index/type/_search
{
    "query": {
        "bool": {
            "must": {
                "match": { "a": "xxx" }
            },
            "filter": {
                "range": { "b": { "gt": 100 } }
            }
        }
    }
}
```

7. Search index/type for documents where field a contains the phrase "xxx yyy" (phrase search: the search string must appear in the field text exactly as given to count as a match):

```
GET /index/type/_search
{
    "query": {
        "match_phrase": { "a": "xxx yyy" }
    }
}
```

8. Highlighted search for documents whose field a contains xxx:

```
GET /index/type/_search
{
    "query": {
        "match": { "a": "xxx" }
    },
    "highlight": {
        "fields": { "a": {} }
    }
}
```

Aggregation queries

Request structure (see the sketch after this list):

- size: number of hits to return; set to 0 here because we only care about the aggregation result, not the matched documents, which is more efficient
- aggs: declares an aggregation query (short for aggregations)
  - brand_aggs: an arbitrary name for this aggregation
    - terms: how documents are split into buckets, here by term
      - field: the field to bucket on
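A minimal sketch, assuming an index with a keyword field `brand` (a `terms` aggregation needs a keyword or other doc-values field):

```
GET /index/_search
{
  "size": 0,
  "aggs": {
    "brand_aggs": {
      "terms": { "field": "brand" }
    }
  }
}
```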

Response structure (abbreviated sketch after this list):

- hits: empty, because size was set to 0
- aggregations: the aggregation results
  - brand_aggs: the aggregation name we defined
    - buckets: the buckets found; each distinct value of the brand field forms one bucket
      - key: the brand value this bucket corresponds to
      - doc_count: the number of documents in this bucket
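An abbreviated response sketch (values are hypothetical; a real response also carries fields such as `sum_other_doc_count`):

```json
{
  "hits": { "total": { "value": 75, "relation": "eq" }, "hits": [] },
  "aggregations": {
    "brand_aggs": {
      "buckets": [
        { "key": "huawei", "doc_count": 45 },
        { "key": "xiaomi", "doc_count": 30 }
      ]
    }
  }
}
```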

Metrics inside buckets (see the sketch after this list):

- aggs: a new aggs added inside the previous one (brand_aggs); a metric is itself an aggregation
  - price_aggs: the name of the metric aggregation
    - avg: the metric type, here an average
      - field: the field the metric is computed over
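Extending the sketch above with a nested average over a hypothetical numeric `price` field:

```
GET /index/_search
{
  "size": 0,
  "aggs": {
    "brand_aggs": {
      "terms": { "field": "brand" },
      "aggs": {
        "price_aggs": {
          "avg": { "field": "price" }
        }
      }
    }
  }
}
```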

The ik configuration files live in the `es/plugins/ik/config` directory:

- IKAnalyzer.cfg.xml: configures custom dictionaries
- main.dic: ik's built-in Chinese dictionary, over 270,000 entries; any word listed here is kept together as one token
- quantifier.dic: measure words and units
- suffix.dic: common suffixes
- surname.dic: Chinese surnames
- stopword.dic: English stop words

Steps to add custom tokens (see the sketch after this list):

1. Create a new file z_SelfAdd.dic containing the new words, saved as UTF-8.
2. In IKAnalyzer.cfg.xml in the same directory, register the file by adding `<entry key="ext_dict">z_SelfAdd.dic</entry>`.
3. Restart the server for the new dictionary to take effect.
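A sketch of the resulting IKAnalyzer.cfg.xml (the boilerplate follows the file shipped with the plugin; `z_SelfAdd.dic` is the file created above):

```xml
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE properties SYSTEM "http://java.sun.com/dtd/properties.dtd">
<properties>
    <comment>IK Analyzer extension configuration</comment>
    <!-- custom dictionaries; separate multiple files with semicolons -->
    <entry key="ext_dict">z_SelfAdd.dic</entry>
    <!-- custom stop-word dictionaries -->
    <entry key="ext_stopwords"></entry>
</properties>
```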

edge_ngram tokenizes from the first character outward in increasing lengths, which suits prefix-match scenarios such as searching order numbers, phone numbers, or postal codes. ngram tokenizes starting from every position in the string, which also supports infix matching. For example:
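A sketch of an index set up for prefix matching with edge_ngram (the index name `order_idx`, field name `order_no`, and gram lengths are illustrative; searching with the `keyword` analyzer keeps the query string whole so it matches the indexed prefixes):

```
PUT /order_idx
{
  "settings": {
    "analysis": {
      "tokenizer": {
        "edge_tokenizer": {
          "type": "edge_ngram",
          "min_gram": 1,
          "max_gram": 10,
          "token_chars": ["letter", "digit"]
        }
      },
      "analyzer": {
        "edge_analyzer": {
          "type": "custom",
          "tokenizer": "edge_tokenizer"
        }
      }
    }
  },
  "mappings": {
    "properties": {
      "order_no": {
        "type": "text",
        "analyzer": "edge_analyzer",
        "search_analyzer": "keyword"
      }
    }
  }
}
```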

SQL query plugin: https://github.com/NLPchina/elasticsearch-sql. Offline installation: download the release matching your Elasticsearch version and place it under the es plugins directory.

Query via a POST request with a JSON body:

```
POST http://ip:9200/_nlpcn/sql
{
    "sql": "select * from indexName limit 10"
}
```

1) Conditional query

```sql
SELECT * FROM bank WHERE age > 30 AND gender = 'm'
```

2) Aggregation

```sql
SELECT COUNT(*), SUM(age), MIN(age) AS m, MAX(age), AVG(age)
FROM bank GROUP BY gender ORDER BY SUM(age), m DESC
```

3) Delete

```sql
DELETE FROM bank WHERE age > 30 AND gender = 'm'
```

4) Geo bounding box

```sql
SELECT * FROM locations WHERE GEO_BOUNDING_BOX(fieldname, 100.0, 1.0, 101, 0.0)
```

5) Specifying index + type

```sql
SELECT * FROM indexName/type
```

6) Specifying a routing value

```sql
SELECT /*! ROUTINGS(salary) */ SUM(count) FROM index WHERE type = "salary"
```