In [1]:
from collections import Counter

import pandas as pd

from apriori import load_apriori, getAsso
from glm import ask_glm, init_ai, chatTools, chat_glmOnce


def load_data(fname):
    """Read the CSV file at ``fname`` with ``pd.read_csv`` and return the DataFrame."""
    return pd.read_csv(fname)


# Load the job-postings table once; `fun` hands this module-level frame to ask_glm.
dataset = load_data("招聘数据集(含技能列表).csv")
# Smoke check: load the saved association data and print what getAsso returns for 'linux'.
# NOTE(review): 0.1 is presumably a minimum support/confidence threshold — confirm in apriori.getAsso.
biga_load = load_apriori("apriori.bin")
s = getAsso(biga_load, 'linux', 0.1)
print(s)

In [24]:
def get_asso_skill(dataset, skill, prompt):
    """Turn a skill keyword into a follow-up prompt listing its associated skills.

    Loads ``apriori.bin`` on every call, queries ``getAsso`` with the
    lower-cased ``skill`` and the fixed value ``0.1``, and logs both the
    request and the raw lookup result to stdout.

    Args:
        dataset: Not used by this function; kept so the signature matches the
            other tool callables.
        skill: Skill keyword to look up (lower-cased before the lookup).
        prompt: The user's original question.

    Returns:
        ``prompt`` unchanged when the lookup result is empty; otherwise a
        Chinese-language instruction string embedding ``prompt`` and the
        associated skills joined with ``', '``.
    """
    print(f"get_asso_skill: {skill}")
    # NOTE(review): 0.1 is presumably a minimum threshold — confirm in apriori.getAsso.
    rules = load_apriori("apriori.bin")
    related = getAsso(rules, skill.lower(), 0.1)
    print(f"{skill}对应的关联技术是：{related}")

    # Nothing associated: hand the question back untouched.
    if len(related) == 0:
        return prompt

    joined = ', '.join(list(related))
    return f"针对问题{prompt},我查询到关联性最高的技术是{joined}，请将查询结果组织成人类语言描述。可以附加一些学习建议。总字数不超过200字"

def cacu_skill_position_wordcount(dataset, skill, prompt, count=2):
    """Build a follow-up prompt listing the positions that most often list ``skill``.

    Each row's ``skill_list`` cell is split on commas and every entry is
    compared with ``skill`` after stripping whitespace and lower-casing both
    sides. Every matching entry adds one to the tally of that row's
    ``positionName``. Rows whose ``skill_list`` cell is not a string are skipped.

    Args:
        dataset: DataFrame with a ``skill_list`` column (comma-separated text)
            and a ``positionName`` column.
        skill: Skill keyword, e.g. ``'java'`` or ``'linux'``.
        prompt: The user's original question; embedded verbatim in the result.
        count: Maximum number of positions to report.

    Returns:
        A Chinese-language instruction string embedding ``prompt`` and the
        ``count`` most frequent ``(positionName, occurrences)`` pairs.
    """
    # Log under this function's own name so tool-call traces are attributable.
    print(f"cacu_skill_position_wordcount: {skill}")
    target = skill.lower()
    position_counts = Counter()
    # Walk the two columns positionally: a label lookup such as dataset[col][i]
    # raises KeyError when the frame does not carry a default 0..n-1 index.
    for raw_skills, position in zip(dataset['skill_list'], dataset['positionName']):
        if not isinstance(raw_skills, str):
            # Empty CSV cells are read as NaN (a float), which has no .split().
            continue
        for entry in raw_skills.split(','):
            if entry.strip().lower() == target:
                position_counts[position] += 1
    # sorted() is stable, so equally frequent positions keep first-seen order.
    top_positions = sorted(position_counts.items(), key=lambda item: item[1], reverse=True)[:count]
    return f"针对问题{prompt},我查询到关联性最高的岗位是{str(top_positions)}，请将查询结果组织成人类语言描述。可以附加一些建议。总字数不超过200字"


def compare_str(str1, key):
    """Case-insensitive containment check.

    Returns True when ``key`` occurs anywhere inside ``str1`` once both are
    lower-cased; exact equality is simply the case where the match spans the
    whole string. An empty ``key`` therefore matches every ``str1``.
    """
    haystack = str1.lower()
    needle = key.lower()
    return needle in haystack


def cacu_postion_skill_wordcount(dataset, postionName, prompt, count=4):
    """Build a follow-up prompt listing the skills most often asked for by a position.

    A row is selected when ``compare_str`` reports that its ``positionName``
    contains ``postionName`` (case-insensitive). The ``skill_list`` cell of
    every selected row is split on commas; each non-empty, whitespace-stripped
    entry is tallied. Skill names are tallied case-sensitively. Rows whose
    ``positionName`` or ``skill_list`` cell is not a string are skipped.

    Args:
        dataset: DataFrame with ``positionName`` and ``skill_list``
            (comma-separated text) columns.
        postionName: Position keyword to match against ``positionName``.
        prompt: The user's original question; embedded verbatim in the result.
        count: Maximum number of skills to report.

    Returns:
        A Chinese-language instruction string embedding ``prompt`` and the
        ``count`` most frequent ``(skill, occurrences)`` pairs.
    """
    # Log under this function's own name so tool-call traces are attributable.
    print(f"cacu_postion_skill_wordcount: {postionName}")
    skill_counts = Counter()
    for position, raw_skills in zip(dataset['positionName'], dataset['skill_list']):
        # Empty CSV cells are read as NaN (a float); .lower()/.split() would raise on them.
        if not isinstance(position, str) or not isinstance(raw_skills, str):
            continue
        if not compare_str(position, postionName):
            continue
        for entry in raw_skills.split(','):
            name = entry.strip()
            if name:
                skill_counts[name] += 1
    # sorted() is stable, so equally frequent skills keep first-seen order.
    top_skills = sorted(skill_counts.items(), key=lambda item: item[1], reverse=True)[:count]

    return f"针对问题{prompt},我查询到关联性最高的技术是{str(top_skills)}，请将查询结果组织成人类语言描述。可以附加一些建议。总字数不超过200字"


# Tool schema entry naming the `get_asso_skill` callable. It declares one
# required string parameter, `skill`, and is collected into `tools` below.
# Description (zh -> en): learning recommendation — given a skill the user
# mentions, find associated skills to recommend for study.
# Parameter description: skill name, e.g. java, python, c++.
query_asso_skill_tool = {
    "type": "function",
    "function": {
        "name": "get_asso_skill",
        "description": "针对用户进行学习推荐。根据用户提示的技能，找出关联的技能用于学习推荐",
        "parameters": {
            "type": "object",
            "properties": {
                "skill": {
                    "type": "string",
                    "description": "技能名称，如java，python，c++",
                },
            },
            "required": ["skill"],
        },
    }
}

# Tool schema entry naming the `cacu_skill_position_wordcount` callable. It
# declares one required string parameter, `skill`; the callable's `count`
# argument is not exposed here, so its default applies unless the caller sets it.
# Description (zh -> en): position recommendation — given a skill the user
# mentions, find associated positions to recommend.
# NOTE(review): "positionl" in the variable name looks like a typo for
# "position"; renaming it means updating the `tools` list as well.
query_skill_positionl_tool = {
    "type": "function",
    "function": {
        "name": "cacu_skill_position_wordcount",
        "description": "针对用户进行岗位推荐。根据用户提示的技能，找出关联的岗位用于推荐",
        "parameters": {
            "type": "object",
            "properties": {
                "skill": {
                    "type": "string",
                    "description": "技能名称，如java，python，c++",
                },
            },
            "required": ["skill"],
        },
    }
}

# Tool schema entry naming the `cacu_postion_skill_wordcount` callable. It
# declares one required string parameter, `postionName` (spelled as in the
# callable's signature).
# Description (zh -> en): skill recommendation — given a position the user
# mentions, find associated skills to recommend.
# Parameter description: position name, e.g. Java engineer, Python engineer,
# C/C++ development engineer, algorithm engineer, PHP development engineer.
query_positionl_skill_tool = {
    "type": "function",
    "function": {
        "name": "cacu_postion_skill_wordcount",
        "description": "针对用户进行技能推荐。根据用户提示的岗位，找出关联的技能用于推荐",
        "parameters": {
            "type": "object",
            "properties": {
                "postionName": {
                    "type": "string",
                    "description": "岗位名称，如Java工程师，Python工程师，C/C++开发工程师，算法工程师，PHP开发工程师",
                },
            },
            "required": ["postionName"],
        },
    }
}

# The three tool schemas as one list; `fun` passes it to chatTools and ask_glm.
tools = [query_asso_skill_tool, query_skill_positionl_tool, query_positionl_skill_tool]
# Maps each schema's "name" string to the Python callable of the same name.
# NOTE(review): presumably ask_glm looks the callable up by the tool name the
# model returns, so keys must stay identical to the "name" fields — confirm in glm.ask_glm.
function_list = {"get_asso_skill": get_asso_skill,
                 "cacu_skill_position_wordcount": cacu_skill_position_wordcount,
                 "cacu_postion_skill_wordcount": cacu_postion_skill_wordcount,
                 }

In [3]:
def fun(text):
    """Run one user question through the tool-calling flow and print the outcome.

    Calls ``ask_glm`` with the module-level ``tools``, ``function_list`` and
    ``dataset`` to obtain a follow-up question string, prints it prefixed with
    ``提问: ``, then sends it to ``chat_glmOnce`` and prints the reply's
    ``message.content``. A fresh ``init_ai()`` result is passed to each call.

    Args:
        text: The user's question.
    """
    # NOTE(review): this result is never read below — confirm whether chatTools
    # is needed for a side effect or is a leftover extra request to the model.
    _ack = chatTools(init_ai(), text, tools, 0.6)

    # NOTE(review): 0.6 is presumably the sampling temperature — confirm in glm.
    question = ask_glm(init_ai(), tools, 0.6, text, function_list, dataset)
    print("提问: " + question)

    reply = chat_glmOnce(init_ai(), question)
    print(reply.message.content)

In [11]:
# Prompt (zh -> en): "I'm good at Java; please recommend related technologies to learn."
fun("我java学得不错，请推荐相关的技术学习")

In [12]:
# Prompt (zh -> en): "What positions can I work in if I know Python?"
fun("掌握python可以从事什么岗位？")

In [25]:
# Prompt (zh -> en): "What skills does a PHP engineer position require?"
fun("php工程师岗位需要什么技能？")