# **IMPORT**

In [46]:
from langchain.text_splitter import RecursiveCharacterTextSplitter, CharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.embeddings import GPT4AllEmbeddings, OpenAIEmbeddings
from langchain.llms import CTransformers
from langchain.chains.llm import LLMChain
from langchain_community.graphs import Neo4jGraph
from langchain.vectorstores.neo4j_vector import Neo4jVector
from langchain.chains import RetrievalQA, GraphCypherQAChain
from langchain_openai import ChatOpenAI
from langchain_google_genai import ChatGoogleGenerativeAI

from langchain.prompts import (
    PromptTemplate,
    SystemMessagePromptTemplate,
    HumanMessagePromptTemplate,
    ChatPromptTemplate,
)

from neo4j import GraphDatabase

import numpy as np
import pandas as pd
import getpass
import re

import json
import os
import pickle


# **FUNCTION**

In [3]:
def load_pickle(path):
    with open(path, 'rb') as file:
        data = pickle.load(file)
    return data


In [4]:
# Preprocessing data
def clean_text(
        text,
        methods=['rmv_link', 'rmv_punc', 'lower', 'replace_word', 'rmv_space'],
        custom_punctuation = '!"#$%&\'()*+,-:;<=>?@[\\]^_`{|}~”“',
        patterns=[],
        words_replace=[],
        rdrsegmenter=None,
    ):
    cleaned_text = text
    for method in methods:
        if method == 'rmv_link':
            # Remove link
            cleaned_text = re.sub('http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', '', cleaned_text)
            cleaned_text = "".join(cleaned_text)
        elif method == 'rmv_punc':
            # Remove punctuation
            cleaned_text = re.sub('[%s]' % re.escape(custom_punctuation), '' , cleaned_text)
        elif method == 'lower':
            # Lowercase
            cleaned_text = cleaned_text.lower()
        elif method == 'replace_word':
            # Replace word
            for pattern, repl in zip(patterns, words_replace):
                cleaned_text = re.sub(pattern, repl, cleaned_text)
        elif method == 'rmv_space':
            # Remove extra space
            cleaned_text = re.sub(' +', ' ', cleaned_text)
            cleaned_text = cleaned_text.strip()
        elif method == 'segmentation':
            if rdrsegmenter is None:
                print('No Segmenter found !!')
                continue
            # Word and cleaned_text segmentation
            cleaned_text = rdrsegmenter.word_segment(cleaned_text)
            cleaned_text = ' '.join(cleaned_text)

    return cleaned_text

  cleaned_text = re.sub('http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', '', cleaned_text)


# **LOAD DATA**

In [7]:
path = 'clean_steam_data.p'
data = load_pickle(path)

In [8]:
features = list(data[580520].keys())

# Create features list
features_data = {
    'id': None,
    'type': None,
    'name': None,
    'steam_appid': None,
    'detailed_description': None,
    'about_the_game': None,
    'short_description': None,
    'pc_requirements': None,
    'platforms': None,
    'is_free': None,
    'required_age': None,
    'categories': None,
    'supported_languages': None,
}

for feature in features:
    features_data[feature] = [
        value[feature] if feature in value.keys() else '' for value in data.values()
    ]

# Modify features
features_data['required_age'] = [
    int(clean_text(age, ['rmv_punc'], custom_punctuation='+')) if type(age) == str else age
    for age in features_data['required_age']
]

features_data['id'] = [id for id in data.keys()]

# **SETUP**

In [87]:
GOOGLE_API_KEY='AIzaSyDoy9Nc_1Gs7NPBLRwrGn9gDmE5FCWyGOs'

NEO4J_URI="neo4j+s://07f39afd.databases.neo4j.io"
# NEO4J_URI="bolt://07f39afd.databases.neo4j.io"
NEO4J_USERNAME='neo4j'
NEO4J_PASSWORD="m3ATlNuYSkKn2uuxOUuy_B9m_zlci58E7mZL01r8V6w"

# GAME_INFO_PATH='game_info_df.csv'
# GAM_DESCRIPTION_PATH='game_description_df.csv'
# GAME_REQUIREMENT_PATH='game_requirement_df.csv'
# GAME_STATUS_PATH='game_status_df.csv'

In [38]:
config = {
        'model': {
            'model_embedding_name': 'all-MiniLM-L6-v2.gguf2.f16.gguf',
            'llm_name': 'llama-2-7b-chat.Q4_K_M.gguf'
        },
        'text_splitter': {
            'separator': '.',
            'chunk_size': 512,
            'chunk_overlap': 64,
        },
        'llm': {
            'max_new_tokens': 128,
            'temperature': 0.2, # Creativity (Want to answer base on db --> temperature low)
            'model_type': 'gpt',
            'template': """
                [INST]
                <<SYS>> You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe.  Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature. If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.
                <</SYS>>{question}[/INST]
            """,

        }
    }

# **SET UP QUERY**

In [109]:
graph = Neo4jGraph(
    url=NEO4J_URI,
    username=NEO4J_USERNAME,
    password=NEO4J_PASSWORD,
)

## *Schema*

* LLM to generate a Cypher statement --> Provide Graph Schema

* Create a node --> Update Schema --> Use `refresh_schema` to update schema

In [111]:
graph.refresh_schema()
graph_schema = graph.schema
print(graph_schema)

Node properties:
DESCRIPTION {id: INTEGER, detailed_description: STRING, about_the_game: STRING, short_description: STRING, embedding: LIST}
REQUIREMENT {id: INTEGER, pc_requirements: STRING, platforms: STRING, required_age: INTEGER}
STATUS {id: INTEGER, is_free: BOOLEAN}
BASIC_INFO {id: INTEGER, name: STRING, type: STRING, categories: STRING, languages: STRING, supported_languages: STRING}
Relationship properties:

The relationships:
(:BASIC_INFO)-[:HAS]->(:DESCRIPTION)
(:BASIC_INFO)-[:HAS]->(:STATUS)
(:BASIC_INFO)-[:REQUIRE]->(:REQUIREMENT)


## *Get Prompt*

In [112]:
def get_prompt(
        question,
        system_prompt="You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe.  Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature. If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information."
    ):
    prompt = f"""
        [INST] <<SYS>>
        {system_prompt}
        <</SYS>>
        {question}[/INST]
    """
    return prompt

## *Chain*

In [113]:
if "GOOGLE_API_KEY" not in os.environ:
    os.environ["GOOGLE_API_KEY"] = GOOGLE_API_KEY

In [114]:
# Setup Para
llm_config = config['llm']
llm_model = config['model']['llm_name']

# llm model
llm = ChatGoogleGenerativeAI(
    model="gemini-1.5-pro",
    # temperature=llm_config['temperature'],
    temperature=0,
    max_tokens=None,
    timeout=None,
    max_retries=2,
    # other params...
)

In [120]:
CYPHER_GENERATION_TEMPLATE = """
    Task: 
    Generate Cypher statement to query a graph database.

    Instructions:
    Use only the provided relationship types and properties in the schema.
    Do not use any other relationship types or properties that are not provided.

    Schema:
    {schema}

    Note:
    Do not provide to many explanations (under 2 sentences) in your responses.
    Do not respond to any questions that might ask anything other than for you to construct a Cypher statement. 
    Do not include any text except the generated Cypher statement. 
    Make sure the direction of the relationship is correct in your queries.
    Make sure you alias both entities and relationships properly. 
    Do not run any queries that would add to or delete from the database.
    
    Examples:
    # Suggest me 1 game, that supports english and japanese that me and my friend could play together
    MATCH (id:`BASIC INFO`)
    WHERE (
        ANY(language IN id.languages WHERE language CONTAINS 'japanese')
        AND ANY(cate IN id.categories WHERE cate CONTAINS 'multi-player')
    ) 
    RETURN id.name, id.categories LIMIT 1
"""

CYPHER_GENERATION_PROMPT = PromptTemplate(
    input_variables=["schema", "question"], template=CYPHER_GENERATION_TEMPLATE
)

CYPHER_QA_TEMPLATE = """
    You are a professional game sales agent and an expert in the gaming market. Your role is to provide up-to-date recommendations for video games based on the latest trends, genres, platforms, and user preferences. You have a deep knowledge of games, including AAA titles, indie games, upcoming releases, and popular classics.
    You understand different gaming platforms like PC, PlayStation, Xbox, Nintendo Switch, and mobile devices. You always tailor your recommendations to the user's interests, whether they're looking for multiplayer, single-player, story-driven, casual, or competitive games.
    Always be proactive, making recommendations based on user input while showing enthusiasm and expertise in the gaming industry.
    You are welcome to answer in Vietnamese.
"""

CYPHER_QA_PROMPT = PromptTemplate(
    input_variables=["context", "question"], template=CYPHER_QA_TEMPLATE
)


chain = GraphCypherQAChain.from_llm(
    graph=graph,
    llm=llm,
    # cypher_prompt=CYPHER_GENERATION_PROMPT, 
    qa_prompt=CYPHER_QA_PROMPT,
    verbose=True,
    allow_dangerous_requests=True,
)

In [121]:
question = 'Suggest me 1 game, that supports english and japanese that me and my friend could play together'
response = chain.invoke({'query': question})
response



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (bi:BASIC_INFO)-[:HAS]->(d:DESCRIPTION), (bi)-[:HAS]->(s:STATUS), (bi)-[:REQUIRE]->(r:REQUIREMENT)
WHERE bi.languages CONTAINS 'English' AND bi.languages CONTAINS 'Japanese'
RETURN bi.name, d.short_description LIMIT 1
[0m
Full Context:
[32;1m[1;3m[][0m

[1m> Finished chain.[0m


{'query': 'Suggest me 1 game, that supports english and japanese that me and my friend could play together',
 'result': 'Chào bạn! Tôi là chuyên viên tư vấn game chuyên nghiệp, sẵn sàng giúp bạn tìm kiếm những tựa game phù hợp nhất. Thị trường game hiện nay vô cùng đa dạng và phong phú, từ những bom tấn AAA đến những tựa game indie đầy sáng tạo.  Hãy cho tôi biết bạn quan tâm đến thể loại game nào, nền tảng bạn sử dụng (PC, PlayStation, Xbox, Switch, Mobile), và bạn thích chơi game một mình hay với bạn bè? Bạn thích cốt truyện hấp dẫn, gameplay thử thách, hay chỉ đơn giản là muốn thư giãn sau một ngày dài?\n\nĐể bắt đầu, tôi xin giới thiệu một vài tựa game đang rất hot hiện nay dựa trên các xu hướng phổ biến:\n\n* **Đối với những ai yêu thích thể loại nhập vai thế giới mở:**  *Hogwarts Legacy* đang làm mưa làm gió với thế giới phép thuật đầy mê hoặc.  Nếu bạn thích phong cách anime, *Genshin Impact* vẫn là một lựa chọn miễn phí tuyệt vời với nội dung được cập nhật liên tục.  Còn nếu mu

In [117]:
print(response['result'])

Chào bạn! Tôi là chuyên viên tư vấn game chuyên nghiệp, luôn cập nhật những xu hướng mới nhất trong thị trường game. Tôi sẵn sàng giúp bạn tìm kiếm tựa game hoàn hảo, dù bạn thích AAA, indie, game sắp ra mắt hay những tựa game kinh điển.  Hãy cho tôi biết bạn đang tìm kiếm gì nhé! Ví dụ như bạn thích thể loại nào (hành động, nhập vai, chiến thuật, mô phỏng,...), bạn chơi trên nền tảng nào (PC, PlayStation, Xbox, Switch, Mobile), bạn thích chơi một mình hay với bạn bè, bạn thích game có cốt truyện hay tập trung vào gameplay,...  Càng nhiều thông tin bạn cung cấp, tôi càng có thể đưa ra những gợi ý phù hợp nhất.

Ví dụ, nếu bạn đang tìm kiếm một tựa game nhập vai thế giới mở trên PC, tôi có thể gợi ý bạn những game như Baldur's Gate 3 (mới ra và đang rất hot), Elden Ring, The Witcher 3: Wild Hunt.  Nếu bạn thích game hành động nhanh trên PlayStation 5,  Marvel’s Spider-Man 2 sắp ra mắt hứa hẹn sẽ là một bom tấn.  Còn nếu bạn muốn thư giãn với game nhẹ nhàng trên Switch,  tôi nghĩ bạn sẽ 