In [1]:
import os
from dotenv import load_dotenv
load_dotenv("../.env")
# Warning control
import warnings
warnings.filterwarnings('ignore')

In [2]:
from huggingface_hub import InferenceClient

In [3]:
#### using hugging face API service to access Open Source Models..........
client = InferenceClient(api_key=os.getenv('HF_TOKEN'))

messages = [
	{ "role": "user", "content": "hi" },
	{ "role": "assistant", "content": "can you write a function remove curse words from a input text." }
]

stream = client.chat.completions.create(
    model=os.getenv('HF_MODEL'), 
	messages=messages, 
	temperature=0.5,
	max_tokens=2048,
	top_p=0.7,
	#stream=True
)

In [4]:
print(stream.choices[0].message.content)

Certainly! Below is a Python function that removes curse words from an input text. This function uses a predefined list of curse words and removes any occurrence of these words from the input text. You can customize the list of curse words as needed.

```python
def remove_curse_words(input_text, curse_words):
    # Convert the input text to lowercase to make the comparison case-insensitive
    input_text_lower = input_text.lower()
    
    # Iterate over each curse word and replace it with an empty string
    for word in curse_words:
        input_text_lower = input_text_lower.replace(word, '')
    
    # Reconstruct the output text while preserving the original case
    output_text = ''
    curse_words_set = set(curse_words)
    for word in input_text.split():
        if word.lower() not in curse_words_set:
            output_text += word + ' '
    
    # Remove the trailing space
    output_text = output_text.strip()
    
    return output_text

# Example usage
curse_words = ['badwor

In [5]:
from crewai import Agent, Task, Crew, LLM
from IPython.display import Markdown

In [6]:
from typing import List, Dict, Any
from crewai.utilities.exceptions.context_window_exceeding_exception import (
    LLMContextLengthExceededException,
)
import logging

In [7]:
class HfLLM(LLM):
    """Hugging Face Large Language Model (LLM)"""
    def __init__(
            self, 
            model: str, 
            api_key: str, 
            temperature: float, 
            max_tokens: int, 
            top_p: float
    ) -> None:
        super().__init__(
            model=model,
            temperature=temperature,
            max_tokens=max_tokens,
            top_p=top_p,
            api_key=api_key,
        )
    
    def get_context_window_size(self) -> int:
        # Only using 75% of the context window size to avoid cutting the message in the middle
        return int(128_000 * 0.75)
    
    def call(
        self, 
        messages: List[Dict[str, str]], 
        callbacks: List[Any] = []
    ) -> str:
        #### using hugging face API service to access Open Source Models..........
        try:
            client = InferenceClient(api_key=self.api_key)
            output = client.chat.completions.create(
                model=self.model, 
	            messages=messages, 
	            temperature=self.temperature,
	            max_tokens=self.max_tokens,
	            top_p=self.top_p,
            )
            return output.choices[0].message.content
        except Exception as e:
                if not LLMContextLengthExceededException(
                    str(e)
                )._is_context_limit_error(str(e)):
                    logging.error(f"LLM call failed: {str(e)}")

                raise  # Re-raise the exception after logging


In [8]:
hf_llm = HfLLM(
    model="Qwen/Qwen2.5-72B-Instruct", 
	temperature=0.5,
	max_tokens=2048,
	top_p=0.7,
    api_key=os.getenv('HF_TOKEN')
)

In [9]:
response = hf_llm.call(messages = [
	{ "role": "user", "content": "hi" },
	{ "role": "assistant", "content": "how knight moves in chess." }
])

In [10]:
print(response)

Hello! It seems like you might have been interested in how a knight moves in chess, but your message got a bit mixed up. Let me explain:

In chess, the knight moves in an "L" shape. This means it can move two squares in one direction (horizontally or vertically) and then one square perpendicular to that direction, or one square in one direction and then two squares perpendicular to that direction. The knight is the only piece that can "jump" over other pieces, making it unique and very versatile.

Here are a few examples of how a knight can move from a central square (e.g., d4):

- From d4, the knight can move to b3, b5, c2, c6, e2, e6, f3, or f5.

Would you like to know more about chess or the knight's movements?


In [11]:
type(hf_llm)

__main__.HfLLM

In [12]:
crewai_hf_llm = LLM(
    model=f"{os.getenv('REMOTE_PROVIDER')}/{os.getenv('HF_MODEL')}", 
    api_key=os.getenv('HF_TOKEN')
)

In [13]:
crewai_hf_llm.call(
    messages=[
	{ "role": "user", "content": "hi" },
	{ "role": "assistant", "content": "how rock moves in chess." }
]
)

'It seems like there might be a bit of a mix-up in your question. In chess, the piece you might be referring to is the "rook," not "rock." The rook is a powerful piece that moves horizontally or vertically, any number of squares, across the board. Here are the key points about the rook\'s movement:\n\n1. **Horizontal Movement:** The rook can move any number of squares to the left or right, as long as there are no pieces blocking its'

In [14]:
from crewai_tools import CSVSearchTool, FileReadTool, DirectoryReadTool

In [15]:
csv_search = CSVSearchTool(
    csv=os.path.join(os.pardir, os.getenv('TEMP_FILES_PATH'), 'Iris.csv'),
    config = dict(
        llm=dict(
            provider="ollama",
            config=dict(
                model="llama3.2:1b",
            )
        ),
        embedder=dict(
            provider="ollama",
            config=dict(
                model="nomic-embed-text:latest",
            )
        ),
        vectordb=dict(
            provider="chroma",
            config=dict(
                dir="../assets/data/databases",
                collection_name="iris"
            ),
        ),    
    )
)

  embeddings = OllamaEmbeddings(model=self.config.model, base_url=config.base_url)


In [16]:
test_results = csv_search._run(search_query="what's the average value of SepalLengthCm for Iris-versicolor?")
print(test_results)

Relevant Content:
Id: 100, SepalLengthCm: 5.7, SepalWidthCm: 2.8, PetalLengthCm: 4.1, PetalWidthCm: 1.3, Species: Iris-versicolor

Id: 80, SepalLengthCm: 5.7, SepalWidthCm: 2.6, PetalLengthCm: 3.5, PetalWidthCm: 1.0, Species: Iris-versicolor

Id: 68, SepalLengthCm: 5.8, SepalWidthCm: 2.7, PetalLengthCm: 4.1, PetalWidthCm: 1.0, Species: Iris-versicolor


In [17]:
# Presidio imports
from presidio_analyzer import AnalyzerEngine
from presidio_anonymizer import AnonymizerEngine

In [18]:
presidio_analyzer = AnalyzerEngine()
presidio_anonymizer= AnonymizerEngine()



In [19]:
# First, let's analyze the text
text = "can you tell me what orders i've placed in the last 3 months? my name is Hank Tate and my phone number is 555-123-4567"
analysis = presidio_analyzer.analyze(text, language='en')

In [20]:
analysis

[type: DATE_TIME, start: 43, end: 60, score: 0.85,
 type: PERSON, start: 73, end: 82, score: 0.85,
 type: PHONE_NUMBER, start: 106, end: 118, score: 0.75]

In [21]:
print(presidio_anonymizer.anonymize(text=text, analyzer_results=analysis))

text: can you tell me what orders i've placed in <DATE_TIME>? my name is <PERSON> and my phone number is <PHONE_NUMBER>
items:
[
    {'start': 99, 'end': 113, 'entity_type': 'PHONE_NUMBER', 'text': '<PHONE_NUMBER>', 'operator': 'replace'},
    {'start': 67, 'end': 75, 'entity_type': 'PERSON', 'text': '<PERSON>', 'operator': 'replace'},
    {'start': 43, 'end': 54, 'entity_type': 'DATE_TIME', 'text': '<DATE_TIME>', 'operator': 'replace'}
]



In [22]:
from crewai.tools import BaseTool
from typing import Type
from pydantic import BaseModel, Field

In [23]:
class PIIRemovalInput(BaseModel):
    """Input schema for PII Removal."""
    text: str = Field(..., description="input text.")

In [24]:
class PIIRemovalTool(BaseTool):
    name: str = "PII Removal"
    description: str = (
        "Remove PII From Input Text to prevent PII Leakage."
    )
    args_schema: Type[BaseModel] = PIIRemovalInput

    def replace_pii(self, text: str):
        # Replace PII with a placeholder
        analyzer = AnalyzerEngine()
        anonymizer = AnonymizerEngine()
        analysis = analyzer.analyze(text, language='en')
        res = anonymizer.anonymize(text=text, analyzer_results=analysis)
        return res.text

    def _run(self, text: str) -> str:
        # Implementation goes here
        return self.replace_pii(text)

In [25]:
pii_tool = PIIRemovalTool()

In [26]:
pii_tool._run(text="can you tell me what orders i've placed in the last 3 months? my name is Hank Tate and my phone number is 555-123-4567")



"can you tell me what orders i've placed in <DATE_TIME>? my name is <PERSON> and my phone number is <PHONE_NUMBER>"

In [27]:
from better_profanity import profanity

In [29]:
censored_text = profanity.censor(
    "Go to hell",
)
censored_text

'Go to ****'

In [30]:
class BadWordsRemovalInput(BaseModel):
    """Input schema for PII Removal."""
    text: str = Field(..., description="input text.")

In [31]:
class BadWordsRemovalTool(BaseTool):
    name: str = "Bad Word Removal"
    description: str = (
        "Remove Bad Words From Input Text ."
    )
    args_schema: Type[BaseModel] = BadWordsRemovalInput

    def _run(self, text: str) -> str:
        # Implementation goes here
        return profanity.censor(text)

In [33]:
bad_words = BadWordsRemovalTool()
bad_words._run("we are in deep shit")

'we are in deep ****'

In [34]:
csv_parser = Agent(
    role="CSV Reader",
    goal="Extract Data From Retrieved CSV Results",
    backstory=(
        "From the Retrieved Results, Your role is to Understand the Asked Question"
        "answer based on the Results."
    ),
    llm=LLM(
        model=os.getenv('MODEL'),
        base_url=os.getenv('API_BASE'),
    ),
    verbose=True,
)

2024-11-23 23:28:39,083 - 2308 - llm.py-llm:178 - ERROR: Failed to get supported params: argument of type 'NoneType' is not iterable


In [35]:
csv_parser_tool = Task(
    tools = [csv_search],
    agent = csv_parser,
    description = (
        "Using the Search Results, Answer the given Question: {search_query}"
        "or give some code in case it needs some analysis to extract the result."
    ),
    expected_output = (
        "The Result Should describe how to get the result in case of code"
        "or should show the output directly if it could be extracted."
    )
)

In [36]:
pii_guardrail = Agent(
    role = "PII Guardrail",
    goal = (
        "Your role is to read text carefully"
        "and remove PII data from content"
        "then return the content without PII Data"
    ),
    backstory = (
        "You are a PII Guardrail, your job is to protect the PII data "
        "from being exposed." 
    ),
    llm=crewai_hf_llm,
    verbose=True,
)

In [37]:
bad_words_guardrail = Agent(
    role = "Bad Words Guardrail",
    goal = (
        "Your role is to read text carefully"
        "and remove Bad and Curse words from content"
        "then return the content without Bad Words"
    ),
    backstory = (
        "You are a guardrail that protects the text from bad words. "
    ),
    llm=crewai_hf_llm,
    verbose=True,
)

In [50]:
guardrail = Agent(
    role = "Guardrail",
    goal = (
        "Your role is to read text carefully"
        "and remove PII Data,Bad and Curse words from content"
        "then return the content without PII Data and Bad Words"
    ),
    backstory = (
        "You are a guardrail that prevents PII Leakage and showing bad words. "
    ),
    llm=crewai_hf_llm,
    verbose=True,
)

In [51]:
read_file_tool = FileReadTool()
read_dir_tool = DirectoryReadTool()

In [62]:
guard_task = Task(
    tools = [read_dir_tool, read_file_tool, pii_tool, bad_words],
    agent = guardrail,
    description = (
        "Remove PII Data and Bad Words from any .txt file under this directory: {path}"
    ),
    expected_output = (
        "The Result be text without any PII Data or Curse words."
    ),
    output_file=f"../{os.getenv('OUTPUT_FILES_PATH')}/clean_text.txt"
)

In [63]:
crew = Crew(
    agents=[guardrail],
    tasks=[guard_task],
    verbose=True,
)



In [64]:
result = crew.kickoff(
    inputs={
        "path": os.path.join(os.pardir, "assets", "data", "tmp")
    }
)

[1m[95m# Agent:[00m [1m[92mGuardrail[00m
[95m## Task:[00m [92mRemove PII Data and Bad Words from any .txt file under this directory: ..\assets\data\tmp[00m


[1m[95m# Agent:[00m [1m[92mGuardrail[00m
[95m## Using tool:[00m [92mList files in directory[00m
[95m## Tool Input:[00m [92m
"{\"directory\": \"..\\\\assets\\\\data\\\\tmp\"}"[00m
[95m## Tool Output:[00m [92m
File paths: 
-..\assets\data\tmp/file.txt
- ..\assets\data\tmp/foo.pdf
- ..\assets\data\tmp/Iris.csv[00m


[1m[95m# Agent:[00m [1m[92mGuardrail[00m
[95m## Using tool:[00m [92mRead a file's content[00m
[95m## Tool Input:[00m [92m
"{\"file_path\": \"..\\\\assets\\\\data\\\\tmp/file.txt\"}"[00m
[95m## Tool Output:[00m [92m
can you tell me what orders i've placed in the last 3 months? my name is Hank Tate and my phone number is 555-123-4567
motherfucker you look thirty!![00m






[1m[95m# Agent:[00m [1m[92mGuardrail[00m
[95m## Using tool:[00m [92mPII Removal[00m
[95m## Tool Input:[00m [92m
"{\"text\": \"can you tell me what orders i've placed in the last 3 months? my name is Hank Tate and my phone number is 555-123-4567 motherfucker you look thirty!!\"}"[00m
[95m## Tool Output:[00m [92m
can you tell me what orders i've placed in <DATE_TIME>? my name is <PERSON> and my phone number is <PHONE_NUMBER> motherfucker you look thirty!!


You ONLY have access to the following tools, and should NEVER make up tools that are not listed here:

Tool Name: List files in directory
Tool Arguments: {'directory': {'description': 'Mandatory directory to list content', 'type': 'str'}}
Tool Description: A tool that can be used to recursively list a directory's content.
Tool Name: Read a file's content
Tool Arguments: {'file_path': {'description': 'Mandatory file full path to read the file', 'type': 'str'}}
Tool Description: A tool that can be used to read a file'