In [2]:
from biascheck.analysis.basecheck import BaseCheck
from langchain.vectorstores import FAISS
from datasets import load_dataset
from langchain.embeddings.huggingface import HuggingFaceEmbeddings

# Demo configuration: everything a reader might tune lives here.
N_SAMPLES = 20  # number of IMDB training reviews to load; kept small for speed
EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"

# Load a small slice of a publicly available dataset from Hugging Face.
dataset = load_dataset("imdb", split=f"train[:{N_SAMPLES}]")
documents = [{"text": doc["text"]} for doc in dataset]

# Embed the review texts and index them in a FAISS vector store.
embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL)
vector_db = FAISS.from_texts([doc["text"] for doc in documents], embeddings)

# Initialize BaseCheck with the vector store as its data source. The same
# model name used for the embeddings above is passed so the two stay in sync.
# NOTE(review): the saved output of this cell ends with
# "TypeError: object of type 'IndexFlatL2' has no len()". The traceback origin
# is not visible here -- confirm that BaseCheck accepts a LangChain FAISS
# store as `data` before relying on this cell.
checker = BaseCheck(
    data=vector_db,
    terms=["discrimination", "bias", "stereotypes"],
    model_name=EMBEDDING_MODEL,
    use_contextual_analysis=True,
)

# Perform analysis
results_df = checker.analyze()

# Generate and print report
report = checker.generate_report(results_df)
print(report)

Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.


TypeError: object of type 'IndexFlatL2' has no len()

In [None]:
import os

from biascheck.analysis.basecheck import BaseCheck
from py2neo import Graph

# Connection settings come from the environment so that credentials are never
# committed with the notebook. The URI and user fall back to the usual local
# development values; the password has no default and must be provided.
neo4j_uri = os.environ.get("NEO4J_URI", "bolt://localhost:7687")
neo4j_user = os.environ.get("NEO4J_USER", "neo4j")
neo4j_password = os.environ.get("NEO4J_PASSWORD")
if neo4j_password is None:
    raise RuntimeError(
        "Set the NEO4J_PASSWORD environment variable before running this cell."
    )

# Connect to a local or remote Neo4j instance
graph_db = Graph(neo4j_uri, auth=(neo4j_user, neo4j_password))

# Populate the graph database with sample data (optional)
# Example: Load Movie Dataset in Neo4j from https://github.com/neo4j-graph-examples/movies

# Initialize BaseCheck with the graph connection as its data source
checker = BaseCheck(
    data=graph_db,
    terms=["discrimination", "bias", "stereotypes"],
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    use_contextual_analysis=True,
)

# Perform analysis
results_df = checker.analyze()

# Generate and print report
report = checker.generate_report(results_df)
print(report)