In [None]:
from biascheck.analysis.basecheck import BaseCheck
from langchain.vectorstores import FAISS
from datasets import load_dataset
from langchain.embeddings.huggingface import HuggingFaceEmbeddings

# Grab a small public corpus from Hugging Face: only the first 20 IMDB
# training samples are requested so the example stays quick.
dataset = load_dataset("imdb", split="train[:20]")
documents = [dict(text=row["text"]) for row in dataset]

# Build the embedding model, then index every document's text in a FAISS
# vector store.
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2"
)
vector_db = FAISS.from_texts(
    [entry["text"] for entry in documents],
    embeddings,
)

# Point BaseCheck at the vector store, using the same sentence-transformers
# model name as the embeddings above and contextual analysis switched on.
checker = BaseCheck(
    data=vector_db,
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    terms=["discrimination", "bias", "stereotypes"],
    use_contextual_analysis=True,
)

# Run the analysis with top_k=10.
# NOTE(review): the result is presumably a pandas DataFrame - confirm
# against BaseCheck.analyze.
results_df = checker.analyze(top_k=10)

# Turn the analysis results into a report and print it.
report = checker.generate_report(results_df)
print(report)


In [None]:
from datasets import load_dataset
from biascheck.analysis.basecheck import BaseCheck

# Fetch the first 100 training records of the "ag_news" dataset from
# Hugging Face.
dataset = load_dataset("ag_news", split="train[:100]")

# Build a graph-like structure. This is only an example of how to use the
# classes: every record becomes a node dictionary holding its position as
# "id", the record's label as "category", and the record's "text".
graph_data = [
    {"id": position, "category": row["label"], "text": row["text"]}
    for position, row in enumerate(dataset)
]

# Simulate a graph database by iterating over graph_data
class SimulatedGraph:
    """Tiny stand-in for a graph database, backed by the global ``graph_data``."""

    def run(self, query):
        """Answer the single query this simulation understands.

        Args:
            query: Query string. Only the exact text ``"MATCH (n) RETURN n"``
                is recognised.

        Returns:
            A list with one ``{"n": node}`` dictionary per entry of
            ``graph_data`` for the recognised query; ``None`` for any
            other query.
        """
        if query != "MATCH (n) RETURN n":
            return None

        rows = []
        for node in graph_data:
            rows.append({"n": node})
        return rows

# Create the simulated graph database that stands in for a real connection.
graph_db = SimulatedGraph()

# Set up BaseCheck against the simulated graph, with contextual analysis
# and verbose mode both enabled.
checker = BaseCheck(
    data=graph_db,
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    terms=["discrimination", "bias", "stereotypes"],
    use_contextual_analysis=True,
    verbose=True,
)

# Run the analysis with BaseCheck's default arguments.
results_df = checker.analyze()

# Build the report from the analysis results and print it.
report = checker.generate_report(results_df)
print(report)

# Keep a CSV copy of the results (no index column) for later inspection.
results_df.to_csv("graph_database_bias_analysis.csv", index=False)
