In [7]:
import pandas as pd

from pathlib import Path


def process_csv_folders(data_dir):
	"""
	Processes all CSV files within subfolders of the given data directory.

	Args:
			data_dir (Path): Path to the directory containing subfolders with CSV files.

	Returns:
			list: A list of pandas DataFrames, one for each CSV file found.
	"""
	dfs = []
	# Loop through subfolders (assuming folder names start with numbers)
	for folder in sorted(f for f in data_dir.iterdir() if f.is_dir()):
		folder_path = data_dir / folder  # Combine data_dir with folder name
		csv_files = sorted(folder_path.glob("*.csv"))
		for csv_file in csv_files:
			print(f"Processing file: {csv_file}")
			try:
				df = pd.read_csv(csv_file)
				dfs.append(df)
			except Exception as e:
				print(f"Error parsing {csv_file}: {str(e)}")
	return dfs


# Set the data directory path (replace with your actual path)
data_dir = Path("F:/Internship/CSV/data")

# Process all CSV files in subfolders
dfs = process_csv_folders(data_dir)

# Now you have a list 'all_dfs' containing DataFrames from all CSV files
print("Successfully processed all CSV files!")


Processing file: F:\Internship\CSV\data\200-csv\0.csv
Processing file: F:\Internship\CSV\data\200-csv\1.csv
Processing file: F:\Internship\CSV\data\200-csv\10.csv
Processing file: F:\Internship\CSV\data\200-csv\11.csv
Processing file: F:\Internship\CSV\data\200-csv\12.csv
Processing file: F:\Internship\CSV\data\200-csv\14.csv
Processing file: F:\Internship\CSV\data\200-csv\15.csv
Error parsing F:\Internship\CSV\data\200-csv\15.csv: Error tokenizing data. C error: Expected 4 fields in line 16, saw 5

Processing file: F:\Internship\CSV\data\200-csv\17.csv
Error parsing F:\Internship\CSV\data\200-csv\17.csv: Error tokenizing data. C error: Expected 6 fields in line 5, saw 7

Processing file: F:\Internship\CSV\data\200-csv\18.csv
Processing file: F:\Internship\CSV\data\200-csv\20.csv
Processing file: F:\Internship\CSV\data\200-csv\22.csv
Processing file: F:\Internship\CSV\data\200-csv\24.csv
Processing file: F:\Internship\CSV\data\200-csv\25.csv
Processing file: F:\Internship\CSV\data\200-

In [2]:
from llama_index.core.program import LLMTextCompletionProgram
from llama_index.core.bridge.pydantic import BaseModel, Field
from llama_index.llms.huggingface import HuggingFaceLLM
import torch
from transformers import BitsAndBytesConfig



class TableInfo(BaseModel):
	"""Information regarding a structured table."""

	table_name: str = Field(
		..., description="table name (must be underscores and NO spaces)"
	)
	table_summary: str = Field(
		..., description="short, concise summary/caption of the table"
	)


prompt_str = """\
Give me a summary of the table with the following JSON format.

- The table name must be unique to the table and describe it while being concise.
- Do NOT output a generic table name (e.g. table, my_table).

Do NOT make the table name one of the following: {exclude_table_name_list}

Table:
{table_str}

Summary: """

quantization_config = BitsAndBytesConfig(
  load_in_4bit=True,
  bnb_4bit_compute_dtype=torch.float16,
  bnb_4bit_quant_type="nf4",
  bnb_4bit_use_double_quant=True,
)



# quantization_config = BitsAndBytesConfig(
#   load_in_8bit=True,  # Load the model in 8-bit format
#   bnb_8bit_compute_dtype=torch.float16,  # Use float16 for internal computations
#   bnb_8bit_quant_type="tensor_qint8",  # Quantize weights and activations to 8-bit integer
# )


llm = HuggingFaceLLM(
generate_kwargs={"temperature": 0.50, "do_sample": True},
tokenizer_name="mistralai/Mistral-7B-Instruct-v0.2",
model_name="mistralai/Mistral-7B-Instruct-v0.2",
device_map="cuda",
model_kwargs={"torch_dtype": torch.float16 , "quantization_config": quantization_config }
)


program = LLMTextCompletionProgram.from_defaults(
	output_cls=TableInfo,
	llm=llm,
	prompt_template_str=prompt_str,
)

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

import os
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"


In [4]:
import os

# Define the desired directory path
tableinfo_dir = "WikiTableQuestions_TableInfo"

# Check if the directory already exists
if not os.path.exists(tableinfo_dir):
    # Create the directory using os.mkdir()
    os.mkdir(tableinfo_dir)
else:
    # If the directory already exists, move on
    print(f"Directory '{tableinfo_dir}' already exists. Continuing execution.")

Directory 'WikiTableQuestions_TableInfo' already exists. Continuing execution.


In [8]:
import json


def _get_tableinfo_with_index(idx: int) -> str:
	# ... (rest of the function remains the same)
	results_gen = Path(tableinfo_dir).glob(f"{idx}_*")
	results_list = list(results_gen)
	if len(results_list) == 0:
		return None
	elif len(results_list) == 1:
		path = results_list[0]
		return TableInfo.parse_file(path)
	else:
		raise ValueError(
			f"More than one file matching index: {list(results_gen)}"
		)
table_names = set()
table_infos = []
for idx, df in enumerate(dfs):
	table_info = _get_tableinfo_with_index(idx)
	if table_info:
		table_infos.append(table_info)
	else:
		# Try to generate table info with program (once)
		num_tries = 1
		while num_tries <= 1:  # Limit to one retry
			df_str = df.head(10).to_csv()
			try:
				table_info = program(
					table_str=df_str,
					exclude_table_name_list=str(list(table_names)),
				)
				table_name = table_info.table_name
				print(f"Processed table: {table_name} (Try {num_tries})")
				if table_name not in table_names:
					table_names.add(table_name)
					break  # Success, exit the loop
				else:
					print(f"Table name {table_name} already exists. Moving to next file.")
					num_tries += 1  # Increment retry count
			except ValidationError:
				print(f"Error generating table info for index {idx}: Validation error. Ignoring and continuing.")
				pass  # Ignore ValidationError and continue

		# If loop finishes without a break (no success), move to next file
		if num_tries > 1:
			print(f"Failed to generate unique table name for index {idx}. Skipping.")

	# Save table info only if successful (original logic remains)
	if table_info:
		out_file = f"{tableinfo_dir}/{idx}_{table_name}.json"
		json.dump(table_info.dict(), open(out_file, "w"))
		table_infos.append(table_info)


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
  attn_output = torch.nn.functional.scaled_dot_product_attention(
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed table: Album_Chart_Positions (Try 1)


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed table: MediaCredits (Try 1)


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed table: Yearly_Deaths_and_Accidents (Try 1)


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed table: Academy_Awards_1972 (Try 1)


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed table: Award_Nominations (Try 1)


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed table: BadBoyArtistsAlbums (Try 1)


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed table: YanktonBroadcastingTable (Try 1)


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed table: MissingPersonsAugust1982 (Try 1)


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed table: Chart_Positions_And_Certifications (Try 1)


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed table: KodachromeFilmHistory (Try 1)


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed table: BBC_Radio_Annual_Costs (Try 1)


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed table: AirportUsageTable (Try 1)


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed table: Voter_Registration_Statistics (Try 1)


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed table: Norwegian_Football_Performance (Try 1)


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed table: HorseRacingChampions (Try 1)


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed table: Grammy_Awards (Try 1)


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed table: BoxingResults (Try 1)


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed table: early_20th_century_football_coaches (Try 1)


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed table: JapanesePrefectureDemographics (Try 1)


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed table: VoterRegistrationStatistics (Try 1)


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed table: FrenchActressAwards (Try 1)


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed table: UK_Ministers_and_their_Offices (Try 1)


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed table: MunicipalityMergers (Try 1)


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed table: Football_League_Table_20XX (Try 1)


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed table: BinaryEncodedProbabilitiesTable (Try 1)


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed table: Monthly_Average_Temperatures_and_Precipitation (Try 1)


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed table: ItalianPoliticalLeaders (Try 1)


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed table: NewMexicoPoliticalOfficers (Try 1)


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed table: WeatherMonthlyAverages (Try 1)


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed table: ManhattanProjectMilestones (Try 1)


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed table: Average_Monthly_Temperatures_and_Precipitation (Try 1)


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed table: AfrikaansGreetings (Try 1)


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed table: School_Districts_and_Enrollment_Processes (Try 1)


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed table: Growth_Factors_And_Tyrosine_Kinases_Associated_With_Cancer (Try 1)


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed table: Peak_Chart_Positions_and_Certifications (Try 1)


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed table: French_Politicians_and_Their_Careers (Try 1)


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed table: Norwegian_Politicians_And_Their_Occupations_1900s (Try 1)


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed table: Danish_and_Swedish_Population_Density_by_Region (Try 1)


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed table: US_Census_City_Population_Change (Try 1)


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed table: DelawareBridges (Try 1)


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed table: ChartPositionsForSongsByYear (Try 1)


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed table: Williams_Air_Force_Base_Landmarks (Try 1)


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed table: MusicFestivalLineups (Try 1)


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed table: TopMovieRevenues (Try 1)


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed table: FamousFerrisWheels (Try 1)


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed table: ParliamentaryRepresentation (Try 1)


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed table: BritishFilmReleasesWithRoles (Try 1)


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed table: Monthly_Weather_Averages (Try 1)


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed table: FilmAndRoleTable1986_1993 (Try 1)


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed table: Last_of_the_Three_Scarfers_Episodes (Try 1)


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed table: Scottish_Leaders_Party_And_Tenure (Try 1)


NameError: name 'ValidationError' is not defined

In [9]:
from sqlalchemy import (
	create_engine,
	MetaData,
	Table,
	Column,
	String,
	Integer,
	exc,  # Import the `exc` submodule for exception handling
)
import pandas as pd


import re


# Function to create a sanitized column name
def sanitize_column_name(col_name):
	# ... (rest of the function remains the same)
	return re.sub(r"\W+", "_", col_name)

# Function to create a table from a DataFrame using SQLAlchemy
def create_table_from_dataframe(
	df: pd.DataFrame, table_name: str, engine, metadata_obj
):
	# Sanitize column names
	sanitized_columns = {col: sanitize_column_name(col) for col in df.columns}
	df = df.rename(columns=sanitized_columns)

	# Dynamically create columns based on DataFrame columns and data types
	columns = [
		Column(col, String if dtype == "object" else Integer)
		for col, dtype in zip(df.columns, df.dtypes)
	]

	# Create a table with the defined columns
	table = Table(table_name, metadata_obj,extend_existing=True, *columns)

	# Create the table in the database
	metadata_obj.create_all(engine)

	# Insert data with exception handling (skip rows with missing columns)
	with engine.connect() as conn:
		for _, row in df.iterrows():
			try:
				insert_stmt = table.insert().values(**row.to_dict())
				conn.execute(insert_stmt)
			except exc.OperationalError as e:
				# Likely due to missing column in the table
				print(f"Error inserting row for {table_name}: {e}")
				continue  # Skip to the next row

		conn.commit()


engine = create_engine("sqlite:///local_db.db")
metadata_obj = MetaData()
for idx, df in enumerate(dfs):
	tableinfo = _get_tableinfo_with_index(idx)
	print(f"Creating table: {tableinfo.table_name}")
	create_table_from_dataframe(df, tableinfo.table_name, engine, metadata_obj)


Creating table: Album_Chart_Positions
Creating table: MediaCredits
Creating table: Yearly_Deaths_and_Accidents
Creating table: Academy_Awards_1972
Creating table: Award_Nominations
Creating table: BadBoyArtistsAlbums
Creating table: YanktonBroadcastingTable
Creating table: MissingPersonsAugust1982
Creating table: Chart_Positions_And_Certifications
Creating table: KodachromeFilmHistory
Creating table: BBC_Radio_Annual_Costs
Creating table: AirportUsageTable
Creating table: Voter_Registration_Statistics
Creating table: Norwegian_Football_Performance
Creating table: HorseRacingChampions
Creating table: Grammy_Awards
Creating table: BoxingResults
Creating table: early_20th_century_football_coaches
Creating table: JapanesePrefectureDemographics
Creating table: VoterRegistrationStatistics
Creating table: FrenchActressAwards
Creating table: UK_Ministers_and_their_Offices
Creating table: MunicipalityMergers
Creating table: Football_League_Table_20XX
Creating table: BinaryEncodedProbabilitiesTa

AttributeError: 'NoneType' object has no attribute 'table_name'

In [10]:
# pip install llama-index-embeddings-huggingface

from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core import Settings

Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")


In [12]:
from llama_index.embeddings.huggingface import HuggingFaceEmbedding  # Use pre-built Hugging Face embedding

# Alternatively, uncomment these lines if you prefer a custom TextEmbeddingModel:
from llama_index.core import VectorStoreIndex
# from llama_index.core import TextEmbeddingModel / # Might require specific version of llama_index
from llama_index.core.objects import (  # Might require specific version of llama_index
	SQLTableNodeMapping,
	ObjectIndex,
	SQLTableSchema,
)

# Downloaded Hugging Face model (replace with your actual model name)
model_name = "BAAI/bge-small-en-v1.5"  # Adjust with the name of your downloaded model

# Option 1: Use pre-built Hugging Face embedding (recommended)
embedding_model = HuggingFaceEmbedding(model_name=model_name)

# Option 2: Use custom TextEmbeddingModel (uncomment the imports above)
# class CustomEmbeddingModel(TextEmbeddingModel):
#     # ... (Implement your custom text embedding logic here)
# embedding_model = CustomEmbeddingModel(...)  # Initialize your custom model

from llama_index.core import SQLDatabase, VectorStoreIndex  # Import from llama_index

sql_database = SQLDatabase(engine)

table_node_mapping = SQLTableNodeMapping(sql_database)
table_schema_objs = [
	SQLTableSchema(table_name=t.table_name, context_str=t.table_summary)
	for t in table_infos
]  # Add a SQLTableSchema for each table


def get_text_embedding(text):
	# This function might not be needed if using HuggingFaceEmbedding
	# You can remove it if not used.
	pass  # Placeholder, not required with HuggingFaceEmbedding


# Use the chosen embedding model
obj_index = ObjectIndex.from_objects(
	table_schema_objs,
	table_node_mapping,
	VectorStoreIndex,
	# Use VectorStoreIndex for retrieval (might need adjustment)
	embedding_model=embedding_model,
)
obj_retriever = obj_index.as_retriever(similarity_top_k=3)


In [13]:
from llama_index.core.retrievers import SQLRetriever
from typing import List
from llama_index.core.query_pipeline import FnComponent

sql_retriever = SQLRetriever(sql_database)


def get_table_context_str(table_schema_objs: List[SQLTableSchema]):
	"""Get table context string."""
	context_strs = []
	for table_schema_obj in table_schema_objs:
		table_info = sql_database.get_single_table_info(
			table_schema_obj.table_name
		)
		if table_schema_obj.context_str:
			table_opt_context = " The table description is: "
			table_opt_context += table_schema_obj.context_str
			table_info += table_opt_context

		context_strs.append(table_info)
	return "\n\n".join(context_strs)


table_parser_component = FnComponent(fn=get_table_context_str)

In [14]:
from llama_index.core.prompts.default_prompts import DEFAULT_TEXT_TO_SQL_PROMPT
from llama_index.core import PromptTemplate
from llama_index.core.query_pipeline import FnComponent
from llama_index.core.llms import CompletionResponse


def parse_response_to_sql(response: CompletionResponse) -> str:
	"""Parse response to SQL."""
	# Access text content directly from CompletionResponse
	response_text = response.text
	sql_query_start = response_text.find("SQLQuery:")
	if sql_query_start != -1:
		response_text = response_text[sql_query_start:]
		# No need for additional check as 'SQLQuery:' should be at the beginning
		response_text = response_text[len("SQLQuery:"):]
	sql_result_start = response_text.find("SQLResult:")
	if sql_result_start != -1:
		response_text = response_text[:sql_result_start]
	return response_text.strip().strip("```").strip()



sql_parser_component = FnComponent(fn=parse_response_to_sql)

text2sql_prompt = DEFAULT_TEXT_TO_SQL_PROMPT.partial_format(
		dialect=engine.dialect.name
)
print(text2sql_prompt.template)

Given an input question, first create a syntactically correct {dialect} query to run, then look at the results of the query and return the answer. You can order the results by a relevant column to return the most interesting examples in the database.

Never query for all the columns from a specific table, only ask for a few relevant columns given the question.

Pay attention to use only the column names that you can see in the schema description. Be careful to not query for columns that do not exist. Pay attention to which column is in which table. Also, qualify column names with the table name when needed. You are required to use the following format, each taking one line:

Question: Question here
SQLQuery: SQL Query to run
SQLResult: Result of the SQLQuery
Answer: Final answer here

Only use tables listed below.
{schema}

Question: {query_str}
SQLQuery: 


In [15]:
response_synthesis_prompt_str = (
	"Given an input question, synthesize a response from the query results and the Answer which is generated from the SQLResults\n"
	"Query: {query_str}\n"
	"SQL: {sql_query}\n"
	"SQL Response: {context_str}\n"
	"Response: "
)
response_synthesis_prompt = PromptTemplate(
	response_synthesis_prompt_str,
)

In [16]:
from llama_index.core.query_pipeline import (
	QueryPipeline as QP,
	Link,
	InputComponent,
	CustomQueryComponent,
)

qp = QP(
	modules={
		"input": InputComponent(),
		"table_retriever": obj_retriever,
		"table_output_parser": table_parser_component,
		"text2sql_prompt": text2sql_prompt,
		"text2sql_llm": llm,
		"sql_output_parser": sql_parser_component,
		"sql_retriever": sql_retriever,
		"response_synthesis_prompt": response_synthesis_prompt,
		"response_synthesis_llm": llm,
	},
	verbose=True,
)


In [17]:
qp.add_chain(["input", "table_retriever", "table_output_parser"])
qp.add_link("input", "text2sql_prompt", dest_key="query_str")
qp.add_link("table_output_parser", "text2sql_prompt", dest_key="schema")
qp.add_chain(
	["text2sql_prompt", "text2sql_llm", "sql_output_parser", "sql_retriever"]
)
qp.add_link(
	"sql_output_parser", "response_synthesis_prompt", dest_key="sql_query"
)
qp.add_link(
	"sql_retriever", "response_synthesis_prompt", dest_key="context_str"
)
qp.add_link("input", "response_synthesis_prompt", dest_key="query_str")
qp.add_link("response_synthesis_prompt", "response_synthesis_llm")

In [25]:
response = qp.run(
	query="What is the population of danish capital region"
)
print(response.text)



Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


[1;3;38;2;155;135;227m> Running module input with input: 
query: What is the population of danish capital region

[0m[1;3;38;2;155;135;227m> Running module table_retriever with input: 
input: What is the population of danish capital region

[0m[1;3;38;2;155;135;227m> Running module table_output_parser with input: 
table_schema_objs: [SQLTableSchema(table_name='Danish_and_Swedish_Population_Density_by_Region', context_str='A table showing the population, area, and density (population per square kilometer) for different regions wit...

[0m[1;3;38;2;155;135;227m> Running module text2sql_prompt with input: 
query_str: What is the population of danish capital region
schema: Table 'Danish_and_Swedish_Population_Density_by_Region' has columns: Region (VARCHAR), Population (VARCHAR), Area (VARCHAR), Density (VARCHAR), and foreign keys: . The table description is: A table sh...

[0m[1;3;38;2;155;135;227m> Running module text2sql_llm with input: 
prompt: Given an input question, first c

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


[1;3;38;2;155;135;227m> Running module sql_output_parser with input: 
response: 
SELECT Population FROM Danish_and_Swedish_Population_Density_by_Region WHERE Region = 'Danish Capital Region';
SQLResult: '1,320,898'
Answer: The population of the Danish Capital Region is 1,320,898.

[0m[1;3;38;2;155;135;227m> Running module sql_retriever with input: 
input: SELECT Population FROM Danish_and_Swedish_Population_Density_by_Region WHERE Region = 'Danish Capital Region';

[0m[1;3;38;2;155;135;227m> Running module response_synthesis_prompt with input: 
query_str: What is the population of danish capital region
sql_query: SELECT Population FROM Danish_and_Swedish_Population_Density_by_Region WHERE Region = 'Danish Capital Region';
context_str: [NodeWithScore(node=TextNode(id_='3829a5c0-ad76-418b-b252-3513fcdab8d2', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, text="[('1,823,1...

[0m[1;3;38;2;155;135;227m> Running module r