# LangChain trial

In [None]:
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain_community.llms import Ollama
import pandas as pd



In [None]:


# Load and preprocess the dataset
def preprocess_dataset(file_path, relevant_columns):
    """
    Read a CSV file and keep only the requested columns.

    Any failure while reading the file or selecting the columns is reported
    on stdout and turned into an empty DataFrame, so callers can detect the
    problem with ``.empty`` instead of handling an exception.

    Args:
    - file_path: Path to the CSV file.
    - relevant_columns: List of columns to retain.

    Returns:
    - DataFrame restricted to ``relevant_columns``, or an empty DataFrame
      if loading or column selection failed.
    """
    try:
        full_table = pd.read_csv(file_path)
        selected = full_table[relevant_columns]
    except Exception as load_error:
        # Best-effort boundary: report the problem and fall back to "no data".
        print(f"Error in loading or processing dataset: {load_error}")
        selected = pd.DataFrame()
    return selected


# Columns kept from the raw CSV; these are the fields used later to build the
# LLM context lines and to run the keyword search.
relevant_columns = ['surah_name_roman', 'surah_name_en', 'ayah_no_surah', 'ayah_en']
quran_df = preprocess_dataset("TheQuranDataset.csv", relevant_columns)

if quran_df.empty:
    print("The dataset could not be loaded. Please check the file path and content.")
    # Stop here: nothing below can work without data. SystemExit is raised
    # directly because the `exit()` helper is installed by the `site` module
    # for interactive use and is not guaranteed to exist in every runtime;
    # the non-zero status also signals the failure to the calling process.
    raise SystemExit(1)





# Chunk the dataset for LLM processing
def preprocess_and_chunk(df, chunk_size=4000):
    """
    Format every ayah as one context line and pack the lines into chunks.

    Each row becomes a line of the form
    ``Ayah <ayah_no_surah> of Surah <surah_name_roman> (<surah_name_en>): '<ayah_en>'``.
    Lines are packed greedily, joined by newlines, so that no chunk exceeds
    ``chunk_size`` characters. Chunks break on line boundaries, which keeps
    an ayah's reference and its text in the same chunk. Only a single line
    that is itself longer than ``chunk_size`` is split into fixed-size pieces.

    Args:
    - df: Processed DataFrame with the columns 'ayah_no_surah',
      'surah_name_roman', 'surah_name_en' and 'ayah_en'.
    - chunk_size: Maximum character size for each chunk (must be positive).

    Returns:
    - List of context chunks (list of str); empty if ``df`` has no rows.

    Raises:
    - ValueError: If ``chunk_size`` is not a positive number.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")

    context_lines = [
        f"Ayah {row['ayah_no_surah']} of Surah {row['surah_name_roman']} ({row['surah_name_en']}): "
        f"'{row['ayah_en']}'"
        for _, row in df.iterrows()
    ]

    context_chunks = []
    pending_lines = []  # lines collected for the chunk currently being built
    pending_length = 0  # length of "\n".join(pending_lines)

    for line in context_lines:
        # A line longer than the limit cannot fit in any chunk as a whole, so
        # it is cut into chunk_size pieces; normal lines yield one piece.
        pieces = [
            line[start:start + chunk_size]
            for start in range(0, len(line), chunk_size)
        ] or [line]
        for piece in pieces:
            # Account for the newline that joins this piece to earlier lines.
            cost = len(piece) + (1 if pending_lines else 0)
            if pending_lines and pending_length + cost > chunk_size:
                context_chunks.append("\n".join(pending_lines))
                pending_lines = []
                pending_length = 0
                cost = len(piece)
            pending_lines.append(piece)
            pending_length += cost

    if pending_lines:
        context_chunks.append("\n".join(pending_lines))
    return context_chunks


# Turn the loaded dataset into the text chunks that are sent to the LLM.
context_chunks = preprocess_and_chunk(quran_df)

if not context_chunks:
    print("No context chunks created. Please check the dataset or preprocessing logic.")
    # Abort with a failure status. SystemExit is raised directly because the
    # `exit()` helper comes from the `site` module and is intended for
    # interactive sessions rather than programs.
    raise SystemExit(1)


# Question asked against every context chunk.
query = "Does the Quran mention dogs? Provide references and context."

# Prompt text with two placeholders: {context} receives one chunk of formatted
# ayah lines and {question} receives the query string.
prompt_template = """
Use the following Quranic data to answer the query and provide references with context.

Context:
{context}

Query: {question}
"""

# Wrap the raw template in a LangChain PromptTemplate, declaring the two
# placeholder names it expects.
prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template
)

# LLM wrapper configured for the "llama3" model.
# NOTE(review): presumably needs a reachable Ollama server with this model
# available — confirm for the target environment.
llm = Ollama(model="llama3")

# Chain that combines the prompt and the LLM; used by execute_query_on_chunks.
llm_chain = LLMChain(llm=llm, prompt=prompt)

# Execute the query on each chunk
def execute_query_on_chunks(context_chunks, query):
    """
    Ask the same question against every context chunk and merge the answers.

    Each chunk is sent through the module-level ``llm_chain`` together with
    the query. A chunk whose call raises is reported on stdout and skipped,
    so one failure does not discard the answers from the other chunks.

    Args:
    - context_chunks: List of text chunks.
    - query: Query string.

    Returns:
    - The answers of all successful chunks, joined by newlines (an empty
      string if no chunk produced an answer).
    """
    answers = []
    for piece in context_chunks:
        try:
            answer = llm_chain.run({"context": piece, "question": query})
        except Exception as err:
            print(f"Error processing chunk: {err}")
        else:
            answers.append(answer)
    return "\n".join(answers)


# Run the query over all chunks and show the merged answer under a heading.
final_result = execute_query_on_chunks(context_chunks=context_chunks, query=query)

print("\nOutput:", final_result, sep="\n")


# Keyword search function
def keyword_search(keywords, df):
    """
    Searches for ayahs containing specific keywords in the Quranic dataset.

    Matching is a case-insensitive substring search on the 'ayah_en' column.
    Keywords are treated as literal text: regex metacharacters in a keyword
    are escaped, so e.g. "a.b" only matches the three characters "a.b".
    Rows whose 'ayah_en' value is missing never match.

    Args:
    - keywords: List of keywords to search for. A single string is accepted
      and treated as one keyword. Empty keywords are ignored.
    - df: DataFrame containing Quranic data (must have an 'ayah_en' column).

    Returns:
    - DataFrame with matched ayahs and relevant details. If no usable
      keyword is given, an empty DataFrame with the same columns as ``df``.
    """
    # Local import keeps this function self-contained within the file.
    import re

    # A bare string would otherwise be joined character by character.
    if isinstance(keywords, str):
        keywords = [keywords]

    # Escape each term so it is matched literally, and drop empty terms,
    # because an empty alternative would match every row.
    literal_terms = [re.escape(str(term)) for term in keywords if str(term)]
    if not literal_terms:
        return df.iloc[0:0]

    pattern = '|'.join(literal_terms)
    matches = df[df['ayah_en'].str.contains(pattern, case=False, na=False)]
    return matches


# Keywords used for the plain-text search that complements the LLM answer.
# NOTE(review): "speak" is not dog-specific and may match many unrelated
# ayahs — confirm it is intended.
keywords = ["dog", "dogs", "dogs talk", "speak"]
matched_ayahs = keyword_search(keywords, quran_df)

# Display the results in a clean table format
if matched_ayahs.empty:
    print("\nNo matches found for the given keywords.")
else:
    print("\nMatched Ayahs:")
    try:
        # Markdown gives a table-like display but needs the optional
        # `tabulate` package.
        table = matched_ayahs.to_markdown(index=False)
    except ImportError:
        # Fall back to pandas' built-in text rendering when `tabulate` is
        # not installed, so the matches are still shown.
        table = matched_ayahs.to_string(index=False)
    print(table)
