In [1]:
%pip install -q llama-index
%pip install -q llama-index-llms-mistralai
%pip install -q llama-index-embeddings-huggingface

In [45]:
# Install necessary libraries
%pip install -q llama-index
%pip install -q llama-index-llms-mistralai
%pip install -q llama-index-embeddings-huggingface

# Import required modules
from llama_index.core import (
    Settings,
    VectorStoreIndex,
    SimpleDirectoryReader,
    StorageContext,
    load_index_from_storage,
    PromptTemplate
)
from llama_index.core.ingestion import IngestionPipeline
from llama_index.core.node_parser import SentenceSplitter
from llama_index.llms.mistralai import MistralAI
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
import pandas as pd
import re
import csv  # For saving results to a CSV file

# Step 1: Load the data from a CSV file
file_path = "/content/sentiments.csv"
data = pd.read_csv(file_path, index_col="No")

# Step 2: Handle missing data by dropping rows where 'FeedBack' is null
data = data.dropna(subset=['FeedBack'])

# Step 3: Define a preprocessing function to clean the feedback text
def preprocess_text(text):
    # Remove special characters and punctuation
    text = re.sub(r"[^\w\s]", " ", text)
    # Remove single characters
    text = re.sub(r"\b[a-zA-Z]\b", " ", text)
    # Remove HTML tags
    text = re.sub(r"<[^>]*>", " ", text)
    # Convert text to lowercase
    text = text.lower()
    # Remove extra whitespace
    text = re.sub(r"\s+", " ", text)
    # Trim leading and trailing spaces
    text = text.strip()
    return text

# Apply preprocessing to the 'FeedBack' column
data['FeedBack'] = data['FeedBack'].apply(preprocess_text)

# Optional: Display a sample of the preprocessed data
print(data.head())

# Save preprocessed data for LLM input
data.to_csv("/content/preprocessed_sentiments.csv", index=False)

# Step 4: Initialize the LLM and embedding model
llm = MistralAI(api_key="YOUR_API_KEY", temperature=0.01, model='mistral-large-latest')
embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-base-en-v1.5")

# Configure settings for LlamaIndex
Settings.llm = llm
Settings.embed_model = embed_model
Settings.node_parser = SentenceSplitter(chunk_size=1024, chunk_overlap=256)
Settings.num_output = 16000
Settings.context_window = 32000

# Step 5: Load preprocessed data as documents for indexing
reader = SimpleDirectoryReader(input_files=["/content/preprocessed_sentiments.csv"])
documents = reader.load_data()

# Create a vector store index and initialize an ingestion pipeline
index = VectorStoreIndex.from_documents(documents=documents)
pipeline = IngestionPipeline(transformations=[SentenceSplitter(chunk_size=1024, chunk_overlap=256)])

# Process and insert nodes into the index
nodes = pipeline.run(documents=documents)
index.insert_nodes(nodes)

# Step 6: Configure the chat engine with a custom prompt template
chat_engine = index.as_chat_engine(
    chat_mode="condense_plus_context",
    similarity_top_k=25,
    context_prompt='''
    You are an expert linguist, who is good at classifying customer review sentiments into Positive/Negative labels based upon proivded context.
    Classify customer FeedBack into: Positive, Negative under Sentiment column
    Classify the FeedBack into: Time Management, Content Quality, User Experience, Suggestions, General Feedback, Positive Feedback, Content Difficulty, Technical Quality, Learning Opportunity under FeedBack category column
    Generate a table for all the obersevations.

    Here is the context:
    {context_str}

    Strictly follow the given format:
    | Feedback | Sentiment | Feedback category |
    |----------|-----------|-------------------|
    | {feedback} | {sentiment} | {feedback_category} |

    For Example:
    | Not enough time in the math section	| Negative | Time Management |
    | Please improve the UI for better understanding the Questions.	| Negative | User Experience (UX) |
    etc..

    **Important:**
    - Use only information from the provided docuemnts.
    - Do not use external knowledge or assumptions.
    - If the feedback does not provide any specific details (e.g., "no", "all good"), classify it as **General Feedback** and assign the sentiment accordingly.
    - Please exclude empty feedback entries from the output
    - Format the output properly in markdown, utilizing appropriate table for clarity.
    '''
)

# Step 7: Query the LLM with the classification task
user_query = "Classify the FeedBack for all the observations."
response = chat_engine.chat(user_query)

response.response

                                             FeedBack
No                                                   
1   it will be nice if you can extend the test tim...
2   there were some grammatical errors in some of ...
3   making the website more user friendly couldn 3...
4                                                  no
5                                   its all good only


'To classify feedback for all observations, you\'ll first need to define a classification scheme. Here\'s a simple example using a common scheme for customer feedback: Positive, Neutral, Negative, and Suggestion. I\'ll provide examples of how to classify various observations based on this scheme.\n\n1. **Observation:** "I loved the new app design. It\'s so much easier to use now."\n   - **Classification:** Positive\n   - **Reason:** The customer expresses satisfaction and praise for the new design.\n\n2. **Observation:** "The product is okay, but I expected more features."\n   - **Classification:** Neutral\n   - **Reason:** The customer is neither fully satisfied nor dissatisfied but has some reservations.\n\n3. **Observation:** "The customer service was terrible. I waited for hours and got no help."\n   - **Classification:** Negative\n   - **Reason:** The customer expresses dissatisfaction and frustration with the service.\n\n4. **Observation:** "It would be great if you could add a d

| Feedback | Sentiment | Feedback category |
| --- | --- | --- |
| process was seamless and devoid of errors minute can be added for review after each section before submission | Positive | Time Management |
| the test overall test experience was good | Positive | General Feedback |
| want to thank the management for giving me the platform to participate in the momship program please try setting up means for more moms to get these opportunity thank you | Positive | General Feedback |
| will love to be part of your team regardless | Positive | General Feedback |
| in the verbal section to identify grammatical sentences it would have been helpful in first the full sentence was displayed and later the options with right or wrong parts of the sentence giving just options was confusing | Negative | Content Quality |
| good | Positive | General Feedback |
| it is most of interesting | Positive | General Feedback |
| had good experience on the test conducted had proper instructions to attend the questionery | Positive | General Feedback |
| would like to know the individual question correct answer | Suggestions | General Feedback |
| very well questiuons | Positive | Content Quality |
| the time one spends on each question varies therefore the test shouldn 39 have the same timing all through but can the total timing can still be used as yardstick overall | Suggestions | Time Management |
| satisfactory | Positive | General Feedback |
| hope pass the test and get chosen thank you for tasking my brain | Positive | General Feedback |