In [None]:
# Step 1.1: Mount Google Drive (if your dataset is stored there)


# Step 1.2: Install required libraries
!pip install pandas openpyxl

# Step 1.3: Import necessary libraries
import pandas as pd

# Step 1.4: Load the dataset from Google Drive or local path
# Replace the file path with the actual location of your dataset
# = "/content/drive/MyDrive/DatasetGrievence.xlsx"
file_path = "/content/DatasetGrievence.xlsx"
 # Update this path if needed
df = pd.read_excel(file_path, sheet_name="Sheet1")

# Step 1.5: Display the first few rows of the dataset
print("First 5 rows of the dataset:")
print(df.head())

# Step 1.6: Inspect the dataset structure
print("\nDataset Information:")
print(df.info())

# Step 1.7: Clean and preprocess the data
# Drop rows with missing values
df = df.dropna()

# Normalize text fields (convert to lowercase and remove special characters)
text_columns = ["Message Description", "Citizen Feedback"]
for col in text_columns:
    df[col] = df[col].str.lower().str.replace(r"[^a-z0-9\s]", "", regex=True)

# Step 1.8: Display the cleaned dataset
print("\nCleaned Dataset:")
print(df.head())

First 5 rows of the dataset:
   Grievance ID  Citizen Name  Officer Name Communication Date     Role  \
0           101    Ravi Kumar  Anjali Verma         2024-01-10  Citizen   
1           101  Anjali Verma  Anjali Verma         2024-01-12  Officer   
2           101    Ravi Kumar  Anjali Verma         2024-01-15  Citizen   
3           101  Anjali Verma  Anjali Verma         2024-01-20  Officer   
4           102    Priya Shah  Rajeev Singh         2024-02-05  Citizen   

                                 Message Description Resolution Status  \
0  "There is poor irrigation in our village, lead...           Pending   
1  "We have installed a new irrigation system and...          Resolved   
2  "The irrigation system still does not cover al...           Pending   
3  "We have made an additional adjustment to exte...          Resolved   
4  "I suggest providing online classes for high s...           Pending   

  Reappeal Eligibility                             Reappeal Reason  \
0    

In [None]:
!pip install sentence-transformers faiss-cpu

# Step 2.2: Import necessary libraries
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np

# Step 2.3: Load a pre-trained Sentence-BERT model
model = SentenceTransformer("all-MiniLM-L6-v2")

# Step 2.4: Generate embeddings for the "Message Description" field
descriptions = df["Message Description"].tolist()
embeddings = model.encode(descriptions)

# Step 2.5: Display the shape of the embeddings
print("Embeddings shape:", embeddings.shape)

# Step 2.6: Store embeddings in a FAISS index for similarity search
dimension = embeddings.shape[1]  # Dimension of the embeddings
index = faiss.IndexFlatL2(dimension)  # Create a FAISS index
index.add(embeddings)  # Add embeddings to the index

print("FAISS index created with", index.ntotal, "entries.")

Collecting faiss-cpu
  Downloading faiss_cpu-1.10.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (4.4 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_6

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

1_Pooling%2Fconfig.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Embeddings shape: (30, 384)
FAISS index created with 30 entries.


In [None]:
# Step 3.1: Define a function to retrieve similar grievances
def retrieve_similar_grievances(query, top_k=5):
    """
    Retrieve the top-k most similar grievances from the knowledge base.

    Args:
        query (str): The input query (new grievance).
        top_k (int): Number of similar grievances to retrieve.

    Returns:
        pd.DataFrame: A DataFrame containing the top-k similar grievances.
    """
    # Generate embedding for the query
    query_embedding = model.encode([query])

    # Perform similarity search in the FAISS index
    distances, indices = index.search(query_embedding, top_k)

    # Retrieve the corresponding rows from the dataset
    similar_grievances = df.iloc[indices[0]]

    return similar_grievances

# Step 3.2: Test the retrieval function with a sample query
sample_query = "The irrigation system is not working properly."
similar_grievances = retrieve_similar_grievances(sample_query, top_k=3)

# Step 3.3: Display the retrieved grievances
print("Top similar grievances for the query:")
print(similar_grievances[["Grievance ID", "Message Description", "Resolution Status", "Citizen Feedback"]])

Top similar grievances for the query:
   Grievance ID                                Message Description  \
0           101  there is poor irrigation in our village leadin...   
1           101  we have installed a new irrigation system and ...   
2           101  the irrigation system still does not cover all...   

  Resolution Status                               Citizen Feedback  
0           Pending  the system improved but needs better coverage  
1          Resolved  the system improved but needs better coverage  
2           Pending            needs a more comprehensive solution  


In [None]:
# Step 4.1: Install required libraries
!pip install transformers

# Step 4.2: Import necessary libraries
from transformers import pipeline

# Step 4.3: Load a pre-trained generative model
generator = pipeline("text-generation", model="gpt-3.5-turbo")

# Step 4.4: Define a function to generate a response using RAG
def generate_response(query, context):
    """
    Generate a response using a generative model based on the query and retrieved context.

    Args:
        query (str): The input query (new grievance).
        context (str): Retrieved context from similar grievances.

    Returns:
        str: Generated response.
    """
    # Combine the query and context into a prompt
    prompt = f"Query: {query}\nContext: {context}\nResponse:"

    # Generate a response using the generative model
    response = generator(prompt, max_length=100)

    return response[0]["generated_text"]

# Step 4.5: Test the generation function with a sample query
sample_query = "The irrigation system is not working properly."
similar_grievances = retrieve_similar_grievances(sample_query, top_k=3)

# Extract context from the retrieved grievances
context = "\n".join(similar_grievances["Message Description"])

# Generate a response
response = generate_response(sample_query, context)

# Step 4.6: Display the generated response
print("Generated Response:")
print(response)



OSError: gpt-3.5-turbo is not a local folder and is not a valid model identifier listed on 'https://huggingface.co/models'
If this is a private repository, make sure to pass a token having permission to this repo either by logging in with `huggingface-cli login` or by passing `token=<your_token>`

In [None]:
# Step 4.1: Install required libraries
!pip install transformers torch

# Step 4.2: Import necessary libraries
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

# Step 4.3: Load a pre-trained Falcon model
model_name = "tiiuae/falcon-7b-instruct"  # Falcon 7B Instruct model
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16, trust_remote_code=True)

# Step 4.4: Define a function to generate a response using Falcon
def generate_response_falcon(query, context):
    """
    Generate a response using Falcon based on the query and retrieved context.

    Args:
        query (str): The input query (new grievance).
        context (str): Retrieved context from similar grievances.

    Returns:
        str: Generated response.
    """
    # Combine the query and context into a prompt
    prompt = f"Query: {query}\nContext: {context}\nResponse:"

    # Tokenize the prompt
    inputs = tokenizer(prompt, return_tensors="pt")

    # Generate a response
    outputs = model.generate(**inputs, max_length=100, do_sample=True, top_p=0.95, temperature=0.7)

    # Decode and return the generated text
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Step 4.5: Test the Falcon generation function
sample_query = "The irrigation system is not working properly."
similar_grievances = retrieve_similar_grievances(sample_query, top_k=3)

# Extract context from the retrieved grievances
context = "\n".join(similar_grievances["Message Description"])

# Generate a response using Falcon
response = generate_response_falcon(sample_query, context)

# Step 4.6: Display the generated response
print("Generated Response:")
print(response)



tokenizer_config.json:   0%|          | 0.00/1.13k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.73M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/281 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.05k [00:00<?, ?B/s]

configuration_falcon.py:   0%|          | 0.00/7.16k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/tiiuae/falcon-7b-instruct:
- configuration_falcon.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.



modeling_falcon.py:   0%|          | 0.00/56.9k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/tiiuae/falcon-7b-instruct:
- modeling_falcon.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


model.safetensors.index.json:   0%|          | 0.00/17.7k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.95G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/4.48G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/117 [00:00<?, ?B/s]

AttributeError: 'FalconForCausalLM' object has no attribute 'encode'

In [None]:
# Step 1: Install required libraries
!pip install sentence-transformers faiss-cpu transformers torch

# Step 2: Import necessary libraries
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import pandas as pd
# Step 3: Load a pre-trained embedding model for retrieval
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

# Step 4: Generate embeddings for the "Message Description" field
file_path = "/content/DatasetGrievence.xlsx"
 # Update this path if needed
df = pd.read_excel(file_path, sheet_name="Sheet1")
descriptions = df["Message Description"].tolist()
embeddings = embedding_model.encode(descriptions)

# Step 5: Store embeddings in a FAISS index for similarity search
dimension = embeddings.shape[1]  # Dimension of the embeddings
index = faiss.IndexFlatL2(dimension)  # Create a FAISS index
index.add(embeddings)  # Add embeddings to the index

print("FAISS index created with", index.ntotal, "entries.")

# Step 6: Define a function to retrieve similar grievances
def retrieve_similar_grievances(query, top_k=3):
    """
    Retrieve the top-k most similar grievances from the knowledge base.

    Args:
        query (str): The input query (new grievance).
        top_k (int): Number of similar grievances to retrieve.

    Returns:
        pd.DataFrame: A DataFrame containing the top-k similar grievances.
    """
    # Generate embedding for the query
    query_embedding = embedding_model.encode([query])

    # Perform similarity search in the FAISS index
    distances, indices = index.search(query_embedding, top_k)

    # Retrieve the corresponding rows from the dataset
    similar_grievances = df.iloc[indices[0]]

    return similar_grievances

# Step 7: Load a pre-trained Falcon model for text generation
model_name = "tiiuae/falcon-7b-instruct"  # Falcon 7B Instruct model
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16, trust_remote_code=True)

# Step 8: Define a function to generate a response using Falcon
def generate_response_falcon(query, context):
    """
    Generate a response using Falcon based on the query and retrieved context.

    Args:
        query (str): The input query (new grievance).
        context (str): Retrieved context from similar grievances.

    Returns:
        str: Generated response.
    """
    # Combine the query and context into a prompt
    prompt = f"Query: {query}\nContext: {context}\nResponse:"

    # Tokenize the prompt
    inputs = tokenizer(prompt, return_tensors="pt")

    # Generate a response
    outputs = model.generate(**inputs, max_length=100, do_sample=True, top_p=0.95, temperature=0.7)

    # Decode and return the generated text
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Step 9: Test the RAG system with a sample query
sample_query = "The irrigation system is not working properly."
similar_grievances = retrieve_similar_grievances(sample_query, top_k=3)

# Extract context from the retrieved grievances
context = "\n".join(similar_grievances["Message Description"])

# Generate a response using Falcon
response = generate_response_falcon(sample_query, context)

# Step 10: Display the generated response
print("Generated Response:")
print(response)

FAISS index created with 48 entries.


tokenizer_config.json:   0%|          | 0.00/1.13k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.73M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/281 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.05k [00:00<?, ?B/s]

configuration_falcon.py:   0%|          | 0.00/7.16k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/tiiuae/falcon-7b-instruct:
- configuration_falcon.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.



modeling_falcon.py:   0%|          | 0.00/56.9k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/tiiuae/falcon-7b-instruct:
- modeling_falcon.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


model.safetensors.index.json:   0%|          | 0.00/17.7k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.95G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/4.48G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/117 [00:00<?, ?B/s]

Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.


Generated Response:
Query: The irrigation system is not working properly.
Context: "The irrigation system still does not cover all areas. I request further improvement and coverage expansion."
"We have installed a new irrigation system and scheduled maintenance. Please allow some time for its effect."
"There is poor irrigation in our village, leading to crop failure. This needs urgent attention."
Response: "I understand your concern and appreciate your patience. The irrigation system has been improved and will cover the entire area


In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression

# Step 1: Prepare data
X = df["Message Description"]
y = df["Category"]  # Assuming you have a "Category" column in your dataset

# Step 2: Vectorize text
vectorizer = TfidfVectorizer()
X_vectorized = vectorizer.fit_transform(X)

# Step 3: Train a classification model
classifier = LogisticRegression()
classifier.fit(X_vectorized, y)

# Step 4: Define a function to classify grievances
def classify_grievance(message):
    message_vectorized = vectorizer.transform([message])
    return classifier.predict(message_vectorized)[0]

# Step 5: Test the classification function
sample_query = "The irrigation system is not working properly."
category = classify_grievance(sample_query)
print("Grievance Category:", category)


KeyError: 'Category'

In [None]:
# Step 1: Define a function to categorize grievances
def categorize_grievance(message):
    # Convert the message to lowercase for case-insensitive matching
    message = message.lower()

    # Define category-specific keywords
    agriculture_keywords = ["irrigation", "fertilizer", "crop", "farm", "agriculture", "flood relief"]
    education_keywords = ["school", "online class", "exam", "education", "curriculum", "learning"]
    police_keywords = ["police", "theft", "response time", "complaint", "investigation","missing","lost","stolen"]
    infrastructure_keywords = ["power outage", "streetlight", "road", "pothole", "repair", "infrastructure"]
    flood_relief_keywords = ["flood relief", "subsidy", "package", "affected families"]

    # Check for matches and assign a category
    if any(keyword in message for keyword in agriculture_keywords):
        return "Agriculture"
    elif any(keyword in message for keyword in education_keywords):
        return "Education"
    elif any(keyword in message for keyword in police_keywords):
        return "Police Response"
    elif any(keyword in message for keyword in infrastructure_keywords):
        return "Infrastructure"
    elif any(keyword in message for keyword in flood_relief_keywords):
        return "Flood Relief"
    else:
        return "Other"

# Step 2: Apply the categorization function to the dataset
df["Category"] = df["Message Description"].apply(categorize_grievance)

# Step 3: Display the categorized dataset
print("Categorized Dataset:")
print(df[["Grievance ID", "Message Description", "Category"]])

Categorized Dataset:
    Grievance ID                                Message Description  \
0            101  "There is poor irrigation in our village, lead...   
1            101  "We have installed a new irrigation system and...   
2            101  "The irrigation system still does not cover al...   
3            101  "We have made an additional adjustment to exte...   
4            102  "I suggest providing online classes for high s...   
5            102  "We have approved the online education program...   
6            103  "There was a theft in my neighborhood, and the...   
7            103  "We are investigating the matter and have incr...   
8            103  "The patrols are helpful, but I am still unhap...   
9            103  "The issue has been escalated to the investiga...   
10           104  "I have not received the fertilizers promised ...   
11           104  "Subsidized fertilizers are being provided now...   
12           104  "The fertilizers arrived late and stil

In [None]:
# Step 1: Import necessary libraries
import pandas as pd
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression

# Step 2: Load the dataset
file_path = "/content/DatasetGrievence.xlsx"  # Update this path if needed
df = pd.read_excel(file_path, sheet_name="Sheet1")

# Step 3: Categorize grievances
def categorize_grievance(message):
    message = message.lower()
    agriculture_keywords = ["irrigation", "fertilizer", "crop", "farm", "agriculture", "flood relief"]
    education_keywords = ["school", "online class", "exam", "education", "curriculum", "learning"]
    police_keywords = ["police", "theft", "response time", "complaint", "investigation"]
    infrastructure_keywords = ["power outage", "streetlight", "road", "pothole", "repair", "infrastructure"]
    flood_relief_keywords = ["flood relief", "subsidy", "package", "affected families"]

    if any(keyword in message for keyword in agriculture_keywords):
        return "Agriculture"
    elif any(keyword in message for keyword in education_keywords):
        return "Education"
    elif any(keyword in message for keyword in police_keywords):
        return "Police Response"
    elif any(keyword in message for keyword in infrastructure_keywords):
        return "Infrastructure"
    elif any(keyword in message for keyword in flood_relief_keywords):
        return "Flood Relief"
    else:
        return "Other"

df["Category"] = df["Message Description"].apply(categorize_grievance)

# Step 4: Train a classification model
X = df["Message Description"]
y = df["Category"]

vectorizer = TfidfVectorizer()
X_vectorized = vectorizer.fit_transform(X)

classifier = LogisticRegression()
classifier.fit(X_vectorized, y)

def classify_grievance(message):
    message_vectorized = vectorizer.transform([message])
    return classifier.predict(message_vectorized)[0]

# Step 5: Generate embeddings and store in FAISS
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
descriptions = df["Message Description"].tolist()
embeddings = embedding_model.encode(descriptions)

dimension = embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(embeddings)

def retrieve_similar_grievances(query, top_k=3):
    query_embedding = embedding_model.encode([query])
    distances, indices = index.search(query_embedding, top_k)
    return df.iloc[indices[0]]

# Step 6: Load a generative model (Falcon)
model_name = "tiiuae/falcon-7b-instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16, trust_remote_code=True)

def generate_response_falcon(query, context):
    prompt = f"Query: {query}\nContext: {context}\nResponse:"
    inputs = tokenizer(prompt, return_tensors="pt")
    outputs = model.generate(**inputs, max_length=100, do_sample=True, top_p=0.95, temperature=0.7)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Step 7: Perform sentiment analysis
sentiment_analyzer = pipeline("sentiment-analysis")

def analyze_sentiment(feedback):
    result = sentiment_analyzer(feedback)[0]
    return result["label"], result["score"]

# Step 8: Assign priority
def assign_priority(sentiment, resolution_status):
    if sentiment == "NEGATIVE" and resolution_status == "Pending":
        return "High"
    elif sentiment == "NEGATIVE":
        return "Medium"
    else:
        return "Low"

# Step 9: Recommend solutions
def recommend_solution(query):
    similar_grievances = retrieve_similar_grievances(query)
    resolved_solutions = similar_grievances[similar_grievances["Resolution Status"] == "Resolved"]["Message Description"]
    return resolved_solutions.tolist()

# Step 10: End-to-end pipeline
def process_grievance(query):
    # Classify the grievance
    category = classify_grievance(query)

    # Retrieve similar grievances
    similar_grievances = retrieve_similar_grievances(query)

    # Generate a response using RAG
    context = "\n".join(similar_grievances["Message Description"])
    response = generate_response_falcon(query, context)

    # Analyze sentiment
    feedback = similar_grievances.iloc[0]["Citizen Feedback"]
    sentiment, score = analyze_sentiment(feedback)

    # Assign priority
    resolution_status = similar_grievances.iloc[0]["Resolution Status"]
    priority = assign_priority(sentiment, resolution_status)

    # Recommend solutions
    recommendations = recommend_solution(query)

    return {
        "Category": category,
        "Response": response,
        "Sentiment": sentiment,
        "Priority": priority,
        "Recommendations": recommendations
    }

# Step 11: Test the system
sample_query = "The irrigation system is not working properly."
result = process_grievance(sample_query)

print("Category:", result["Category"])
print("Response:", result["Response"])
print("Sentiment:", result["Sentiment"])
print("Priority:", result["Priority"])
print("Recommendations:")
for solution in result["Recommendations"]:
    print("-", solution)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision 714eb0f (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.


config.json:   0%|          | 0.00/629 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Device set to use cpu
Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.


Category: Agriculture
Response: Query: The irrigation system is not working properly.
Context: "The irrigation system still does not cover all areas. I request further improvement and coverage expansion."
"We have installed a new irrigation system and scheduled maintenance. Please allow some time for its effect."
"There is poor irrigation in our village, leading to crop failure. This needs urgent attention."
Response: "We will examine the situation and address it accordingly. Thank you for your patience."
Sentiment: NEGATIVE
Priority: High
Recommendations:
- "We have installed a new irrigation system and scheduled maintenance. Please allow some time for its effect."


In [None]:
test_queries = [
    {
        "query": "The irrigation system is not working properly.",
        "expected_category": "Agriculture",
        "expected_response": "We understand the issue with the irrigation system and are actively working on extending its coverage.",
        "expected_priority": "High",
        "expected_recommendations": ["Install new irrigation systems", "Schedule maintenance."]
    },
    {
        "query": "There are no streetlights in our area.",
        "expected_category": "Infrastructure",
        "expected_response": "Streetlights have been installed in the affected areas.",
        "expected_priority": "Medium",
        "expected_recommendations": ["Repair streetlights", "Increase patrols for safety."]
    }
]
for test in test_queries:
    result = process_grievance(test["query"])
    print("Query:", test["query"])
    print("Category:", result["Category"], "| Expected:", test["expected_category"])
    print("Response:", result["Response"], "| Expected:", test["expected_response"])
    print("Priority:", result["Priority"], "| Expected:", test["expected_priority"])
    print("Recommendations:", result["Recommendations"], "| Expected:", test["expected_recommendations"])

Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.


Query: The irrigation system is not working properly.
Category: Agriculture | Expected: Agriculture
Response: Query: The irrigation system is not working properly.
Context: "The irrigation system still does not cover all areas. I request further improvement and coverage expansion."
"We have installed a new irrigation system and scheduled maintenance. Please allow some time for its effect."
"There is poor irrigation in our village, leading to crop failure. This needs urgent attention."
Response: "Thank you for letting us know. We will inspect the irrigation system and work on it to ensure proper coverage | Expected: We understand the issue with the irrigation system and are actively working on extending its coverage.
Priority: High | Expected: High
Recommendations: ['"We have installed a new irrigation system and scheduled maintenance. Please allow some time for its effect."'] | Expected: ['Install new irrigation systems', 'Schedule maintenance.']
Query: There are no streetlights in our

In [None]:
import gradio as gr
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
from sentence_transformers import SentenceTransformer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression

# Step 1: Load Pre-trained Models
# Load a text classification model
classifier = pipeline("text-classification", model="nlptown/bert-base-multilingual-uncased-sentiment")

# Load a generative model (Falcon)
model_name = "tiiuae/falcon-7b-instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype="auto", trust_remote_code=True)

# Step 2: Define Categories
categories = ["Agriculture", "Education", "Police Response", "Infrastructure", "Flood Relief"]

# Step 3: Train a Simple Classifier (Optional)
# If you have labeled data, you can train a classifier here.
# For simplicity, we'll use keyword-based categorization.
def categorize_grievance(message):
    message = message.lower()
    agriculture_keywords = ["irrigation", "fertilizer", "crop", "farm", "agriculture", "flood relief"]
    education_keywords = ["school", "online class", "exam", "education", "curriculum", "learning"]
    police_keywords = ["police", "theft", "response time", "complaint", "investigation"]
    infrastructure_keywords = ["power outage", "streetlight", "road", "pothole", "repair", "infrastructure"]
    flood_relief_keywords = ["flood relief", "subsidy", "package", "affected families"]

    if any(keyword in message for keyword in agriculture_keywords):
        return "Agriculture"
    elif any(keyword in message for keyword in education_keywords):
        return "Education"
    elif any(keyword in message for keyword in police_keywords):
        return "Police Response"
    elif any(keyword in message for keyword in infrastructure_keywords):
        return "Infrastructure"
    elif any(keyword in message for keyword in flood_relief_keywords):
        return "Flood Relief"
    else:
        return "Other"

# Step 4: Perform Sentiment Analysis
def analyze_sentiment(message):
    result = classifier(message)[0]
    sentiment = result["label"]
    score = result["score"]
    return sentiment, score

# Step 5: Generate a Response
def generate_response(query):
    prompt = f"Query: {query}\nResponse:"
    inputs = tokenizer(prompt, return_tensors="pt")
    outputs = model.generate(**inputs, max_length=100, do_sample=True, top_p=0.95, temperature=0.7)
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response

# Step 6: Assign Priority
def assign_priority(sentiment):
    if sentiment in ["1 star", "2 stars"]:
        return "High"
    elif sentiment == "3 stars":
        return "Medium"
    else:
        return "Low"

# Step 7: End-to-End Pipeline
def process_grievance(query):
    # Step 1: Categorize the grievance
    category = categorize_grievance(query)

    # Step 2: Analyze sentiment
    sentiment, score = analyze_sentiment(query)

    # Step 3: Assign priority
    priority = assign_priority(sentiment)

    # Step 4: Generate a response
    response = generate_response(query)

    # Step 5: Return results
    return {
        "Category": category,
        "Sentiment": sentiment,
        "Priority": priority,
        "Response": response
    }

# Step 8: Define the Gradio Interface
def gradio_app(query):
    result = process_grievance(query)
    output = (
        f"**Category:** {result['Category']}\n\n"
        f"**Sentiment:** {result['Sentiment']} (Confidence: {result['Priority']})\n\n"
        f"**Priority Level:** {result['Priority']}\n\n"
        f"**Generated Response:** {result['Response']}"
    )
    return output

# Step 9: Launch the Gradio App
with gr.Blocks() as demo:
    gr.Markdown("# AI-Based Grievance Management System")
    gr.Markdown("Enter your grievance below, and the system will provide a categorized response, sentiment analysis, priority level, and a generated response.")

    with gr.Row():
        input_query = gr.Textbox(label="Enter Your Grievance", lines=3)
    with gr.Row():
        output_text = gr.Textbox(label="System Response", lines=10)

    submit_button = gr.Button("Submit")
    submit_button.click(gradio_app, inputs=input_query, outputs=output_text)

demo.launch()

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/953 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/669M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/39.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/872k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

Device set to use cpu


tokenizer_config.json:   0%|          | 0.00/1.13k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.73M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/281 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.05k [00:00<?, ?B/s]

configuration_falcon.py:   0%|          | 0.00/7.16k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/tiiuae/falcon-7b-instruct:
- configuration_falcon.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.



modeling_falcon.py:   0%|          | 0.00/56.9k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/tiiuae/falcon-7b-instruct:
- modeling_falcon.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


model.safetensors.index.json:   0%|          | 0.00/17.7k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.95G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/4.48G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/117 [00:00<?, ?B/s]

Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://b4e7fd13ba2a38da84.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [None]:
pip install gradio transformers sentence-transformers torch scikit-learn


Collecting gradio
  Downloading gradio-5.20.1-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.11-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.7.2 (from gradio)
  Downloading gradio_client-1.7.2-py3-none-any.whl.metadata (7.1 kB)
Collecting groovy~=0.1 (from gradio)
  Downloading groovy-0.1.2-py3-none-any.whl.metadata (6.1 kB)
Collecting markupsafe~=2.0 (from gradio)
  Downloading MarkupSafe-2.1.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.0 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.18 (from gradio)
  Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)
Collecting ruff>=0.9.3

In [None]:
!pip install gradio pandas openpyxl sentence-transformers faiss-cpu transformers torch

Collecting gradio
  Using cached gradio-5.20.1-py3-none-any.whl.metadata (16 kB)
Collecting faiss-cpu
  Using cached faiss_cpu-1.10.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (4.4 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Using cached aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Using cached fastapi-0.115.11-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Using cached ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.7.2 (from gradio)
  Using cached gradio_client-1.7.2-py3-none-any.whl.metadata (7.1 kB)
Collecting groovy~=0.1 (from gradio)
  Using cached groovy-0.1.2-py3-none-any.whl.metadata (6.1 kB)
Collecting markupsafe~=2.0 (from gradio)
  Using cached MarkupSafe-2.1.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.0 kB)
Collecting pydub (from gradio)
  Using cached pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>

In [None]:
import gradio as gr
import pandas as pd
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
from transformers import AutoTokenizer, AutoModelForCausalLM

# Step 1: Load the dataset
file_path = "/content/DatasetGrievence.xlsx"  # Update this path if needed
df = pd.read_excel(file_path, sheet_name="Sheet1")

# Step 2: Generate embeddings for the "Message Description" field
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
descriptions = df["Message Description"].tolist()
embeddings = embedding_model.encode(descriptions)

# Step 3: Store embeddings in a FAISS index for similarity search
dimension = embeddings.shape[1]  # Dimension of the embeddings
index = faiss.IndexFlatL2(dimension)  # Create a FAISS index
index.add(embeddings)  # Add embeddings to the index

# Step 4: Define a function to retrieve similar grievances
def retrieve_similar_grievances(query, top_k=3):
    """
    Retrieve the top-k most similar grievances from the knowledge base.

    Args:
        query (str): The input query (new grievance).
        top_k (int): Number of similar grievances to retrieve.

    Returns:
        pd.DataFrame: A DataFrame containing the top-k similar grievances.
    """
    # Generate embedding for the query
    query_embedding = embedding_model.encode([query])

    # Perform similarity search in the FAISS index
    distances, indices = index.search(query_embedding, top_k)

    # Retrieve the corresponding rows from the dataset
    similar_grievances = df.iloc[indices[0]]

    return similar_grievances

# Step 5: Load a generative model (Falcon)
model_name = "tiiuae/falcon-7b-instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype="auto", trust_remote_code=True)

# Step 6: Define a function to generate a response using Falcon
def generate_response_falcon(query, context):
    """
    Generate a response using Falcon based on the query and retrieved context.

    Args:
        query (str): The input query (new grievance).
        context (str): Retrieved context from similar grievances.

    Returns:
        str: Generated response.
    """
    # Combine the query and context into a prompt
    prompt = f"Query: {query}\nContext: {context}\nResponse:"

    # Tokenize the prompt
    inputs = tokenizer(prompt, return_tensors="pt")

    # Generate a response
    outputs = model.generate(**inputs, max_length=100, do_sample=True, top_p=0.95, temperature=0.7)

    # Decode and return the generated text
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Step 7: End-to-end pipeline
def process_grievance(query):
    # Step 1: Retrieve similar grievances
    similar_grievances = retrieve_similar_grievances(query)

    # Step 2: Extract context from the retrieved grievances
    context = "\n".join(similar_grievances["Message Description"])

    # Step 3: Generate a response using RAG
    response = generate_response_falcon(query, context)

    # Step 4: Return results
    return {
        "Similar Grievances": similar_grievances[["Message Description", "Resolution Status"]].to_dict(orient="records"),
        "Generated Response": response
    }

# Step 8: Define the Gradio Interface
def gradio_app(query):
    result = process_grievance(query)
    output = (
        f"**Similar Grievances:**\n"
    )
    for grievance in result["Similar Grievances"]:
        output += f"- {grievance['Message Description']} (Resolution Status: {grievance['Resolution Status']})\n"
    output += f"\n**Generated Response:** {result['Generated Response']}"
    return output

# Step 9: Launch the Gradio App
with gr.Blocks() as demo:
    gr.Markdown("# AI-Based Grievance Management System (Using RAG)")
    gr.Markdown("Enter your grievance below, and the system will provide similar grievances and a generated response.")

    with gr.Row():
        input_query = gr.Textbox(label="Enter Your Grievance", lines=3)
    with gr.Row():
        output_text = gr.Textbox(label="System Response", lines=10)

    submit_button = gr.Button("Submit")
    submit_button.click(gradio_app, inputs=input_query, outputs=output_text)

demo.launch()

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.5k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://2195742ac4cb2d9114.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [None]:
import gradio as gr
import pandas as pd
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM  # Import AutoModelForSeq2SeqLM
import torch

# Step 1: Load models globally
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
model_name = "google/flan-t5-small"  # Use a smaller model
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)  # Correctly use AutoModelForSeq2SeqLM
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)

# Step 2: Load dataset and embeddings
file_path = "/content/DatasetGrievence.xlsx"
df = pd.read_excel(file_path, sheet_name="Sheet1")
descriptions = df["Message Description"].tolist()
embeddings = embedding_model.encode(descriptions)

dimension = embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(embeddings)

# Step 3: Define functions
def retrieve_similar_grievances(query, top_k=3):
    query_embedding = embedding_model.encode([query])
    distances, indices = index.search(query_embedding, top_k)
    return df.iloc[indices[0]]

def generate_response(query, context):
    prompt = f"Query: {query}\nContext: {context}\nResponse:"
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    outputs = model.generate(**inputs, max_length=100)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

def process_grievance(query):
    similar_grievances = retrieve_similar_grievances(query)
    context = "\n".join(similar_grievances["Message Description"].head(3))
    response = generate_response(query, context)
    return {
        "Similar Grievances": similar_grievances[["Message Description", "Resolution Status"]].to_dict(orient="records"),
        "Generated Response": response
    }

# Step 4: Gradio interface
def gradio_app(query):
    result = process_grievance(query)
    output = (
        f"**Similar Grievances:**\n"
    )
    for grievance in result["Similar Grievances"]:
        output += f"- {grievance['Message Description']} (Resolution Status: {grievance['Resolution Status']})\n"
    output += f"\n**Generated Response:** {result['Generated Response']}"
    return output

with gr.Blocks() as demo:
    gr.Markdown("# AI-Based Grievance Management System (Using RAG)")
    input_query = gr.Textbox(label="Enter Your Grievance", lines=3)
    output_text = gr.Textbox(label="System Response", lines=10)
    submit_button = gr.Button("Submit")
    submit_button.click(gradio_app, inputs=input_query, outputs=output_text)

demo.launch()

tokenizer_config.json:   0%|          | 0.00/2.54k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/2.20k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.40k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/308M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://542efc3214d0747b6d.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [None]:
import gradio as gr
import pandas as pd
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
import torch

# Step 1: Load the dataset
file_path = "/content/DatasetGrievence.xlsx"
df = pd.read_excel(file_path, sheet_name="Sheet1")

# Step 2: Generate embeddings for the "Message Description" field
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
descriptions = df["Message Description"].tolist()
embeddings = embedding_model.encode(descriptions)

# Step 3: Store embeddings in a FAISS index for similarity search
dimension = embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(embeddings)

# Step 4: Define a function to retrieve similar grievances
def retrieve_similar_grievances(query, top_k=3):
    query_embedding = embedding_model.encode([query])
    distances, indices = index.search(query_embedding, top_k)
    return df.iloc[indices[0]]

# Step 5: Load pre-trained models for sentiment analysis and generation
sentiment_analyzer = pipeline("sentiment-analysis")
model_name = "google/flan-t5-small"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)

# Step 6: Define a function to analyze sentiment
def analyze_sentiment(feedback):
    result = sentiment_analyzer(feedback)[0]
    sentiment = result["label"]
    score = result["score"]
    return sentiment, score

# Step 7: Define a function to assign priority
def assign_priority(sentiment, resolution_status):
    if sentiment == "NEGATIVE" and resolution_status == "Pending":
        return "High"
    elif sentiment == "NEGATIVE":
        return "Medium"
    else:
        return "Low"

# Step 8: Define a function to generate recommendations
def generate_recommendation(query, context):
    # Create a structured prompt for the generative model
    prompt = (
        f"Problem: {query}\n"
        f"Context: {context}\n"
        f"Task: Based on the context, suggest a coherent and actionable solution to address the problem."
    )
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    outputs = model.generate(**inputs, max_length=150)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Step 9: End-to-end pipeline
def process_grievance(query):
    # Step 1: Retrieve similar grievances
    similar_grievances = retrieve_similar_grievances(query)
    context = "\n".join(similar_grievances["Message Description"])

    # Step 2: Analyze sentiment
    feedback = similar_grievances.iloc[0]["Citizen Feedback"]  # Use the first feedback for simplicity
    sentiment, score = analyze_sentiment(feedback)

    # Step 3: Assign priority
    resolution_status = similar_grievances.iloc[0]["Resolution Status"]
    priority = assign_priority(sentiment, resolution_status)

    # Step 4: Generate a recommendation
    recommendation = generate_recommendation(query, context)

    # Step 5: Return results
    return {
        "Context": context,
        "Sentiment": sentiment,
        "Priority": priority,
        "Recommendation": recommendation
    }

# Step 10: Gradio interface
def gradio_app(query):
    result = process_grievance(query)
    output = (
        f"**Context:**\n{result['Context']}\n\n"
        f"**Sentiment:** {result['Sentiment']} (Confidence: {result['Priority']})\n\n"
        f"**Priority Level:** {result['Priority']}\n\n"
        f"**Recommendation:** {result['Recommendation']}"
    )
    return output

with gr.Blocks() as demo:
    gr.Markdown("# AI-Based Grievance Management System (Enhanced RAG)")
    input_query = gr.Textbox(label="Enter Your Grievance", lines=3)
    output_text = gr.Textbox(label="System Response", lines=15)
    submit_button = gr.Button("Submit")
    submit_button.click(gradio_app, inputs=input_query, outputs=output_text)

demo.launch()

No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision 714eb0f (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.


config.json:   0%|          | 0.00/629 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Device set to use cpu


Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://e883b292f0a46e1cde.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [None]:
import gradio as gr
import pandas as pd
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
import torch

# Step 1: Load the dataset
file_path = "/content/DatasetGrievence.xlsx"
df = pd.read_excel(file_path, sheet_name="Sheet1")

# Step 2: Generate embeddings for the "Message Description" field
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
descriptions = df["Message Description"].tolist()
embeddings = embedding_model.encode(descriptions)

# Step 3: Store embeddings in a FAISS index for similarity search
dimension = embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(embeddings)

# Step 4: Define a function to retrieve similar grievances
def retrieve_similar_grievances(query, top_k=3):
    query_embedding = embedding_model.encode([query])
    distances, indices = index.search(query_embedding, top_k)
    return df.iloc[indices[0]], distances[0]  # Return distances for relevance filtering

# Step 5: Load pre-trained models for sentiment analysis and generation
sentiment_analyzer = pipeline("sentiment-analysis")
model_name = "google/flan-t5-small"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)

# Step 6: Define a function to analyze sentiment
def analyze_sentiment(feedback):
    result = sentiment_analyzer(feedback)[0]
    sentiment = result["label"]
    score = result["score"]
    return sentiment, score

# Step 7: Define a function to assign priority
def assign_priority(sentiment, resolution_status):
    if sentiment == "NEGATIVE" and resolution_status == "Pending":
        return "High"
    elif sentiment == "NEGATIVE":
        return "Medium"
    else:
        return "Low"

# Step 8: Define a function to generate recommendations
def generate_recommendation(query, context, distances):
    # Check relevance of the context
    relevant_context = any(distance < 0.7 for distance in distances)  # Threshold for relevance
    if relevant_context:
        prompt = (
            f"Problem: {query}\n"
            f"Context: {context}\n"
            f"Task: Suggest a coherent and actionable solution based on the context."
        )
    else:
        prompt = (
            f"Problem: {query}\n"
            f"Task: Provide a generic but actionable solution to the problem."
        )
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    outputs = model.generate(**inputs, max_length=150)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Step 9: End-to-end pipeline
def process_grievance(query):
    # Step 1: Retrieve similar grievances and distances
    similar_grievances, distances = retrieve_similar_grievances(query)
    context = "\n".join(similar_grievances["Message Description"])

    # Step 2: Analyze sentiment
    feedback = similar_grievances.iloc[0]["Citizen Feedback"]  # Use the first feedback for simplicity
    sentiment, score = analyze_sentiment(feedback)

    # Step 3: Assign priority
    resolution_status = similar_grievances.iloc[0]["Resolution Status"]
    priority = assign_priority(sentiment, resolution_status)

    # Step 4: Generate a recommendation
    recommendation = generate_recommendation(query, context, distances)

    # Step 5: Return results
    return {
        "Context": context,
        "Sentiment": sentiment,
        "Priority": priority,
        "Recommendation": recommendation
    }

# Step 10: Gradio interface
def gradio_app(query):
    result = process_grievance(query)
    output = (
        f"**Context:**\n{result['Context']}\n\n"
        f"**Sentiment:** {result['Sentiment']} (Confidence: {result['Priority']})\n\n"
        f"**Priority Level:** {result['Priority']}\n\n"
        f"**Recommendation:** {result['Recommendation']}"
    )
    return output

with gr.Blocks() as demo:
    gr.Markdown("# AI-Based Grievance Management System (Enhanced RAG)")
    input_query = gr.Textbox(label="Enter Your Grievance", lines=3)
    output_text = gr.Textbox(label="System Response", lines=15)
    submit_button = gr.Button("Submit")
    submit_button.click(gradio_app, inputs=input_query, outputs=output_text)

demo.launch()

No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision 714eb0f (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.
Device set to use cpu


Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://203b8a679dacf99502.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [None]:
import gradio as gr
import pandas as pd
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
import torch

# Step 1: Load the dataset
file_path = "/content/DatasetGrievence.xlsx"
df = pd.read_excel(file_path, sheet_name="Sheet1")

# Step 2: Generate embeddings for the "Message Description" field
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
descriptions = df["Message Description"].tolist()
embeddings = embedding_model.encode(descriptions)

# Step 3: Store embeddings in a FAISS index for similarity search
dimension = embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(embeddings)

# Step 4: Define a function to retrieve similar grievances
def retrieve_similar_grievances(query, top_k=3):
    query_embedding = embedding_model.encode([query])
    distances, indices = index.search(query_embedding, top_k)
    return df.iloc[indices[0]], distances[0]  # Return distances for relevance filtering

# Step 5: Load pre-trained models for sentiment analysis and generation
sentiment_analyzer = pipeline("sentiment-analysis")
model_name = "google/flan-t5-small"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)

# Step 6: Define a function to analyze sentiment
def analyze_sentiment(feedback):
    result = sentiment_analyzer(feedback)[0]
    sentiment = result["label"]
    score = result["score"]
    return sentiment, score

# Step 7: Define a function to assign priority
def assign_priority(sentiment, resolution_status):
    if sentiment == "NEGATIVE" and resolution_status == "Pending":
        return "High"
    elif sentiment == "NEGATIVE":
        return "Medium"
    else:
        return "Low"

# Step 8: Define a function to generate recommendations
def generate_recommendation(query, context, distances):
    # Check relevance of the context
    relevant_context = any(distance < 0.7 for distance in distances)  # Threshold for relevance
    if relevant_context:
        prompt = (
            f"Problem: {query}\n"
            f"Context: {context}\n"
            f"Task: Suggest a coherent and actionable solution based on the context."
        )
    else:
        prompt = (
            f"Problem: {query}\n"
            f"Task: Provide a generic but actionable solution to the problem."
        )
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    outputs = model.generate(**inputs, max_length=150)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Step 9: End-to-end pipeline
def process_grievance(query):
    # Step 1: Retrieve similar grievances and distances
    similar_grievances, distances = retrieve_similar_grievances(query)
    context = "\n".join(similar_grievances["Message Description"])

    # Step 2: Analyze sentiment
    feedback = similar_grievances.iloc[0]["Citizen Feedback"]  # Use the first feedback for simplicity
    sentiment, score = analyze_sentiment(feedback)

    # Step 3: Assign priority
    resolution_status = similar_grievances.iloc[0]["Resolution Status"]
    priority = assign_priority(sentiment, resolution_status)

    # Step 4: Generate a recommendation
    recommendation = generate_recommendation(query, context, distances)

    # Step 5: Return results
    return {
        "Context": context,
        "Sentiment": sentiment,
        "Priority": priority,
        "Recommendation": recommendation
    }

# Step 10: Gradio interface
def gradio_app(query):
    result = process_grievance(query)
    output = (
        f"**Context:**\n{result['Context']}\n\n"
        f"**Sentiment:** {result['Sentiment']} (Confidence: {result['Priority']})\n\n"
        f"**Priority Level:** {result['Priority']}\n\n"
        f"**Recommendation:** {result['Recommendation']}"
    )
    return output

with gr.Blocks() as demo:
    gr.Markdown("# AI-Based Grievance Management System (Enhanced RAG)")
    input_query = gr.Textbox(label="Enter Your Grievance", lines=3)
    output_text = gr.Textbox(label="System Response", lines=15)
    submit_button = gr.Button("Submit")
    submit_button.click(gradio_app, inputs=input_query, outputs=output_text)

demo.launch()

No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision 714eb0f (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.
Device set to use cpu


Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://069020a435f9ff9b27.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [None]:
import gradio as gr
import pandas as pd
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
import torch

# Step 1: Load the dataset
file_path = "/content/DatasetGrievence.xlsx"
#file_path = "/content/SyntheticGrievances.xlsx"
df = pd.read_excel(file_path, sheet_name="Sheet1")

# Step 2: Generate embeddings for the "Message Description" field
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
descriptions = df["Message Description"].tolist()
embeddings = embedding_model.encode(descriptions)

# Step 3: Store embeddings in a FAISS index for similarity search
dimension = embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(embeddings)

# Step 4: Define a function to retrieve similar grievances
def retrieve_similar_grievances(query, top_k=3):
    query_embedding = embedding_model.encode([query])
    distances, indices = index.search(query_embedding, top_k)
    return df.iloc[indices[0]], distances[0]  # Return distances for relevance filtering

# Step 5: Load pre-trained models for sentiment analysis and generation
sentiment_analyzer = pipeline("sentiment-analysis")
model_name = "google/flan-t5-small"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)

# Step 6: Define a function to analyze sentiment
def analyze_sentiment(feedback):
    result = sentiment_analyzer(feedback)[0]
    sentiment = result["label"]
    score = result["score"]
    return sentiment, score

# Step 7: Define a function to assign priority
def assign_priority(sentiment, resolution_status):
    if sentiment == "NEGATIVE" and resolution_status == "Pending":
        return "High"
    elif sentiment == "NEGATIVE":
        return "Medium"
    else:
        return "Low"

# Step 8: Define a function to generate recommendations
def generate_recommendation(query, context, distances):
    # Check relevance of the context
    relevant_context = any(distance < 0.7 for distance in distances)  # Threshold for relevance
    if relevant_context:
        prompt = (
            f"Problem: {query}\n"
            f"Context: {context}\n"
            f"Task: Suggest a coherent and actionable solution based on the context."
        )
    else:
        prompt = (
            f"Problem: {query}\n"
            f"Task: Provide a generic but actionable solution to the problem."
        )
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    outputs = model.generate(**inputs, max_length=150)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Step 9: End-to-end pipeline
def process_grievance(query):
    # Step 1: Retrieve similar grievances and distances
    similar_grievances, distances = retrieve_similar_grievances(query)
    context = "\n".join(similar_grievances["Message Description"])

    # Step 2: Analyze sentiment
    feedback = similar_grievances.iloc[0]["Citizen Feedback"]  # Use the first feedback for simplicity
    sentiment, score = analyze_sentiment(feedback)

    # Step 3: Assign priority
    resolution_status = similar_grievances.iloc[0]["Resolution Status"]
    priority = assign_priority(sentiment, resolution_status)

    # Step 4: Generate a recommendation
    recommendation = generate_recommendation(query, context, distances)

    # Step 5: Return results
    return {
        "Context": context,
        "Sentiment": sentiment,
        "Priority": priority,
        "Recommendation": recommendation
    }

# Step 10: Gradio interface
def gradio_app(query):
    result = process_grievance(query)
    output = (
        f"**Context:**\n{result['Context']}\n\n"
        f"**Sentiment:** {result['Sentiment']} (Confidence: {result['Priority']})\n\n"
        f"**Priority Level:** {result['Priority']}\n\n"
        f"**Recommendation:** {result['Recommendation']}"
    )
    return output

with gr.Blocks() as demo:
    gr.Markdown("# AI-Based Grievance Management System (Enhanced RAG)")
    input_query = gr.Textbox(label="Enter Your Grievance", lines=3)
    output_text = gr.Textbox(label="System Response", lines=15)
    submit_button = gr.Button("Submit")
    submit_button.click(gradio_app, inputs=input_query, outputs=output_text)

demo.launch()

No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision 714eb0f (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.


config.json:   0%|          | 0.00/629 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Device set to use cpu


Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://d5b309aeba4f39f18f.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [None]:
!pip install faker

Collecting faker
  Downloading faker-36.2.2-py3-none-any.whl.metadata (15 kB)
Downloading faker-36.2.2-py3-none-any.whl (1.9 MB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.9 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m1.9/1.9 MB[0m [31m96.0 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.9/1.9 MB[0m [31m45.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faker
Successfully installed faker-36.2.2


In [None]:
import pandas as pd
import random
from faker import Faker

# Initialize Faker for generating realistic data
fake = Faker()

# Define grievance templates
templates = [
    "The {infrastructure} in my area is not working. Please fix it as soon as possible.",
    "I am frustrated with the lack of {service} in my neighborhood. This needs immediate attention.",
    "There is poor {condition} in our {location}, leading to {consequence}. This needs urgent action.",
    "The {facility} is overcrowded, and there is no proper maintenance. This is unacceptable.",
    "We need better {resource} management in our {location}. The current situation is frustrating."
]

# Define placeholders
infrastructure = ["streetlights", "drainage system", "water supply", "power supply"]
services = ["police response", "medical facilities", "public transportation", "internet access"]
conditions = ["irrigation", "sanitation", "maintenance", "communication"]
locations = ["village", "neighborhood", "local market", "school", "hospital"]
consequences = ["waterlogging", "unsafe conditions", "crop failure", "long waiting times"]
facilities = ["library", "gym", "park", "community center"]
resources = ["water", "electricity", "fertilizers", "Wi-Fi"]

# Generate synthetic grievances
data = []
for _ in range(5000):
    template = random.choice(templates)
    grievance = template.format(
        infrastructure=random.choice(infrastructure),
        service=random.choice(services),
        condition=random.choice(conditions),
        location=random.choice(locations),
        consequence=random.choice(consequences),
        facility=random.choice(facilities),
        resource=random.choice(resources)
    )
    citizen_name = fake.name()
    officer_name = fake.name()
    communication_date = fake.date_between(start_date="-1y", end_date="today")
    resolution_status = random.choice(["Pending", "Resolved"])
    reappeal_eligibility = random.choice(["Yes", "No"])
    reappeal_reason = fake.sentence() if reappeal_eligibility == "Yes" else "N/A"
    citizen_feedback = fake.sentence()

    data.append([
        fake.uuid4(),  # Grievance ID
        citizen_name,
        officer_name,
        communication_date,
        "Citizen",
        grievance,
        resolution_status,
        reappeal_eligibility,
        reappeal_reason,
        citizen_feedback
    ])

# Create a DataFrame
columns = [
    "Grievance ID", "Citizen Name", "Officer Name", "Communication Date", "Role",
    "Message Description", "Resolution Status", "Reappeal Eligibility", "Reappeal Reason", "Citizen Feedback"
]
df = pd.DataFrame(data, columns=columns)

# Save to Excel
df.to_excel("SyntheticGrievances.xlsx", index=False)
print("Excel file saved successfully.")

Excel file saved successfully.


In [None]:
import gradio as gr
import pandas as pd
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import torch

# Step 1: Load the dataset
file_path = "/content/DatasetGrievence.xlsx"
df = pd.read_excel(file_path, sheet_name="Sheet1")

# Step 2: Remove duplicates
df = df.drop_duplicates(subset=["Message Description"], keep="first")

# Step 3: Generate embeddings for the "Message Description" field
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
descriptions = df["Message Description"].tolist()
embeddings = embedding_model.encode(descriptions)

# Step 4: Store embeddings in a FAISS index for similarity search
dimension = embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(embeddings)

# Step 5: Define a function to retrieve similar grievances
def retrieve_similar_grievances(query, top_k=3):
    query_embedding = embedding_model.encode([query])
    distances, indices = index.search(query_embedding, top_k)
    return df.iloc[indices[0]], distances[0]  # Return distances for relevance filtering

# Step 6: Load pre-trained models for sentiment analysis and generation
model_name = "google/flan-t5-small"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)

# Step 7: Define a function to generate recommendations
def generate_recommendation(query, context, distances):
    # Check relevance of the context
    relevant_context = any(distance < 0.7 for distance in distances)  # Threshold for relevance
    if relevant_context:
        prompt = (
            f"Problem: {query}\n"
            f"Context: {context}\n"
            f"Task: Suggest a coherent and actionable solution based on the context."
        )
    else:
        prompt = (
            f"Problem: {query}\n"
            f"Task: Provide a generic but actionable solution to the problem."
        )
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    outputs = model.generate(**inputs, max_length=150, repetition_penalty=1.5)
    recommendation = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # Debugging: Print the prompt and recommendation
    print("Prompt:", prompt)
    print("Recommendation:", recommendation)

    return recommendation

# Step 8: End-to-end pipeline
def process_grievance(query):
    # Step 1: Retrieve similar grievances and distances
    similar_grievances, distances = retrieve_similar_grievances(query)
    context = "\n".join(similar_grievances["Message Description"])

    # Step 2: Generate a recommendation
    recommendation = generate_recommendation(query, context, distances)

    # Step 3: Return results
    return {
        "Context": context,
        "Recommendation": recommendation
    }

# Step 9: Gradio interface
def gradio_app(query):
    result = process_grievance(query)
    output = (
        f"**Context:**\n{result['Context']}\n\n"
        f"**Recommendation:** {result['Recommendation']}"
    )
    return output

with gr.Blocks() as demo:
    gr.Markdown("# AI-Based Grievance Management System (Enhanced RAG)")
    input_query = gr.Textbox(label="Enter Your Grievance", lines=3)
    output_text = gr.Textbox(label="System Response", lines=15)
    submit_button = gr.Button("Submit")
    submit_button.click(gradio_app, inputs=input_query, outputs=output_text)

demo.launch()

Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://a9632c20d30c7cf9e3.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [None]:
import gradio as gr
import pandas as pd
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
import torch

# Step 1: Load the dataset
file_path = "/content/DatasetGrievence.xlsx"
df = pd.read_excel(file_path, sheet_name="Sheet1")

# Step 2: Generate embeddings for the "Message Description" field
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
descriptions = df["Message Description"].tolist()
embeddings = embedding_model.encode(descriptions)

# Step 3: Store embeddings in a FAISS index for similarity search
dimension = embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(embeddings)

# Step 4: Define a function to retrieve similar grievances
def retrieve_similar_grievances(query, top_k=3):
    query_embedding = embedding_model.encode([query])
    distances, indices = index.search(query_embedding, top_k)
    return df.iloc[indices[0]], distances[0]

# Step 5: Load pre-trained models for sentiment analysis and generation
sentiment_analyzer = pipeline("sentiment-analysis")
model_name = "google/flan-t5-small"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)

# Step 6: Define a function to generate recommendations
def generate_recommendation(query, context, distances):
    relevant_context = any(distance < 0.7 for distance in distances)  # Threshold for relevance
    if relevant_context:
        prompt = (
            f"Problem: {query}\n"
            f"Context: {context}\n"
            f"Task: Suggest a coherent and actionable solution based on the context."
        )
    else:
        # Fallback: Keyword-based rules for unseen queries
        if "wallet" in query.lower() and "office" in query.lower():
            return "Please contact the office security team immediately and file a report. Check the lost-and-found section and consider blocking your cards."
        elif "payment" in query.lower() or "subsidy" in query.lower():
            return "Contact the finance department for updates on delayed payments. Provide your reference number for faster resolution."
        else:
            prompt = (
                f"Problem: {query}\n"
                f"Task: Provide a generic but actionable solution to the problem."
            )
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    outputs = model.generate(**inputs, max_length=150, num_beams=5, repetition_penalty=1.5)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Step 7: End-to-end pipeline
def process_grievance(query):
    similar_grievances, distances = retrieve_similar_grievances(query)
    context = "\n".join(similar_grievances["Message Description"])
    feedback = similar_grievances.iloc[0]["Citizen Feedback"]
    sentiment, score = analyze_sentiment(feedback)
    resolution_status = similar_grievances.iloc[0]["Resolution Status"]
    priority = assign_priority(sentiment, resolution_status)
    recommendation = generate_recommendation(query, context, distances)
    return {
        "Context": context,
        "Sentiment": sentiment,
        "Priority": priority,
        "Recommendation": recommendation
    }

# Step 8: Gradio interface
def gradio_app(query):
    result = process_grievance(query)
    output = (
        f"**Context:**\n{result['Context']}\n\n"
        f"**Sentiment:** {result['Sentiment']} (Confidence: {result['Priority']})\n\n"
        f"**Priority Level:** {result['Priority']}\n\n"
        f"**Recommendation:** {result['Recommendation']}"
    )
    return output

with gr.Blocks() as demo:
    gr.Markdown("# AI-Based Grievance Management System (Enhanced RAG)")
    input_query = gr.Textbox(label="Enter Your Grievance", lines=3)
    output_text = gr.Textbox(label="System Response", lines=15)
    submit_button = gr.Button("Submit")
    submit_button.click(gradio_app, inputs=input_query, outputs=output_text)

demo.launch()

No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision 714eb0f (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.
Device set to use cpu


Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://6d0900feffc724a750.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [None]:
import gradio as gr
import pandas as pd
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
import torch

# Step 1: Load the dataset
file_path = "/content/DatasetGrievence.xlsx"
df = pd.read_excel(file_path, sheet_name="Sheet1")

# Step 2: Generate embeddings for the "Message Description" field
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
device = "cuda" if torch.cuda.is_available() else "cpu"
embedding_model.to(device)  # Move embedding model to GPU
descriptions = df["Message Description"].tolist()
embeddings = embedding_model.encode(descriptions)

# Step 3: Store embeddings in a FAISS index for similarity search
dimension = embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(embeddings)

# Step 4: Define a function to retrieve similar grievances
def retrieve_similar_grievances(query, top_k=3):
    query_embedding = embedding_model.encode([query])
    distances, indices = index.search(query_embedding, top_k)
    return df.iloc[indices[0]], distances[0]

# Step 5: Load pre-trained models for sentiment analysis and generation
sentiment_analyzer = pipeline(
    "sentiment-analysis",
    model="nlptown/bert-base-multilingual-uncased-sentiment",  # Explicitly specify a better model
    revision="main",
    device=0 if torch.cuda.is_available() else -1  # Use GPU if available
)

model_name = "google/flan-t5-small"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name).to(device)  # Move to GPU

# Step 6: Define a function to generate recommendations
def generate_recommendation(query, context, distances):
    relevant_context = any(distance < 0.7 for distance in distances)
    if relevant_context:
        prompt = f"Problem: {query}\nContext: {context}\nTask: Suggest a coherent and actionable solution based on the context."
    else:
        # Fallback for unseen queries
        if "wallet" in query.lower() and "office" in query.lower():
            return "Contact the office security team immediately, file a report, and check the lost-and-found section."
        else:
            prompt = f"Problem: {query}\nTask: Provide a generic but actionable solution."

    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    outputs = model.generate(
        **inputs,
        max_length=150,
        num_beams=5,
        repetition_penalty=1.5
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Step 7: End-to-end pipeline
def process_grievance(query):
    similar_grievances, distances = retrieve_similar_grievances(query)
    context = "\n".join(similar_grievances["Message Description"])
    feedback = similar_grievances.iloc[0]["Citizen Feedback"]
    sentiment, score = sentiment_analyzer(feedback)[0]["label"], sentiment_analyzer(feedback)[0]["score"]
    resolution_status = similar_grievances.iloc[0]["Resolution Status"]
    priority = assign_priority(sentiment, resolution_status)
    recommendation = generate_recommendation(query, context, distances)
    return {
        "Context": context,
        "Sentiment": sentiment,
        "Priority": priority,
        "Recommendation": recommendation
    }

# Step 8: Gradio interface
def gradio_app(query):
    result = process_grievance(query)
    output = (
        f"**Context:**\n{result['Context']}\n\n"
        f"**Sentiment:** {result['Sentiment']} (Confidence: {result['Priority']})\n\n"
        f"**Priority Level:** {result['Priority']}\n\n"
        f"**Recommendation:** {result['Recommendation']}"
    )
    return output

with gr.Blocks() as demo:
    gr.Markdown("# AI-Based Grievance Management System (Enhanced RAG)")
    input_query = gr.Textbox(label="Enter Your Grievance", lines=3)
    output_text = gr.Textbox(label="System Response", lines=15)
    submit_button = gr.Button("Submit")
    submit_button.click(gradio_app, inputs=input_query, outputs=output_text)

demo.launch(debug=True)

Device set to use cpu


Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://45a8100cb90a6cfe64.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7860 <> https://b4e7fd13ba2a38da84.gradio.live
Killing tunnel 127.0.0.1:7861 <> https://2195742ac4cb2d9114.gradio.live
Killing tunnel 127.0.0.1:7862 <> https://542efc3214d0747b6d.gradio.live
Killing tunnel 127.0.0.1:7863 <> https://a9632c20d30c7cf9e3.gradio.live
Killing tunnel 127.0.0.1:7864 <> https://d5b309aeba4f39f18f.gradio.live
Killing tunnel 127.0.0.1:7865 <> https://069020a435f9ff9b27.gradio.live
Killing tunnel 127.0.0.1:7866 <> https://6d0900feffc724a750.gradio.live
Killing tunnel 127.0.0.1:7867 <> https://dd1f4a0d53e4042848.gradio.live
Killing tunnel 127.0.0.1:7868 <> https://45a8100cb90a6cfe64.gradio.live




In [None]:
import gradio as gr
import pandas as pd
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
import torch

# Step 1: Load the dataset
file_path = "/content/SyntheticGrievances.xlsx"
df = pd.read_excel(file_path, sheet_name="Sheet1")

# Step 2: Generate embeddings for the "Message Description" field
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
device = "cuda" if torch.cuda.is_available() else "cpu"
embedding_model.to(device)
descriptions = df["Message Description"].tolist()
embeddings = embedding_model.encode(descriptions)

# Step 3: Store embeddings in a FAISS index for similarity search
dimension = embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(embeddings)

# Step 4: Define a function to retrieve similar grievances
def retrieve_similar_grievances(query, top_k=3):
    query_embedding = embedding_model.encode([query])
    distances, indices = index.search(query_embedding, top_k)
    return df.iloc[indices[0]], distances[0]

# Step 5: Load pre-trained models for sentiment analysis and generation
sentiment_analyzer = pipeline(
    "sentiment-analysis",
    model="distilbert/distilbert-base-uncased-finetuned-sst-2-english",  # Outputs "NEGATIVE"/"POSITIVE"
    device=0 if torch.cuda.is_available() else -1
)

model_name = "google/flan-t5-small"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name).to(device)

# Step 6: Define a function to generate recommendations
def generate_recommendation(query, context, distances):
    relevant_context = any(distance < 0.7 for distance in distances)  # Adjust threshold if needed
    if relevant_context:
        prompt = (
            f"Problem: {query}\n"
            f"Context: {context}\n"
            f"Task: Suggest a coherent and actionable solution based on the context."
        )
    else:
        # Fallback for lost-item scenarios
        if "wallet" in query.lower() and ("office" in query.lower() or "bus stop" in query.lower()):
            return "Contact the security team at the location immediately, file a report, and check the lost-and-found section."
        else:
            prompt = (
                f"Problem: {query}\n"
                f"Task: Provide a generic but actionable solution."
            )
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    outputs = model.generate(**inputs, max_length=150, num_beams=5, repetition_penalty=1.5)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Step 7: End-to-end pipeline
def process_grievance(query):
    similar_grievances, distances = retrieve_similar_grievances(query)
    context = "\n".join(similar_grievances["Message Description"])

    # Analyze sentiment of the query itself (not just retrieved feedback)
    sentiment, score = sentiment_analyzer(query)[0]["label"], sentiment_analyzer(query)[0]["score"]

    # Assign priority based on query sentiment and resolution status
    resolution_status = "Pending"  # Assume new grievances are pending
    priority = assign_priority(sentiment, resolution_status)

    # Generate recommendation
    recommendation = generate_recommendation(query, context, distances)

    return {
        "Context": context,
        "Sentiment": sentiment,
        "Priority": priority,
        "Recommendation": recommendation
    }

# Step 8: Gradio interface
def gradio_app(query):
    result = process_grievance(query)
    output = (
        f"**Context:**\n{result['Context']}\n\n"
        f"**Sentiment:** {result['Sentiment']} (Confidence: {result['Priority']})\n\n"
        f"**Priority Level:** {result['Priority']}\n\n"
        f"**Recommendation:** {result['Recommendation']}"
    )
    return output

with gr.Blocks() as demo:
    gr.Markdown("# AI-Based Grievance Management System (Enhanced RAG)")
    input_query = gr.Textbox(label="Enter Your Grievance", lines=3)
    output_text = gr.Textbox(label="System Response", lines=15)
    submit_button = gr.Button("Submit")
    submit_button.click(gradio_app, inputs=input_query, outputs=output_text)

demo.launch()

Device set to use cpu


Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://bb19aa7cf9dc71bfc4.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


