In [1]:
from transformers import DistilBertForSequenceClassification, DistilBertTokenizerFast, DistilBertForTokenClassification
import torch
import re
import pandas as pd
import gradio as gr
import matplotlib.pyplot as plt


# Relative root folders for the datasets and for the saved models.
DATASET_PATH = "../dataset"
MODEL_PATH =  "../trained_models"

# Sentiment-analysis ("sa") data folder and model folder.
SA_DATA = DATASET_PATH + "/sa"
SA_MODEL = MODEL_PATH + "/sa"

# Named-entity-recognition ("ner") data folder and model folder.
NER_DATA = DATASET_PATH + "/ner"
NER_MODEL = MODEL_PATH + "/ner"

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Sequence-classification model and its tokenizer, both read from the SA_MODEL folder.
sentiment_model = DistilBertForSequenceClassification.from_pretrained(SA_MODEL)
sentiment_tokenizer = DistilBertTokenizerFast.from_pretrained(SA_MODEL)

# Token-classification model and its tokenizer, both read from the NER_MODEL folder.
ner_model = DistilBertForTokenClassification.from_pretrained(NER_MODEL)
ner_tokenizer = DistilBertTokenizerFast.from_pretrained(NER_MODEL)

print("Model loaded successfully!")

Model loaded successfully!


In [3]:
def split_text(text):
  """Cut ``text`` at punctuation marks and return the non-empty pieces.

  The text is split on any of ``. , ; ! ? | /``. Each piece is stripped of
  surrounding whitespace, and pieces that are empty after stripping are
  dropped.

  Args:
    text: The string to split.

  Returns:
    A list of stripped, non-empty segments in their original order.
  """
  delimiters = [".", ",", ";", "!", "?", "|", "/"]
  regex_pattern = '|'.join(re.escape(mark) for mark in delimiters)

  segments = []
  for piece in re.split(regex_pattern, text):
    trimmed = piece.strip()
    if trimmed:
      segments.append(trimmed)
  return segments

In [4]:
def get_entities(text):
  """Extract entity mentions from ``text`` using the NER model.

  The text is tokenized, every token receives the label with the highest
  logit, and the ``B-``/``I-`` labelled tokens are merged into entities:

  * a ``B-`` label starts a new entity (closing any open one);
  * an ``I-`` label extends the open entity, joined with a space;
  * a ``##`` word-piece continuation is glued onto the open entity without a
    space, whatever its own label is;
  * any other token closes the open entity.

  The tokenizer's CLS/SEP/PAD markers are not part of the input text, so
  they always act as entity boundaries and never contribute text, even if the
  model assigns them a ``B-``/``I-`` label.

  Args:
    text: The review text to analyse. Only the first sequence of the encoded
      batch is decoded.

  Returns:
    A list of dicts ``{"entity": <label without its B-/I- prefix>,
    "text": <entity text rebuilt from the tokens>}`` in order of appearance.
  """
  tokens = ner_tokenizer(text, return_tensors="pt", truncation=True, padding=True)

  with torch.no_grad():
    outputs = ner_model(**tokens)

  # Highest-scoring label id for every token position.
  predictions = torch.argmax(outputs.logits, dim=2)

  tokens_list = ner_tokenizer.convert_ids_to_tokens(tokens["input_ids"][0])
  id2label = ner_model.config.id2label
  predicted_labels = [id2label[p.item()] for p in predictions[0]]

  # Structural markers added by the tokenizer; they must never end up inside
  # an entity's text.
  structural_tokens = {
    marker
    for marker in (ner_tokenizer.cls_token, ner_tokenizer.sep_token, ner_tokenizer.pad_token)
    if marker is not None
  }

  entities = []
  current_entity = None

  for token, label in zip(tokens_list, predicted_labels):
    if token in structural_tokens:
      # Treat the marker as a hard boundary regardless of its predicted label.
      if current_entity:
        entities.append(current_entity)
        current_entity = None
    elif token.startswith("##"):
      # Word-piece continuation: glue it to the open entity (drop the "##").
      if current_entity:
        current_entity["text"] += token[2:]
    elif label.startswith("B-"):
      if current_entity:
        entities.append(current_entity)
      current_entity = {"entity": label[2:], "text": token}
    elif label.startswith("I-") and current_entity:
      current_entity["text"] += " " + token
    else:
      if current_entity:
        entities.append(current_entity)
        current_entity = None

  # Flush an entity that is still open when the tokens run out.
  if current_entity:
    entities.append(current_entity)

  return entities

In [5]:
def get_sentiment(text):
  """Classify ``text`` with the sentiment model.

  Args:
    text: The text to classify.

  Returns:
    ``"Negative"`` when the highest-scoring class index is 0, ``"Positive"``
    when it is 1.

  Raises:
    KeyError: If the model predicts a class index other than 0 or 1.
  """
  label_mapping = {0: "Negative", 1: "Positive"}

  encoded = sentiment_tokenizer(text, return_tensors="pt", truncation=True, padding=True)

  # Inference only, so gradient tracking is switched off.
  with torch.no_grad():
    class_scores = sentiment_model(**encoded).logits

  best_index = torch.argmax(class_scores, dim=1).item()
  return label_mapping[best_index]

In [6]:
def analyze_review(text):
  """Pair every entity found in ``text`` with the sentiment of its segment.

  The review is split into punctuation-delimited segments. Each entity is
  assigned to the first segment that contains its text; if no segment
  contains it, the whole review is used instead. The sentiment of that
  segment (or of the whole review) becomes the entity's sentiment.

  Matching is tried with exact case first and then case-insensitively, so an
  entity whose text was lower-cased during tokenization can still be located
  in a segment that spells it with capitals.

  Args:
    text: The review text.

  Returns:
    A dict ``{"text": <the input text>, "entities": [...]}`` where each list
    item is ``{"Segment": ..., "Entity": ..., "Sentiment": ...}``.
  """
  entities = get_entities(text)
  segments = split_text(text)

  # Several entities often share one segment; classify each segment once.
  sentiment_by_segment = {}
  entity_sentiments = []

  for entity in entities:
    entity_text = entity['text']

    # Pass 1: exact-case containment.
    matched_segment = next((seg for seg in segments if entity_text in seg), None)
    if matched_segment is None:
      # Pass 2: ignore case; fall back to the full review if still unmatched.
      folded_entity = entity_text.casefold()
      matched_segment = next(
        (seg for seg in segments if folded_entity in seg.casefold()),
        text,
      )

    if matched_segment not in sentiment_by_segment:
      sentiment_by_segment[matched_segment] = get_sentiment(matched_segment)

    entity_sentiments.append({
      "Segment": matched_segment,
      "Entity": entity_text,
      "Sentiment": sentiment_by_segment[matched_segment],
    })

  return {
    "text": text,
    "entities": entity_sentiments
  }

In [7]:

def generate_insight(sentiment_counts):
    """Render an HTML conclusion box from positive/negative counts.

    Args:
        sentiment_counts: Any object with a dict-style ``.get(key, default)``
            that is queried for the keys ``"Positive"`` and ``"Negative"``.

    Returns:
        An HTML string wrapped in ``<div class='conclusion-box'>``. When both
        counts are zero it says there is not enough data; otherwise it picks
        one of three messages: positive (positive share >= 65%), needs
        improvement (negative share >= 55%), or mixed.
    """
    positive_count = sentiment_counts.get("Positive", 0)
    negative_count = sentiment_counts.get("Negative", 0)
    review_total = positive_count + negative_count

    # Nothing to summarise: bail out before dividing by zero.
    if review_total == 0:
        return "<div class='conclusion-box'>Not enough sentiment data to draw conclusions.</div>"

    pos_pct = (positive_count / review_total) * 100
    neg_pct = (negative_count / review_total) * 100

    if pos_pct >= 65:
        fragments = [
            f"<b>Positive Customer Feedback</b><br><br>",
            f"Most customers had a positive experience, with <b>{pos_pct:.1f}%</b> of reviews expressing positive sentiment. ",
            f"Only <b>{neg_pct:.1f}%</b> of the feedback was negative. ",
            f"This indicates that your current service quality and food offerings are meeting customer expectations.",
        ]
    elif neg_pct >= 55:
        fragments = [
            f"<b>Areas That Need Improvement</b><br><br>",
            f"A significant portion of customer feedback is negative, with <b>{neg_pct:.1f}%</b> of reviews expressing dissatisfaction. ",
            f"Positive feedback accounts for only <b>{pos_pct:.1f}%</b>. ",
            f"This suggests that there are several aspects of the restaurant experience that require attention.<br>",
            f"The 'Top Entities' chart below highlights the areas most frequently mentioned by customers.",
        ]
    else:
        fragments = [
            f"<b>Mixed Customer Opinions</b><br><br>",
            f"The feedback is fairly balanced, with <b>{pos_pct:.1f}% positive</b> and <b>{neg_pct:.1f}% negative</b> reviews. ",
            f"While many customers had a good experience, others felt there was room for improvement. ",
            f"Taking a closer look at these reviews can help identify what works well and what needs attention, ",
            f"so overall satisfaction can continue to improve.",
        ]

    conclusion_content = "".join(fragments)
    return f"<div class='conclusion-box'>{conclusion_content}</div>"

In [8]:
# Quick check of the pipeline on one two-clause review; the per-entity rows
# are shown as a table.
result = analyze_review("The sushi was fresh, but the waiter was rude.")
df = pd.DataFrame(result['entities'])
print("Review:", result['text'])
print("Result:")
df  # bare last expression so the notebook renders the DataFrame

Review: The sushi was fresh, but the waiter was rude.
Result:


Unnamed: 0,Segment,Entity,Sentiment
0,The sushi was fresh,sushi,Positive
1,The sushi was fresh,fresh,Positive
2,but the waiter was rude,waiter,Negative
3,but the waiter was rude,rude,Negative


In [9]:
def process_all_reviews(file):
  """Analyse every review in an uploaded CSV and build the dashboard outputs.

  Reviews are read from the column named ``review`` (case-insensitive,
  surrounding whitespace ignored) or, if there is no such column, from the
  first column. Empty cells are skipped.

  Args:
    file: The uploaded file. May be ``None`` (nothing uploaded), a path
      string, or an object exposing the path as ``.name``.

  Returns:
    A 4-tuple ``(table, sentiment_figure, entity_figure, conclusion_html)``:

    * ``table``: DataFrame with columns Review/Segment/Entity/Sentiment, the
      Review text blanked on repeated rows; an empty DataFrame when there is
      nothing to show.
    * ``sentiment_figure`` / ``entity_figure``: matplotlib figures, or
      ``None`` when no file was given or the CSV could not be used.
    * ``conclusion_html``: HTML snippet for the conclusion box.
  """
  if file is None:
    return pd.DataFrame(), None, None, "<div class='conclusion-box'>Please upload a CSV file first.</div>"

  reviews = _load_reviews(file)
  if reviews is None:
    return pd.DataFrame(), None, None, "<div class='conclusion-box'>Incorrect CSV format. Ensure there is a 'review' column.</div>"

  # One row per (review, entity) pair.
  rows = []
  for review in reviews:
    result = analyze_review(review)
    for entity in result["entities"]:
      rows.append({
        "Review": result["text"],
        "Segment": entity["Segment"],
        "Entity": entity["Entity"],
        "Sentiment": entity["Sentiment"]
      })

  if not rows:
    fig_empty, ax = plt.subplots()
    ax.text(0.5, 0.5, 'No Entities Found', ha='center')
    ax.axis('off')
    # Unregister from pyplot so repeated runs do not pile up open figures;
    # the Figure object itself stays usable by the caller.
    plt.close(fig_empty)
    return pd.DataFrame(), fig_empty, fig_empty, "<div class='conclusion-box'>No entities (food/service) found in the reviews.</div>"

  results_df = pd.DataFrame(rows)

  sentiment_counts = results_df['Sentiment'].value_counts()
  entity_counts = results_df['Entity'].value_counts()

  conclusion_text = generate_insight(sentiment_counts)

  # Show each review's text only on its first row to keep the table readable.
  display_df = results_df.copy()
  display_df['Review'] = display_df['Review'].mask(display_df['Review'].duplicated(), '')

  fig_pie = _plot_sentiment_pie(sentiment_counts)
  fig_bar = _plot_top_entities(entity_counts)

  return display_df, fig_pie, fig_bar, conclusion_text


def _load_reviews(file):
  """Return the non-empty review texts of the uploaded CSV, or None if it is empty or unparseable."""
  # The upload may be a plain path or an object carrying the path in `.name`.
  csv_path = getattr(file, "name", file)

  try:
    df = pd.read_csv(csv_path)
  except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError):
    return None

  if df.shape[1] == 0:
    return None

  # Locate the column by position so a duplicated or padded header cannot
  # turn the selection into a DataFrame.
  column_names = [str(col).strip().lower() for col in df.columns]
  position = column_names.index('review') if 'review' in column_names else 0

  # Drop empty cells; otherwise they would be analysed as the text "nan".
  column = df.iloc[:, position].dropna()
  return [str(value) for value in column if str(value).strip()]


def _plot_sentiment_pie(sentiment_counts):
  """Draw the donut chart of sentiment shares and return its figure."""
  fig_pie, ax_pie = plt.subplots(figsize=(6, 6))
  current_labels = sentiment_counts.index.tolist()
  # Green for "Positive", red for every other label.
  current_colors = ['#4CAF50' if label == "Positive" else '#F44336' for label in current_labels]

  _, _, autotexts = ax_pie.pie(sentiment_counts, labels=current_labels, autopct='%1.1f%%',
              startangle=140, colors=current_colors, wedgeprops=dict(width=0.6))

  plt.setp(autotexts, size=10, weight="bold", color="white")
  ax_pie.set_title('Sentiment Distribution', fontsize=12, fontweight='bold', pad=20)

  plt.close(fig_pie)  # keep pyplot's figure registry from growing per call
  return fig_pie


def _plot_top_entities(entity_counts):
  """Draw the bar chart of the ten most frequent entities and return its figure."""
  fig_bar, ax_bar = plt.subplots(figsize=(10, 5))
  top_10 = entity_counts.head(10)

  bars = ax_bar.bar(top_10.index, top_10.values, color='#009688', alpha=0.8, edgecolor='black', linewidth=0.5)
  ax_bar.set_title('Top 10 Mentioned Items', fontsize=12, fontweight='bold')
  ax_bar.set_ylabel('Mentions')
  ax_bar.tick_params(axis='x', rotation=45)
  ax_bar.grid(axis='y', linestyle='--', alpha=0.7)
  ax_bar.bar_label(bars, padding=3)

  ax_bar.spines['top'].set_visible(False)
  ax_bar.spines['right'].set_visible(False)
  fig_bar.tight_layout()

  plt.close(fig_bar)  # keep pyplot's figure registry from growing per call
  return fig_bar

In [10]:
# Five example reviews in the expected one-column `review` layout.
sample_data = {
    'review': [
        "The sushi was incredibly fresh and delicious.",
        "The waiter was rude and the service was slow.",
        "Great ambience but the pasta was salty.",
        "I loved the dessert, specifically the tiramisu.",
        "The drinks were overpriced and watered down."
    ]
}
sample_df = pd.DataFrame(sample_data)
# Written as a CSV (relative path, no index column); the UI cell offers this
# file as its sample upload.
sample_csv_path = "sample_reviews.csv"
sample_df.to_csv(sample_csv_path, index=False)

In [11]:
# Custom stylesheet passed to gr.Blocks: white page, black text, teal accents,
# plus the `.home-card`, `.conclusion-box`, `.header-*` and `.label-csv`
# classes used by the HTML snippets in this notebook.
css = """
@import url('https://fonts.googleapis.com/css2?family=Lato:wght@400;700&display=swap');

.gradio-container {
    background-color: #ffffff !important;
    font-family: 'Lato', sans-serif !important;
}

.gradio-container p, .gradio-container h1, .gradio-container h2,
.gradio-container h3, .gradio-container li, .gradio-container span {
    color: #000000 !important;
}


.header-container { text-align: center; margin-bottom: 20px; padding: 20px; }
.header-title {
    font-family: 'Lato', sans-serif !important; color: #000000 !important;
    font-size: 1.5rem; font-weight: 800; margin-bottom: 5px !important;
}
.header-desc { color: #444444 !important; font-size: 0.95rem; margin-top: 0px !important; }


button[role="tab"] {
    color: #000000 !important;
    font-weight: bold !important;
    border: 1px solid transparent !important;
}
button[role="tab"][aria-selected="true"] {
    color: #009688 !important;
}
button[role="tab"]:hover {
    color: #ffffff !important;
    background-color: #009688 !important;
}


.home-card {
    background-color: #ffffff !important;
    border: 1px solid #e5e7eb !important;
    border-radius: 8px;
    padding: 20px;
    margin-bottom: 15px;
    box-shadow: none !important;
}
.home-card h3 { color: #009688 !important; border-bottom: 1px solid #e5e7eb; padding-bottom: 10px; }
.home-card * { color: #000000 !important; background-color: transparent !important; }


.gradio-container .block.table, .gradio-container .table-wrap {
    box-shadow: none !important;
    border: 1px solid #e5e7eb !important;
    border-radius: 4px !important;
}
.gradio-container thead th {
    background-color: #009688 !important;
    color: #ffffff !important;
    border: 1px solid #e5e7eb !important;
    border-bottom: 2px solid #00796b !important;
    box-shadow: none !important;
}
.gradio-container thead th span {
    color: #ffffff !important;
}
.gradio-container tbody td {
    background-color: #ffffff !important;
    color: #000000 !important;
    border: 1px solid #e5e7eb !important;
}
.gradio-container tbody td span {
    color: #000000 !important;
}


.label-csv { color: #000000 !important; font-weight: bold; margin-bottom: 5px; }

.gradio-container .examples,
.gradio-container .example,
.gradio-container .examples span,
.gradio-container .example span,
.gradio-container .examples-row span {
    color: #000000 !important;
}

.gradio-container label,
.gradio-container .label {
    color: #000000 !important;
}


.gradio-container .example-item,
.gradio-container .example-item *,
.gradio-container .example-item button,
.gradio-container .example-item button * {
    color: #000 !important;
}

.conclusion-box,
.conclusion-box *,
.conclusion-text *,
.conclusion-text p,
.conclusion-text h3,
.conclusion-text b {
    color: #000000 !important;
}
.conclusion-box {
    background-color: #f0fdf4 !important;
    border: 1px solid #bbf7d0 !important;
    padding: 20px;
    border-radius: 8px;
}

button.gallery-item {
    color: #000000 !important;
}

button.gallery-item:hover {
    color: #ffffff !important;
}
"""


# Soft theme with teal/emerald hues, overridden to a white page background
# and black block labels/titles.
theme = gr.themes.Soft(
    primary_hue="teal",
    secondary_hue="emerald",
).set(
    body_background_fill="white",
    block_label_text_color="#000000",
    block_title_text_color="#000000"
)

def reset_outputs():
    """Return the blank state for the four dashboard outputs.

    Returns:
        A 4-tuple: an empty DataFrame with the Review/Segment/Entity/Sentiment
        columns, ``None`` for each of the two plots, and the placeholder HTML
        for the conclusion box.
    """
    empty_table = pd.DataFrame(columns=["Review", "Segment", "Entity", "Sentiment"])
    placeholder_html = "<div class='conclusion-box' style='color:#555;'><em>Analysis results and conclusions will appear here...</em></div>"
    return (empty_table, None, None, placeholder_html)

# Build the two-tab Gradio app: a static "Home" page and the "Analysis
# Dashboard" that runs process_all_reviews on an uploaded CSV.
with gr.Blocks(theme=theme, css=css, title="Sentiment-Aware NER for Analyzing Food and Service Quality in Restaurants") as interface:

    # Page header (styled by the .header-* CSS classes).
    gr.HTML("""
    <div class="header-container">
        <div class="header-title">Sentiment-Aware Named Entity Recognition for Analyzing Food and Service Quality in Restaurants</div>
        <div class="header-desc">Automatic detection of restaurant aspects (Food, Service, Ambience) and their associated sentiment polarity using Deep Learning.</div>
    </div>
    """)

    with gr.Tabs():
        # --- Tab 1: static explanation cards and an example of the CSV layout.
        with gr.TabItem("Home"):
            gr.HTML("""
            <div class="home-card">
                <h3>Background Issue</h3>
                <p>In the restaurant industry, customer reviews are a valuable asset. However, manually reading thousands of reviews is time-consuming and prone to bias. Restaurant owners often struggle to know: <em>\"Are customers disappointed because of the food taste or the service?\"</em> This system aims to automatically separate aspects (Entities) and their sentiment polarity.</p>
            </div>
            <div class="home-card">
                <h3>Model Explanation</h3>
                <p>This application uses <strong>Deep Learning (DistilBERT)</strong> which works in two stages:</p>
                <ul>
                    <li><strong>NER (Named Entity Recognition):</strong> Detects keywords such as <em>Sushi, Pizza</em> (Food) or <em>Waiter</em> (Service).</li>
                    <li><strong>Sentiment Analysis:</strong> Determines whether the sentence has a <strong>Positive</strong> or <strong>Negative</strong> meaning.</li>
                </ul>
            </div>
            <div class="home-card">
                <h3>How to Use the Application</h3>
                <ol>
                    <li>Go to the <strong>Analysis Dashboard</strong> tab at the top.</li>
                    <li>Upload a CSV file that has a column named <code>review</code>.</li>
                    <li>Click the <strong>Start Analysis</strong> button.</li>
                    <li>Analysis results in the form of Graphs and Conclusions will appear automatically.</li>
                </ol>
            </div>
            """)

            # Read-only preview of the sample reviews as a format example.
            with gr.Column():
                 gr.HTML("<div class='label-csv'>Correct CSV Data Format Example:</div>")
                 gr.Dataframe(value=pd.DataFrame(sample_data), interactive=False)

        # --- Tab 2: upload, run, and inspect the results.
        with gr.TabItem("Analysis Dashboard"):
            with gr.Row():
                # Left column: CSV upload, sample-file shortcut, run button.
                with gr.Column(scale=1):
                    gr.Markdown("### Input Data")
                    file_input = gr.File(label="Upload CSV", file_types=[".csv"], height=200)
                    # NOTE(review): fn is supplied but outputs is empty and caching is off;
                    # confirm whether selecting the sample is meant to run the analysis or
                    # only to fill the upload box.
                    gr.Examples(examples=[[sample_csv_path]], inputs=file_input, label="Use Sample Data", fn=process_all_reviews, outputs=[], cache_examples=False)
                    analyze_btn = gr.Button("Start Analysis", variant="primary", size="lg")

                # Right column: conclusion box, starting with a placeholder message.
                with gr.Column(scale=2):
                    gr.Markdown("### Conclusion")
                    conclusion_box = gr.HTML(value="<div class='conclusion-box' style='color:#555;'><em>Analysis results and intelligent conclusions will appear here...</em></div>")

            gr.Markdown("---")
            gr.Markdown("### Data Visualization")
            with gr.Row():
                sentiment_plot = gr.Plot(label="Sentiment Distribution")
                entity_plot = gr.Plot(label="Top 10 Menu/Services")

            gr.Markdown("---")
            gr.Markdown("### Extraction Details")
            output_table = gr.Dataframe(headers=["Review", "Segment", "Entity", "Sentiment"], interactive=False, wrap=True)

            # Removing the uploaded file resets all four outputs via reset_outputs.
            file_input.clear(
              fn=reset_outputs,
              inputs=None,
              outputs=[output_table, sentiment_plot, entity_plot, conclusion_box]
            )

    # The button feeds the upload to process_all_reviews; its four return
    # values map, in order, onto the table, the two plots and the conclusion box.
    analyze_btn.click(fn=process_all_reviews, inputs=file_input, outputs=[output_table, sentiment_plot, entity_plot, conclusion_box])

# Start the app. NOTE(review): the recorded output shows the server running
# until a keyboard interrupt, so this cell appears to block while serving.
interface.launch(debug=True)

  with gr.Blocks(theme=theme, css=css, title="Sentiment-Aware NER for Analyzing Food and Service Quality in Restaurants") as interface:


* Running on local URL:  http://127.0.0.1:7860
* To create a public link, set `share=True` in `launch()`.


Keyboard interruption in main thread... closing server.


