In [1]:
import numpy as np
import pandas as pd
import os
import nltk
import pickle
from nltk.tokenize import word_tokenize, sent_tokenize

In [2]:
# Download NLTK data if needed
# The tokenizer data fetched here is what the NLTK tokenizers used later in
# this notebook (nltk.word_tokenize in cutword) rely on.
# NOTE(review): recent NLTK releases may look for the 'punkt_tab' resource
# instead of 'punkt' -- confirm against the installed NLTK version.
nltk.download('punkt')

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Huawei\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [3]:
def read_lines(filename):
    """Return the raw lines of a UTF-8 text file, or ``[]`` if it is missing.

    Lines are returned exactly as the file object yields them, i.e. with
    their trailing newline characters still attached.  When ``filename`` does
    not exist, a "File not found" message is printed and an empty list is
    returned instead of raising.
    """
    if os.path.exists(filename):
        with open(filename, 'r', encoding="utf-8") as handle:
            return list(handle)
    print(f"File not found: {filename}")
    return []

# Build the stop-word list consulted by cutword: one entry per line of the
# stop-word file, trimmed of surrounding whitespace and lower-cased.
stop_words = [
    entry.strip().lower()
    for entry in read_lines("emotion/stopwords/stopwords.txt")
]
    
# Function to cut words from sentences and remove stopwords
def cutword(x):
    """Tokenize ``x`` with NLTK and drop stop words and one-character tokens.

    A token is kept when, after stripping whitespace, it is longer than one
    character and its lower-cased form is not in the module-level
    ``stop_words``.  Kept tokens are returned unmodified and in order.
    """
    def _is_kept(token):
        trimmed = token.strip()
        return len(trimmed) > 1 and trimmed.lower() not in stop_words

    return [token for token in nltk.word_tokenize(x) if _is_kept(token)]
    

def cut_sentence(words):
    """Split ``words`` into consecutive segments, cutting after delimiters.

    The input is scanned element by element.  A delimiter (any character of
    the punctuation list, which includes the ASCII space) closes the current
    segment -- and is kept as that segment's last element -- only when the
    remembered look-ahead element is not itself a delimiter; otherwise the
    segment keeps growing.  The look-ahead is refreshed only while a segment
    is being extended, from the last element of the slice that ends two
    positions past the current one.  Whatever remains after the last cut is
    returned as a final segment.

    Because the space character is in the delimiter set, ordinary
    space-separated text is cut at spaces as well as at punctuation.

    Returns a list of slices of ``words``; empty input gives ``[]``.
    """
    delimiters = ',.!?;~，。！？；～… '  # Punctuation list (note the trailing space)
    lookahead = 'meaningless'  # initial value that is not a delimiter
    segment_start = 0
    segments = []

    for position, element in enumerate(words):
        if element not in delimiters or lookahead in delimiters:
            # Extend the current segment and refresh the look-ahead.
            lookahead = words[segment_start:position + 3][-1]
        else:
            # Delimiter followed by non-delimiter content: close the segment.
            segments.append(words[segment_start:position + 1])
            segment_start = position + 1

    if segment_start < len(words):
        segments.append(words[segment_start:])
    return segments


def read_lines(filename):
    """Read a UTF-8 text file and return its lines stripped of whitespace.

    Args:
        filename: Path of the text file to read.

    Returns:
        A list with one string per line, in file order, each with leading
        and trailing whitespace (including the newline) removed.  Blank
        lines are kept as empty strings.

    Raises:
        OSError: If the file cannot be opened (for example
            ``FileNotFoundError`` when it does not exist).
    """
    # The context manager guarantees the handle is closed, even if reading
    # or decoding fails part-way through.
    with open(filename, 'r', encoding="utf-8") as fp:
        return [line.strip() for line in fp]


_STOPWORDS_FILE = "emotion/stopwords/stopwords.txt"
_stopwords_cache = None  # filled on first use by _load_stopwords()


def _load_stopwords():
    """Return the stop-word set, reading the stop-word file only once."""
    global _stopwords_cache
    if _stopwords_cache is None:
        _stopwords_cache = frozenset(read_lines(_STOPWORDS_FILE))
    return _stopwords_cache


def del_stopwords(seg_sent):
    """Return the tokens of ``seg_sent`` that are not stop words.

    Matching is exact: a token is removed only when it equals an entry
    returned by ``read_lines`` for the stop-word file.

    The stop-word file is read on the first call and kept in memory as a
    set, so scoring many sentences no longer re-reads the file each time.
    Re-run this cell to pick up edits to the file.

    Args:
        seg_sent: Iterable of tokens (expected to be strings, since set
            membership requires hashable items).

    Returns:
        A new list with the remaining tokens in their original order.
    """
    stopwords = _load_stopwords()
    return [word for word in seg_sent if word not in stopwords]


In [4]:
# Load the positive and negative word lists
# Each list holds one lexicon entry per line of its file, as returned by
# read_lines; the scoring functions below test tokens with `in` against them.
# NOTE(review): the positive lexicon directory is spelled "postive" -- confirm
# it matches the folder name on disk before changing it.
posdict = read_lines("emotion/postive/positive.txt")
negdict = read_lines("emotion/negative/negative.txt")

In [5]:
# Function to match sentiment values based on words
def match(word, sentiment_value):
    """Return ``sentiment_value`` adjusted for the polarity of ``word``.

    A word found in ``posdict`` multiplies the value by 1; otherwise a word
    found in ``negdict`` multiplies it by -1 (flipping its sign).  For any
    other word the value is returned untouched.
    """
    if word in posdict:
        return sentiment_value * 1
    if word in negdict:
        return sentiment_value * -1
    return sentiment_value

In [6]:
# Function to transform sentiment scores
def transform_to_positive_num(poscount, negcount):
    """Fold negative tallies onto the opposite side so both are non-negative.

    Args:
        poscount: Positive tally for a segment (may be negative).
        negcount: Negative tally for a segment (may be negative).

    Returns:
        A ``(pos_count, neg_count)`` tuple:

        * ``poscount < 0 <= negcount``: ``(0, negcount - poscount)``
        * ``negcount < 0 <= poscount``: ``(poscount - negcount, 0)``
        * both negative: ``(-negcount, -poscount)``
        * otherwise the inputs are returned unchanged.
    """
    if poscount < 0 and negcount >= 0:
        # A negative positive-tally counts as extra negative evidence.
        return (0, negcount - poscount)
    if negcount < 0 and poscount >= 0:
        # A negative negative-tally counts as extra positive evidence.
        return (poscount - negcount, 0)
    if poscount < 0 and negcount < 0:
        # Both flipped: swap sides and drop the signs.
        return (-negcount, -poscount)
    return (poscount, negcount)

In [7]:
# Function to calculate sentiment score for a single review
def single_review_sentiment_score(content):
    """Score one review as (positive hits) minus (negative hits).

    The text is split with ``cut_sentence``; each segment is tokenized with
    ``cutword`` and filtered with ``del_stopwords``.  Within a segment every
    token found in ``posdict`` adds 1 to the positive tally and every token
    found in ``negdict`` adds 1 to the negative tally.  Per-segment tallies
    are normalised with ``transform_to_positive_num`` and summed.

    Args:
        content: The review text.  Anything that is not a non-blank string
            (``None``, NaN, numbers, missing-value markers, ...) is treated
            as neutral.

    Returns:
        An integer: total positive count minus total negative count, or 0
        for empty / non-string input.
    """
    # Test the type before anything else: some missing-value markers raise
    # when evaluated for truthiness, so `not content` must not run on them.
    if not isinstance(content, str) or not content.strip():
        return 0

    pos_result, neg_result = 0, 0

    for sent in cut_sentence(content):
        seg_sent = del_stopwords(cutword(sent))

        poscount = 0
        negcount = 0
        s = 0  # index just past the most recent sentiment word

        for i, word in enumerate(seg_sent):
            if word in posdict:
                poscount += 1
                # NOTE(review): every token in seg_sent[s:i] was already
                # checked against both lexicons on an earlier iteration
                # without matching (otherwise `s` would have moved past it),
                # so with `match` as defined in this notebook this scan
                # leaves the tally unchanged.
                for w in seg_sent[s:i]:
                    poscount = match(w, poscount)
                s = i + 1
            elif word in negdict:
                negcount += 1
                for w in seg_sent[s:i]:
                    negcount = match(w, negcount)
                s = i + 1
            elif word in ("!", "！"):
                # An exclamation mark boosts the sentiment word nearest to
                # the end of the segment.  Both the ASCII and the full-width
                # mark are accepted, matching cut_sentence's delimiter list.
                # NOTE(review): cutword drops one-character tokens, so this
                # branch only fires if the tokenization step changes.
                for w2 in reversed(seg_sent):
                    if w2 in posdict:
                        poscount += 2
                        break
                    elif w2 in negdict:
                        negcount += 2
                        break

        seg_pos, seg_neg = transform_to_positive_num(poscount, negcount)
        pos_result += seg_pos
        neg_result += seg_neg

    # Tallies are integers, so no rounding is needed.
    return pos_result - neg_result

In [8]:
# Function to analyze reviews from an Excel file
def analyze_reviews(file_name, sheet_name='Sheet1'):
    """Summarise the sentiment of every review in one Excel sheet.

    Reads the ``Review`` column of ``sheet_name`` in ``file_name`` and scores
    each entry with ``single_review_sentiment_score``.  A score of zero or
    more counts as positive; anything below zero counts as negative.

    Returns a dict with the keys ``pos_number``, ``neg_number``,
    ``pos_percentage`` and ``neg_percentage`` (percentages rounded to two
    decimals, both ``0.0`` when the sheet has no reviews).
    """
    review_texts = pd.read_excel(file_name, sheet_name=sheet_name)['Review'].tolist()
    scores = [single_review_sentiment_score(text) for text in review_texts]

    pos_number = sum(1 for score in scores if score >= 0)
    neg_number = len(scores) - pos_number
    total_number = pos_number + neg_number

    def _percentage(count):
        # Share of `count` in the total, guarding against an empty sheet.
        if total_number == 0:
            return 0.0
        return round(float(count) / float(total_number) * 100, 2)

    return {
        'pos_number': pos_number,
        'neg_number': neg_number,
        'pos_percentage': _percentage(pos_number),
        'neg_percentage': _percentage(neg_number),
    }


In [9]:
# Define a function to sort and compare restaurant reviews based on sentiment
def rank_restaurants(restaurant_results):
    """Return the restaurant records ordered by positive percentage, best first.

    Each record must provide ``record['result']['pos_percentage']``.  The
    input is not modified; a new list is returned, and records with equal
    percentages keep their original relative order.
    """
    def _positive_share(record):
        return record['result']['pos_percentage']

    ranked = list(restaurant_results)
    ranked.sort(key=_positive_share, reverse=True)
    return ranked

In [10]:
def extract_best_dish(file_name, sheet_name='Sheet1'):
    """Return the first non-empty entry of the 'Recommended dishes' column.

    The sheet is read from ``file_name``; empty cells in the column are
    skipped.  If no entry remains, the string "No dish recommended" is
    returned.  No ranking is applied: "best" simply means first in the sheet.
    """
    sheet = pd.read_excel(file_name, sheet_name=sheet_name)
    candidates = sheet['Recommended dishes'].dropna().tolist()
    return next(iter(candidates), "No dish recommended")

In [11]:
# Function to save the model
def save_model(restaurant_results, filename='restaurant_model.pkl'):
    """Pickle ``restaurant_results`` to ``filename``, replacing any existing file."""
    with open(filename, 'wb') as sink:
        pickle.Pickler(sink).dump(restaurant_results)

# Function to load the model
def load_model(filename='restaurant_model.pkl'):
    """Return the object pickled in ``filename``, or ``None`` if it is absent.

    Warning: unpickling can execute arbitrary code, so only load files that
    were written by a trusted source (such as ``save_model`` in this notebook).
    """
    if not os.path.exists(filename):
        return None
    with open(filename, 'rb') as source:
        return pickle.load(source)


In [12]:
# Analyze the reviews for all restaurants
def analyze_all_restaurants():
    """Analyse every known restaurant workbook and return one record each.

    Each record is a dict with ``name``, ``file_name``, ``result`` (the
    summary from ``analyze_reviews``) and ``best_dish`` (from
    ``extract_best_dish``).  Every workbook is read twice, once by each
    helper.
    """
    workbooks = (
        ("Hajiyar Hotel", "restaurants_data/Hajiyar Hotel/Hajiyar Hotel.xlsx"),
        ("Kiri Bhojan Restaurant", "restaurants_data/Kiri Bhojan Restaurant/Kiri Bhojan_Restaurant.xlsx"),
        ("Six Flav Kitchen", "restaurants_data/Six Flav Kitchen/Six Flav_Kitchen.xlsx"),
        ("Sri Krishna Cafe", "restaurants_data/Sri Krishna cafe/Sri Krishna_Cafe.xlsx"),
        ("Sunshine", "restaurants_data/Sunshine/Sunshine.xlsx"),
    )

    return [
        {
            "name": name,
            "file_name": path,
            "result": analyze_reviews(path),
            "best_dish": extract_best_dish(path),
        }
        for name, path in workbooks
    ]

In [13]:
def update_restaurant_data(restaurant_name, new_review, new_recommended_dish, restaurant_results):
    """Append a review to a restaurant's workbook and refresh its record.

    The record whose ``name`` equals ``restaurant_name`` is looked up in
    ``restaurant_results``.  If found, a row holding the review, the
    recommended dish and a sentiment label (1 when the review's score is
    zero or more, otherwise -1) is appended to the Excel file, the file is
    rewritten, and the record's ``result`` and ``best_dish`` are recomputed
    in place.  A status message is printed in either case.
    """
    restaurant = None
    for candidate in restaurant_results:
        if candidate['name'] == restaurant_name:
            restaurant = candidate
            break

    if not restaurant:
        print(f"Restaurant '{restaurant_name}' not found.")
        return

    workbook_path = restaurant['file_name']

    # Current contents of the Excel file.
    existing_rows = pd.read_excel(workbook_path)

    # Label the new review with the scorer's own verdict.
    if single_review_sentiment_score(new_review) >= 0:
        sentiment = 1
    else:
        sentiment = -1

    appended_row = pd.DataFrame({
        "Review": [new_review],
        "Recommended dishes": [new_recommended_dish],
        "Sentiment": [sentiment]
    })

    # Write the extended table back over the original file.
    combined = pd.concat([existing_rows, appended_row], ignore_index=True)
    combined.to_excel(workbook_path, index=False)

    # Recompute both values before touching the record.
    refreshed = (analyze_reviews(workbook_path), extract_best_dish(workbook_path))
    restaurant['result'], restaurant['best_dish'] = refreshed

    print(f"Restaurant '{restaurant_name}' has been updated with the new review, recommended dish, and sentiment.")


In [14]:
# Re-rank the restaurants after updating data
def rerank_restaurants(restaurant_results):
    """Print the restaurants ordered by positive-sentiment percentage.

    The ordering comes from ``rank_restaurants``; each line shows the rank,
    name, positive percentage and best dish.  Nothing is returned.
    """
    ranking = rank_restaurants(restaurant_results)

    print("\n--- Updated Restaurant Rankings ---")
    for position in range(len(ranking)):
        entry = ranking[position]
        print(f"{position + 1}. {entry['name']} - Positive Sentiment: {entry['result']['pos_percentage']}% - Best Dish: {entry['best_dish']}")

In [15]:
# Simulated user input
def get_user_input():
    """Prompt for a restaurant name, a review and a recommended dish.

    Returns the three answers, in that order, as a tuple of strings exactly
    as typed (no validation or trimming is performed).
    """
    print("\n--- Update Restaurant Data ---")
    prompts = (
        "Enter restaurant name (Hajiyar Hotel, Kiri Bhojan Restaurant, Six Flav Kitchen, Sri Krishna Cafe, Sunshine): ",
        "Enter your review: ",
        "Enter your recommended dish: ",
    )
    return tuple(input(prompt) for prompt in prompts)


In [16]:
# Main logic to run the recalculation based on user input
def main():
    """Show the rankings, record one user-supplied review, and re-rank.

    Results saved by a previous run are reused when the pickle file exists;
    otherwise every workbook is analysed from scratch.  The (possibly
    updated) results are saved again at the end.
    """
    cached_results = load_model()
    restaurant_results = analyze_all_restaurants() if cached_results is None else cached_results

    # Note: rerank_restaurants prints its own "Updated" header, so the
    # initial listing appears under both headers.
    print("\n--- Initial Restaurant Rankings ---")
    rerank_restaurants(restaurant_results)

    # Collect one new review and fold it into the stored data.
    chosen_name, review_text, dish = get_user_input()
    update_restaurant_data(chosen_name, review_text, dish, restaurant_results)

    # Show the rankings again and persist the refreshed results.
    rerank_restaurants(restaurant_results)
    save_model(restaurant_results)


# Run the interactive workflow when this cell / script is executed directly
if __name__ == "__main__":
    main()




--- Initial Restaurant Rankings ---

--- Updated Restaurant Rankings ---
1. Sri Krishna Cafe - Positive Sentiment: 90.32% - Best Dish: Vada, Masala Dosa, Ulundu Vadai, Curd Vadai
2. Kiri Bhojan Restaurant - Positive Sentiment: 86.67% - Best Dish: Mongolian rice, noodles
3. Six Flav Kitchen - Positive Sentiment: 84.19% - Best Dish: Kottu, rice, biriyani,noodles,soup, fresh juice
4. Hajiyar Hotel - Positive Sentiment: 83.94% - Best Dish: Hajiyar Special Shawal, Hajiyar Special Meal
5. Sunshine - Positive Sentiment: 75.94% - Best Dish: Seafood Fried Rice, Tandoori Chicken, Chicken Briyani

--- Update Restaurant Data ---


Enter restaurant name (Hajiyar Hotel, Kiri Bhojan Restaurant, Six Flav Kitchen, Sri Krishna Cafe, Sunshine):  Kiri Bhojan Restaurant
Enter your review:  The food was bland and overcooked, leaving me disappointed with my dining experience overall.
Enter your recommended dish:  Normal rice and curry


Restaurant 'Kiri Bhojan Restaurant' has been updated with the new review, recommended dish, and sentiment.

--- Updated Restaurant Rankings ---
1. Sri Krishna Cafe - Positive Sentiment: 90.32% - Best Dish: Vada, Masala Dosa, Ulundu Vadai, Curd Vadai
2. Kiri Bhojan Restaurant - Positive Sentiment: 86.26% - Best Dish: Mongolian rice, noodles
3. Six Flav Kitchen - Positive Sentiment: 84.19% - Best Dish: Kottu, rice, biriyani,noodles,soup, fresh juice
4. Hajiyar Hotel - Positive Sentiment: 83.94% - Best Dish: Hajiyar Special Shawal, Hajiyar Special Meal
5. Sunshine - Positive Sentiment: 75.94% - Best Dish: Seafood Fried Rice, Tandoori Chicken, Chicken Briyani


In [17]:
from sklearn.metrics import accuracy_score

# Function to calculate accuracy for a specific file with restaurant name
def calculate_accuracy_with_name(file_name, restaurant_name, sheet_name='Sheet1'):
    """Compare predicted sentiment with the sheet's 'Sentiment' labels.

    Rows without a ``Sentiment`` value are ignored.  Each remaining review
    is scored with ``single_review_sentiment_score`` and predicted as 1 when
    the score is zero or more, otherwise -1.  The accuracy is printed
    together with ``restaurant_name`` and returned as a percentage rounded
    to two decimals.

    Note: rows appended by ``update_restaurant_data`` carry a label produced
    by this same scorer, so they always count as correct here.
    """
    labelled = pd.read_excel(file_name, sheet_name=sheet_name).dropna(subset=['Sentiment'])

    review_texts = labelled['Review'].tolist()
    actual_sentiments = labelled['Sentiment'].tolist()

    predicted_sentiments = [
        1 if single_review_sentiment_score(text) >= 0 else -1
        for text in review_texts
    ]

    accuracy_percentage = round(accuracy_score(actual_sentiments, predicted_sentiments) * 100, 2)

    print(f"Restaurant: {restaurant_name} - Model Accuracy: {accuracy_percentage}%")
    return accuracy_percentage



In [18]:
# Example usage
file_name = "restaurants_data/Hajiyar Hotel/Hajiyar Hotel.xlsx"  # Replace with your actual file path
restaurant_name = "Hajiyar Hotel"  # Replace with the restaurant name
accuracy = calculate_accuracy_with_name(file_name, restaurant_name)

Restaurant: Hajiyar Hotel - Model Accuracy: 77.98%


In [19]:
# Example usage
file_name = "restaurants_data/Kiri Bhojan Restaurant/Kiri Bhojan_Restaurant.xlsx"  # Replace with your actual file path
restaurant_name = "Kiri Bhojan Restaurant"  # Replace with the restaurant name
accuracy = calculate_accuracy_with_name(file_name, restaurant_name)

Restaurant: Kiri Bhojan Restaurant - Model Accuracy: 82.94%


In [20]:
# Example usage
file_name = "restaurants_data/Six Flav Kitchen/Six Flav_Kitchen.xlsx"  # Replace with your actual file path
restaurant_name = "Six Flav Kitchen"  # Replace with the restaurant name
accuracy = calculate_accuracy_with_name(file_name, restaurant_name)

Restaurant: Six Flav Kitchen - Model Accuracy: 80.93%


In [21]:
# Example usage
file_name = "restaurants_data/Sri Krishna cafe/Sri Krishna_Cafe.xlsx"  # Replace with your actual file path
restaurant_name = "Sri Krishna Cafe"  # Replace with the restaurant name
accuracy = calculate_accuracy_with_name(file_name, restaurant_name)

Restaurant: Sri Krishna Cafe - Model Accuracy: 88.48%


In [22]:
# Example usage
file_name = "restaurants_data/Sunshine/Sunshine.xlsx"  # Replace with your actual file path
restaurant_name = "Sunshine"  # Replace with the restaurant name
accuracy = calculate_accuracy_with_name(file_name, restaurant_name)

Restaurant: Sunshine - Model Accuracy: 68.87%
