# Importing libraries

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
import warnings
warnings.filterwarnings('ignore')
from ipywidgets import *
from IPython.display import display
from ipywidgets import FloatProgress  
import time
from IPython.html import widgets
from sklearn.model_selection import train_test_split
from sklearn.metrics import auc
from sklearn.feature_extraction.text import CountVectorizer,TfidfVectorizer
from sklearn.metrics import classification_report,accuracy_score
from sklearn.ensemble import RandomForestClassifier
from sklearn import tree
from sklearn.linear_model import LogisticRegression

# Data cleaning and preprocessing

In [2]:
# Load the raw reviews, drop every row that has a missing value in any column,
# then keep only the two columns used by the rest of the notebook.
amazon_data = pd.read_csv("Reviews2.csv")
amazon_data = amazon_data.dropna(axis=0)
amazon_data = amazon_data[["Reviews", "Rating"]]

# Bucket the reviews by star rating.
amazon_data_pos = amazon_data[amazon_data["Rating"].isin([4, 5])]
amazon_data_neu = amazon_data[amazon_data["Rating"].isin([3])]
amazon_data_neg = amazon_data[amazon_data["Rating"].isin([1, 2])]

# Keep at most 20000 positive, 5000 neutral and 20000 negative reviews.
amazon_data_filtered = pd.concat(
    [amazon_data_pos[:20000], amazon_data_neu[:5000], amazon_data_neg[:20000]]
)

# Sentiment label "r": 0 for ratings 1-2, 1 for rating 3, 2 for ratings 4-5.
# The label is written with .loc on a boolean mask rather than chained indexing
# (df["r"][mask] = value): chained assignment writes to a temporary object under
# pandas Copy-on-Write, which would leave every label at its default of 1.
amazon_data_filtered["r"] = 1
amazon_data_filtered.loc[amazon_data_filtered["Rating"].isin([1, 2]), "r"] = 0
amazon_data_filtered.loc[amazon_data_filtered["Rating"].isin([3]), "r"] = 1
amazon_data_filtered.loc[amazon_data_filtered["Rating"].isin([4, 5]), "r"] = 2



# Splitting Train and Test Data

In [3]:
# Hold out 20% of the labelled reviews for evaluation. The fixed random_state
# makes the shuffle repeatable, so the accuracies reported later do not change
# from one run of the notebook to the next.
X_train_data, x_test_data, Y_train_data, y_test_data = train_test_split(
    amazon_data_filtered["Reviews"],
    amazon_data_filtered["r"],
    test_size=0.2,
    random_state=42,
)

# Text Transformation

In [4]:
# Build the bag-of-words vocabulary from the training reviews only (English
# stop words removed), then encode both splits with that same vocabulary.
count_vector = CountVectorizer(stop_words="english")
X_train_data_new = count_vector.fit_transform(X_train_data)
x_test_data_new = count_vector.transform(x_test_data)

# Test-set predictions of each trained model, keyed by model name.
predictions = {}

# System GUI Code

In [5]:
import pandas as pd
import numpy as np
import threading
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
import warnings
warnings.filterwarnings('ignore')
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_curve, auc
from sklearn.feature_extraction.text import CountVectorizer,TfidfVectorizer
from sklearn.metrics import classification_report,accuracy_score,confusion_matrix
from sklearn.ensemble import RandomForestClassifier
from sklearn import tree
from sklearn.linear_model import LogisticRegression
import tkinter as tk
from tkinter import ttk
from tkinter import Toplevel
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
from matplotlib.figure import Figure
import customtkinter

# Module-level placeholder, initialised to None.
can = None

def log_reg():
    """Fit a logistic-regression classifier and show its test accuracy.

    Opens a "Results" window, trains on the module-level training matrix,
    stores the test-set predictions under ``predictions['LogisticRegression']``
    and appends an "Accuracy = ..." label to the window.
    """
    popup = Toplevel(root)
    popup.title("Results")
    popup.geometry("500x600")
    for caption in (
        "Logistic Regression Classifier",
        "Please be patient. This may take some time.",
    ):
        tk.Label(popup, text=caption).pack()

    # Train on the vectorised training reviews.
    classifier = LogisticRegression()
    classifier.fit(X_train_data_new, Y_train_data)

    # Score on the held-out reviews.
    predicted = classifier.predict(x_test_data_new)
    predictions['LogisticRegression'] = predicted
    score = accuracy_score(y_test_data, predicted)
    tk.Label(popup, text="Accuracy = " + str(score)).pack()


def ensmble():
    """Fit a random-forest classifier and show its test accuracy.

    Opens a "Results" window, trains on the module-level training matrix,
    stores the test-set predictions under ``predictions["RandomForest"]``
    and appends an "Accuracy = ..." label to the window.
    """
    popup = Toplevel(root)
    popup.title("Results")
    popup.geometry("500x600")
    for caption in (
        "RandomForest",
        "Please be patient. This may take some time.",
    ):
        tk.Label(popup, text=caption).pack()

    # Train on the vectorised training reviews.
    forest = RandomForestClassifier()
    forest.fit(X_train_data_new, Y_train_data)

    # Score on the held-out reviews.
    predicted = forest.predict(x_test_data_new)
    predictions["RandomForest"] = predicted
    score = accuracy_score(y_test_data, predicted)
    tk.Label(popup, text="Accuracy = " + str(score)).pack()


def dtree():
    """Fit a decision-tree classifier and show its test accuracy.

    Opens a "Results" window, trains on the module-level training matrix,
    stores the test-set predictions under ``predictions["DecisionTree"]``
    and appends an "Accuracy = ..." label to the window.
    """
    popup = Toplevel(root)
    popup.title("Results")
    popup.geometry("500x600")
    for caption in (
        "Decision Tree Classifier",
        "Please be patient. This may take some time.",
    ):
        tk.Label(popup, text=caption).pack()

    # Train on the vectorised training reviews.
    classifier = tree.DecisionTreeClassifier()
    classifier.fit(X_train_data_new, Y_train_data)

    # Score on the held-out reviews.
    predicted = classifier.predict(x_test_data_new)
    predictions["DecisionTree"] = predicted
    score = accuracy_score(y_test_data, predicted)
    tk.Label(popup, text="Accuracy = " + str(score)).pack()



def compare_models():
    """Train all three classifiers and compare their test accuracies.

    Opens a "Results" window, fits logistic regression, a decision tree and a
    random forest on the module-level training matrix, and stores each model's
    test-set predictions in the module-level ``predictions`` dict. It then
    shows a table and a bar chart with the accuracy of every entry currently
    in ``predictions``.
    """
    result_window = Toplevel(root)
    result_window.title("Results")
    result_window.geometry("500x600")
    tk.Label(result_window, text="Please wait. This may take a few minutes").pack()

    # (status text, key in `predictions`, estimator class). The logistic
    # regression key matches the one written by log_reg(), so the model appears
    # once in the results instead of twice under two spellings.
    steps = (
        ("Analyzing Logistic Regression", "LogisticRegression", LogisticRegression),
        ("Analyzing Decision Tree", "DecisionTree", tree.DecisionTreeClassifier),
        ("Analyzing RandomForestClassifier ", "RandomForest", RandomForestClassifier),
    )
    for status_text, key, estimator_cls in steps:
        tk.Label(result_window, text=status_text).pack()
        # Fitting runs inside the button callback and blocks the Tk event loop,
        # so paint the status label before the long computation starts.
        result_window.update()
        model = estimator_cls()
        model.fit(X_train_data_new, Y_train_data)
        predictions[key] = model.predict(x_test_data_new)

    print_results = {
        name: accuracy_score(y_test_data, predicted)
        for name, predicted in predictions.items()
    }
    result_table = pd.DataFrame(list(print_results.items()), columns=["Model", "Accuracy"])

    # Create a frame to hold the table and the chart
    results_frame = ttk.Frame(result_window)
    results_frame.pack(side=tk.TOP, fill=tk.BOTH, expand=1)

    # Create a table using a tkinter treeview widget
    table = ttk.Treeview(results_frame, columns=["Accuracy"])
    table.heading("#0", text="Model")
    table.heading("#1", text="Accuracy")
    table.column("#0", width=200)
    table.column("#1", width=100)
    for name, accuracy in print_results.items():
        table.insert("", tk.END, text=name, values=[accuracy])
    table.pack(side=tk.TOP, fill=tk.BOTH, expand=1)

    # Create a bar chart using matplotlib and seaborn
    fig = plt.Figure(figsize=(6, 6), dpi=100)
    ax = fig.add_subplot(111)
    sns.barplot(x="Model", y="Accuracy", data=result_table, ax=ax)
    ax.set_title("Model accuracy")
    ax.set_xticklabels(ax.get_xticklabels(), rotation=90)
    canvas = FigureCanvasTkAgg(fig, master=results_frame)
    canvas.draw()
    canvas.get_tk_widget().pack(side=tk.TOP, fill=tk.BOTH, expand=1)

def analyze_each():
    """Open a window for picking one classifier to train and evaluate.

    The window holds a drop-down with the three model names and a Submit
    button. Submitting a valid choice starts the matching training function
    three seconds later on a timer thread; once it returns, an
    "Execution complete" label is appended to the main window.
    """
    window = Toplevel(root)
    window.title("Rating")
    window.geometry("500x600")

    # Drop-down entry -> function that trains that model and reports on it.
    runners = {
        "Logistic Regression": log_reg,
        "RandomForest": ensmble,
        "Decision Tree": dtree,
    }

    def prin():
        tk.Label(root, text="Execution complete").pack()

    def final():
        # Resolve the choice at click time. An empty or hand-typed value that
        # matches no model is ignored, so "Execution complete" is never shown
        # for a run that did not happen.
        runner = runners.get(drop_down.get())
        if runner is None:
            return

        def handle_submit_wrapper():
            runner()
            prin()

        # NOTE(review): the runner creates Tk widgets from this timer thread
        # rather than from the thread running mainloop() - confirm this is
        # acceptable on the target Tk build.
        t = threading.Timer(3.0, handle_submit_wrapper)  # delay execution of handle_submit_wrapper by 3 seconds
        t.start()

    # create tkinter widgets

    classifier_model = tk.Label(window, text="Classifier Model")
    classifier_model.pack()

    models = list(runners)
    drop_down = ttk.Combobox(window, values=models)
    drop_down.pack()

    button = ttk.Button(window, text="Submit", command=final)
    button.pack()

def predict_rating():
    """Open a window that predicts the sentiment of a review typed by the user.

    The window holds a text entry, a Submit button, an output label that shows
    "Bad", "Neutral" or "Good", and a status label used as a progress readout.
    The prediction comes from a LogisticRegression model fitted on the
    module-level training matrix. The model is fitted on the first valid
    submission and reused for later submissions made in the same window.
    """
    window = Toplevel(root)
    window.title("Rating")
    window.geometry("500x600")

    # Holds the fitted classifier under "model" once the first submission has
    # trained it, so later submissions skip the fit.
    model_cache = {}

    def get_model():
        """Return the fitted classifier, training it on first use."""
        if "model" not in model_cache:
            lr_model = LogisticRegression()
            lr_model.fit(X_train_data_new, Y_train_data)
            model_cache["model"] = lr_model
        return model_cache["model"]

    def handle_submit():
        """Validate the entry text, classify it and display the sentiment."""
        # get customer review input
        review = Inp_text.get()

        # Reject short input before doing any expensive work, and return
        # normally instead of raising a non-exception value.
        if len(review) < 3:
            Out_text.config(text="Input cannot be less than 3 characters long")
            return

        # Paint the status text now; the first call to get_model() blocks the
        # event loop while the classifier is fitted.
        fp.config(text="Processing...")
        fp.update()

        # Predicting Rating of the Input Review
        lr_model = get_model()
        new_test_transform = count_vector.transform([review])

        # clear existing output
        Out_text.config(text="---")
        fp.config(text="---")

        # Cosmetic 1-100% readout; it runs after the fit, not alongside it.
        for i in range(100):
            fp.config(text=f"Processing...{i+1}%")
            fp.update()
            time.sleep(0.01)

        prediction = lr_model.predict(new_test_transform)
        print(prediction)
        # predict() returns one label per input row; exactly one row was passed.
        label = int(prediction[0])
        if label == 1:
            Out_text.config(text="Neutral")
        elif label == 2:
            Out_text.config(text="Good")
        else:
            Out_text.config(text="Bad")

    # create tkinter widgets
    customer_review = tk.Label(window, text="Customer Review")
    customer_review.pack()

    Inp_text = tk.Entry(window, width=50)
    Inp_text.pack()

    button_rating = ttk.Button(window, text="Submit", command=handle_submit)
    button_rating.pack()

    rating = tk.Label(window, text="Rating")
    rating.pack()

    Out_text = tk.Label(window, text="")
    Out_text.pack()

    fp = tk.Label(window, text="")
    fp.pack()
def actions():
    """Add the main-window controls: an action drop-down and a Submit button.

    Pressing Submit opens the window that matches the selected drop-down
    entry; an empty or unrecognised entry does nothing.
    """
    # Drop-down entry -> function that opens the matching window.
    handlers = {
        "Analyze Each Model": analyze_each,
        "Predict Rating": predict_rating,
        "Compare Models": compare_models,
    }

    def handle_choice():
        handler = handlers.get(drop_down_choice.get())
        if handler is not None:
            handler()

    # create tkinter widgets
    drop_down_choice = ttk.Combobox(root, font=("Arial", 18), values=list(handlers))
    drop_down_choice.pack()
    # Empty label placed between the drop-down and the button.
    tk.Label(root, text="", font=("Arial", 18)).pack()

    choose_button = customtkinter.CTkButton(
        root,
        font=('Arial', 25),
        text="Submit",
        width=300,
        command=handle_choice,
    )
    choose_button.pack()
    # Local name: this assignment does not rebind the module-level `can`.
    can = tk.Frame(root)
    can.pack()

# Build the main window, add its controls, then hand control to the Tk event loop.
root = tk.Tk()
root.title("Sentiment Analysis of Customer Reviews")
root.geometry("500x600")
tk.Label(
    root,
    text="Sentiment Analysis of Customer Reviews",
    fg="#05445E",
    font=("Arial", 18),
).pack()
actions()

# Blocks until the main window is closed.
root.mainloop()

[0]
