In [1]:
import re
import logging
import numpy as np
import pandas as pd
import multiprocessing

from re import sub
from time import time 
from unidecode import unidecode
from gensim.models import Word2Vec
from collections import defaultdict
from gensim.models import KeyedVectors
from gensim.test.utils import get_tmpfile
from gensim.models.phrases import Phrases, Phraser

from gensim.models import Word2Vec
from sklearn.cluster import KMeans

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, f1_score

from tensorflow.keras.layers import Input, LSTM, Dense
from tensorflow.keras.models import Model

import tensorflow as tf
from transformers import GPT2Tokenizer, TFGPT2LMHeadModel

from sklearn.svm import LinearSVC
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split

logging.basicConfig(format="%(levelname)s - %(asctime)s: %(message)s", datefmt= '%H:%M:%S', level=logging.INFO)\

import warnings
warnings.filterwarnings('ignore')
warnings.simplefilter('ignore')

In [2]:
def text_to_word_list(text):
    
    text = str(text)
    text = text.lower()

    # Clean the text
    text = sub(r"[^A-Za-z0-9^,!?.\/'+]", " ", text)
    text = sub(r"\+", " plus ", text)
    text = sub(r",", " ", text)
    text = sub(r"\.", " ", text)
    text = sub(r"!", " ! ", text)
    text = sub(r"\?", " ? ", text)
    text = sub(r"'", " ", text)
    text = sub(r":", " : ", text)
    text = sub(r"\s{2,}", " ", text)

    text = text.split()

    return text

In [3]:
def create_tfidf_dictionary(x, transformed_file, features):
    '''
    create dictionary for each input sentence x, where each word has assigned its tfidf score
    
    inspired  by function from this wonderful article: 
    https://medium.com/analytics-vidhya/automated-keyword-extraction-from-articles-using-nlp-bfd864f41b34
    
    x - row of dataframe, containing sentences, and their indexes,
    transformed_file - all sentences transformed with TfidfVectorizer
    features - names of all words in corpus used in TfidfVectorizer

    '''
    vector_coo = transformed_file[x.name].tocoo()
    vector_coo.col = features.iloc[vector_coo.col].values
    dict_from_coo = dict(zip(vector_coo.col, vector_coo.data))
    return dict_from_coo

def replace_tfidf_words(x, transformed_file, features):
    '''
    replacing each word with it's calculated tfidf dictionary with scores of each word
    x - row of dataframe, containing sentences, and their indexes,
    transformed_file - all sentences transformed with TfidfVectorizer
    features - names of all words in corpus used in TfidfVectorizer
    '''
    dictionary = create_tfidf_dictionary(x, transformed_file, features)   
    return list(map(lambda y:dictionary[f'{y}'], x.title.split()))

In [4]:
def replace_sentiment_words(word, sentiment_dict):
    '''
    replacing each word with its associated sentiment score from sentiment dict
    '''
    try:
        out = sentiment_dict[word]
    except KeyError:
        out = 0
    return out

In [5]:
def classify_sentiment(feed):
    
    word_vectors = Word2Vec.load("word2vec.model").wv
    model = KMeans(n_clusters=2, max_iter=1000, random_state=True, n_init=50).fit(X=word_vectors.vectors.astype('double'))
    sentiment_map = pd.read_csv('sentiment_dictionary.csv')
    sentiment_dict = dict(zip(sentiment_map.words.values, sentiment_map.sentiment_coeff.values))
    new_data = pd.DataFrame(data={'title': [feed], 'rate': [-1]})
    
    file_weighting = new_data.copy()
    tfidf = TfidfVectorizer(tokenizer=lambda y: y.split(), norm=None)
    tfidf.fit(file_weighting.title)
    features = pd.Series(tfidf.get_feature_names())
    transformed = tfidf.transform(file_weighting.title)

    replaced_tfidf_scores = file_weighting.apply(lambda x: replace_tfidf_words(x, transformed, features), axis=1)
    
    sentiment_score = []
    for i in range(len(replaced_tfidf_scores)):
        score = 0
        for j in range(len(replaced_tfidf_scores[i])):
            word = file_weighting.iloc[i].title.split()[j]
            if word in sentiment_dict:
                score += sentiment_dict[word] * replaced_tfidf_scores[i][j]
        sentiment_score.append(score)
    new_data['sentiment_rate'] = sentiment_score
    new_data['prediction'] = (new_data.sentiment_rate>0).astype('int8')
    
    print(new_data)
    
    if new_data.prediction[0] == 0:
        return "Negative"
    elif new_data.prediction[0] == 1:
        return "Positive"

In [10]:
def prescribe_sentiment(feedback):

    # Load the dataset of feedbacks into a variable named df
    df = pd.read_csv('feedbacks_and_prescriptions.csv')
    
    # Create a pipeline to vectorize the feedbacks and train a linear SVM model
    text_clf = Pipeline([('tfidf', TfidfVectorizer()), ('clf', LinearSVC())])
    
    # Fitting model on data
    text_clf.fit(df['feedbacks'], df['precriptions'])
    
    # Getting predictions
    new_feedback = feedback
    predicted_solution = text_clf.predict([new_feedback])[0]
    return 'System Prescription: ' + '\n' + predicted_solution

In [11]:
import panel as pn 

text_input1 = pn.widgets.TextAreaInput(name="Enter Student's Feedback:", width=500, height=150)
text_input2 = pn.widgets.TextAreaInput(name="Enter Teacher's Feedback:", width=500, height=150)
submit_button1 = pn.widgets.Button(name="Submit Student Feedback", button_type="primary", width=160)
submit_button2 = pn.widgets.Button(name="Submit Teachers Feedback", button_type="primary", width=160)
submit_button3 = pn.widgets.Button(name="Get Prescription", button_type="primary")
feedback_display1 = pn.widgets.StaticText(value="", width=500, height=25, background='lightblue')
feedback_display2 = pn.widgets.StaticText(value="", width=500, height=25, background='lightblue')
feedback_display3 = pn.widgets.StaticText(value="System Prescription: ", width=500, height=150, background='lightblue')
info_button1 = pn.widgets.Button(name="?", button_type="success", width=20)
info_button2 = pn.widgets.Button(name="?", button_type="success", width=20)


@submit_button1.on_click
def submit_clicked1(event):
    value1 = text_input1.value
    if len(value1) < 1:
        feedback_display1.value = "Invalid Input"
    else:
        feedback1 = classify_sentiment(value1.lower())
        feedback_display1.value = feedback1

@submit_button2.on_click
def submit_clicked2(event):
    value2 = text_input2.value
    if len(value2) < 1:
        feedback_display2.value = "Invalid Input"
    else:
        feedback2 = classify_sentiment(value2.lower())
        feedback_display2.value = feedback2
    
@submit_button3.on_click
def submit_clicked3(event):
    value3 = text_input1.value
    feedback3 = prescribe_sentiment(value3.lower())
    print('-------------------------------')
    print(feedback3)
    feedback_display3.value = feedback3
    
@info_button1.on_click
def update_text1(event):
    if len(info_button1.name) == 1:
        info_button1.name = 'Kindly add the feedback in the above text field to get it classified'
    else:
        info_button1.name = '?'

@info_button2.on_click
def update_text2(event):
    if len(info_button2.name) == 1:
        info_button2.name = 'Kindly add the feedback in the above text field to get it classified'
    else:
        info_button2.name = '?'


heading = pn.pane.HTML("<h1 style='text-align:center;'>Advisory System Using Sentiment Analysis</h1>")

pn.Column(
pn.Row("", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", heading),
pn.Row(text_input1, "", "", "", "", "", text_input2),
pn.Row(feedback_display1, "", "", "", "", "", feedback_display2),
pn.Row("", "", "", "", "", "", "", "", "", "", submit_button1, info_button1, "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", submit_button2, info_button2),
pn.Row(""),
pn.Row("", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", feedback_display3),
pn.Row("", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", 
       "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", submit_button3),
pn.Row(""),
background='lightgray'
).show(view='popup')

INFO - 21:17:39: Starting Bokeh server version 2.4.2 (running on Tornado 6.1)
INFO - 21:17:39: User authentication hooks NOT provided (default user enabled)


Launching server at http://localhost:56249


<panel.io.server.Server at 0x229197c3820>

INFO - 21:17:40: 200 GET / (::1) 798.02ms
INFO - 21:17:40: 200 GET /static/extensions/panel/css/alerts.css (::1) 2.34ms
INFO - 21:17:40: 200 GET /static/extensions/panel/css/dataframe.css (::1) 6.59ms
INFO - 21:17:40: 200 GET /static/extensions/panel/css/debugger.css (::1) 13.53ms
INFO - 21:17:40: 200 GET /static/extensions/panel/css/card.css (::1) 6.29ms
INFO - 21:17:40: 200 GET /static/extensions/panel/css/json.css (::1) 8.47ms
INFO - 21:17:40: 200 GET /static/extensions/panel/css/loading.css (::1) 9.42ms
INFO - 21:17:40: 200 GET /static/extensions/panel/css/widgets.css (::1) 18.62ms
INFO - 21:17:40: 200 GET /static/js/bokeh-gl.min.js?v=863c26b3d7cbcf2a0dbf119589404b3ca66734754cd0af1d6e6ca17679ae711126917f171667194a6f04765eba06d9eb2d7d1f2ba7ef8fee420b9244557386f8 (::1) 5.37ms
INFO - 21:17:40: 200 GET /static/extensions/panel/css/markdown.css (::1) 12.75ms
INFO - 21:17:40: 200 GET /static/js/bokeh-widgets.min.js?v=0ede1975746c96e47b24a08c83a45a9282b6524986185f620debadba5132e16c43f9416

                                               title  rate  sentiment_rate  \
0  this is not excepable as the students are not ...    -1       -7.349486   

   prediction  
0           0  


INFO - 21:17:52: loading Word2Vec object from word2vec.model
INFO - 21:17:52: loading wv recursively from word2vec.model.wv.* with mmap=None
INFO - 21:17:52: setting ignored attribute cum_table to None
INFO - 21:17:52: Word2Vec lifecycle event {'fname': 'word2vec.model', 'datetime': '2023-02-13T21:17:52.434732', 'gensim': '4.1.2', 'python': '3.9.12 (main, Apr  4 2022, 05:22:27) [MSC v.1916 64 bit (AMD64)]', 'platform': 'Windows-10-10.0.22000-SP0', 'event': 'loaded'}


                           title  rate  sentiment_rate  prediction
0  we will work on it to improve    -1        0.030601           1
-------------------------------
System Prescription: 
student seems unsatisfied. a meeting can be arranged with the student for complete details


INFO - 21:18:04: WebSocket connection closed: code=1001, reason=None


In [12]:
# presc_df = pd.read_excel('feedbacks_and_prescriptions.xlsx')
# presc_df.feedbacks = [" ".join(x.lower().split()) for x in presc_df.feedbacks]
# presc_df.precriptions = [" ".join(x.lower().split()) for x in presc_df.precriptions]
# print(len(presc_df))
# presc_df = presc_df.drop_duplicates(subset='feedbacks', keep="first")
# print(len(presc_df))
# presc_df.to_csv('feedbacks_and_prescriptions.csv', index=False)
# presc_df


# classify_sentiment('this is not excepable as the students are not getting proper responses from the teachers'.lower())

# I am unable to pay my university fees because I have no money left