<a href="https://colab.research.google.com/github/DawenZhang/online_review_intelligent_kano/blob/filled/product_review_time_series.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
#@markdown # product feature selection

from collections import OrderedDict
import numpy as np

product_feature_1 = "filter" #@param {type:"string"}
product_feature_1_color = "df6953" #@param {type:"string"}

product_feature_2 = "warranty" #@param {type:"string"}
product_feature_2_color = "a1bc35" #@param {type:"string"}

product_feature_3 = "taste" #@param {type:"string"}
product_feature_3_color = "22afbe" #@param {type:"string"}

product_feature_4 = "customer service" #@param {type:"string"}
product_feature_4_color = "a3a0ec" #@param {type:"string"}

product_feature_5 = "carafe/pitcher" #@param {type:"string"}
product_feature_5_color = "e4b858" #@param {type:"string"}

product_feature_6 = "travel mug" #@param {type:"string"}
product_feature_6_color = "469c6d" #@param {type:"string"}

product_feature_7 = "chamber/tank" #@param {type:"string"}
product_feature_7_color = "8b674e" #@param {type:"string"}

product_feature_8 = "cleaning" #@param {type:"string"}
product_feature_8_color = "d772c0" #@param {type:"string"}

product_feature_9 = "pump" #@param {type:"string"}
product_feature_9_color = "f1b8a4" #@param {type:"string"}

product_feature_10 = "water reservoir" #@param {type:"string"}
product_feature_10_color = "005e9f" #@param {type:"string"}

# Feature label -> list of aliases. A "/" inside a label separates alternative
# names for the same feature ("carafe/pitcher" -> ["carafe", "pitcher"]); the
# insertion order follows the numbering of the form fields above.
product_features = OrderedDict(
    (label, label.split("/"))
    for label in (
        product_feature_1,
        product_feature_2,
        product_feature_3,
        product_feature_4,
        product_feature_5,
        product_feature_6,
        product_feature_7,
        product_feature_8,
        product_feature_9,
        product_feature_10,
    )
)

# One "#rrggbb" colour string per feature, in the same order as the form fields.
color_array = np.array([
    "#" + hex_code
    for hex_code in (
        product_feature_1_color,
        product_feature_2_color,
        product_feature_3_color,
        product_feature_4_color,
        product_feature_5_color,
        product_feature_6_color,
        product_feature_7_color,
        product_feature_8_color,
        product_feature_9_color,
        product_feature_10_color,
    )
])

In [0]:
#@markdown # data preparation

# Flatten the alias lists of all features into a single list, keeping the
# feature order and, inside each feature, the alias order.
feature_vocabulary = [
    alias
    for aliases in product_features.values()
    for alias in aliases
]
        
def check_features(something, product_features):
    """Return the product features that list ``something`` as one of their aliases.

    Args:
        something: Entity name to look up. The comparison ignores case.
            Non-string values (for example the NaN that pandas produces for
            an empty CSV cell) match nothing instead of raising.
        product_features: Mapping of feature label -> list of alias strings.

    Returns:
        The matching feature labels (keys of ``product_features``) in mapping
        order. A label appears at most once, even if the feature lists the
        same alias several times.
    """
    if not isinstance(something, str):
        return []

    # Lower-case once instead of once per alias; aliases are lower-cased too
    # so that a feature typed with capitals in the form can still match.
    needle = something.lower()
    return [
        label
        for label, aliases in product_features.items()
        if any(needle == alias.lower() for alias in aliases)
    ]
  
  
#@markdown the product used for anomaly detection
product_id = 1 #@param {type:"integer"}

import pandas as pd

#@markdown the datasheet path; if you wish to upload files, leave them blank
customer_reviews_datasheet_path = "https://github.com/DawenZhang/online_review_intelligent_kano/raw/filled/provided/product_2/customer_reviews.csv" #@param {type:"string"}
entity_sentiment_datasheet_path = "https://github.com/DawenZhang/online_review_intelligent_kano/raw/filled/provided/product_2/entity_sentiment.csv" #@param {type:"string"}

# Load the reviews table: read it from the configured path when one is given;
# otherwise keep prompting for a Colab upload until at least one file arrives
# and parse the first uploaded file as UTF-8 encoded CSV.
if customer_reviews_datasheet_path != "":
  customer_reviews = pd.read_csv(customer_reviews_datasheet_path)
else:
  import io
  from google.colab import files
  uploaded = {}
  while not uploaded:
    print("as the path field is left blank, please upload customer_reviews_datasheet")
    uploaded = files.upload()
  customer_reviews = pd.read_csv(io.StringIO(next(iter(uploaded.values())).decode('utf-8')))

month_period_list = [m for m in range(1, 13)]
year_period_list = [y for y in range(2016, 2019)]

# Despite its name, every entry of this list holds the reviews of one
# month-long window (see the loop at the end of this section).
reviews_by_year = []

customer_reviews['date'] = pd.to_datetime(customer_reviews['date'])

from datetime import datetime
from dateutil.relativedelta import relativedelta
from dateutil import rrule


#@markdown the starting and ending dates for time series;
#@markdown ending date is not included
#@markdown ending time must be later than starting time; setting day values to ***01***s is preferred

starting_date = '2016-01-01' #@param {type:"date"}
ending_date = '2018-01-01' #@param {type:"date"}
starting_date_time = datetime.strptime(starting_date, '%Y-%m-%d')
ending_date_time = datetime.strptime(ending_date, '%Y-%m-%d')

added_date_time = starting_date_time

month_count = 0

# The product filter is the same for every window, so evaluate it only once.
is_selected_product = customer_reviews['product_id'] == product_id

# Walk from the starting date in one-month steps; every step contributes one
# frame with the selected product's review_id / review_content, sorted by
# review_id. month_count ends up equal to the number of windows.
while added_date_time < ending_date_time:
  month_count += 1
  next_date_time = added_date_time + relativedelta(months = 1)
  # The ending date is documented as excluded, so the last window must stop
  # there even when the ending date is not aligned to a month boundary.
  window_end = min(next_date_time, ending_date_time)
  current_product_reviews = customer_reviews.loc[is_selected_product &
                                 (customer_reviews['date'] >= added_date_time) &
                                 (customer_reviews['date'] < window_end),
                                 ['review_id', 'review_content']].sort_values(by = ['review_id'])
  added_date_time = next_date_time
  reviews_by_year.append(current_product_reviews)

  
# Canonicalise feature aliases: inside every review text each secondary alias
# of a feature is replaced by that feature's first alias. The result is one
# list of strings per window, in the same row order as reviews_by_year.
replaced_product_reviews_by_year = []
for reviews_single_year in reviews_by_year:
    replaced_reviews_single_year = []
    # Read the text column by label: positional lookups such as review[1] on
    # a label-indexed Series are deprecated in pandas 2.x.
    for review_content in reviews_single_year['review_content']:
        # Keep exactly one entry per review so that positions still line up
        # with the rows of reviews_by_year; missing text becomes "" instead
        # of crashing on .replace().
        replaced_review = "" if pd.isna(review_content) else str(review_content)
        for aliases in product_features.values():
            canonical_alias = aliases[0]
            for secondary_alias in aliases[1:]:
                replaced_review = replaced_review.replace(secondary_alias, canonical_alias)
        replaced_reviews_single_year.append(replaced_review)
    replaced_product_reviews_by_year.append(replaced_reviews_single_year)
    

from sklearn.feature_extraction.text import TfidfVectorizer

# The TF-IDF vocabulary is the first alias of every feature, in feature order,
# so column j of each matrix belongs to the j-th entry of product_features.
tf_features = [aliases[0] for aliases in product_features.values()]

vectorizer = TfidfVectorizer(vocabulary = tf_features, lowercase=True, ngram_range=(1, 2))

# One entry per window: the TF-IDF matrix (rows = reviews of that window), or
# None when no matrix could be built for the window.
tfidf_by_year = []
for replaced_reviews_single_year in replaced_product_reviews_by_year:
    if not replaced_reviews_single_year:
        # Nothing to vectorise in a window without reviews.
        tfidf_by_year.append(None)
        continue
    try:
        tfidf = vectorizer.fit_transform(replaced_reviews_single_year)
    except ValueError:
        # Keep the None placeholder for input scikit-learn refuses to fit,
        # but no longer hide unrelated failures (or KeyboardInterrupt) the
        # way the former bare `except:` did.
        tfidf = None
    tfidf_by_year.append(tfidf)
        

feature_entities_by_year = []

# Load the entity sentiment table: read it from the configured path when one
# is given; otherwise keep prompting for a Colab upload until at least one
# file arrives and parse the first uploaded file as UTF-8 encoded CSV.
if entity_sentiment_datasheet_path != "":
  entity_sentiment = pd.read_csv(entity_sentiment_datasheet_path)
else:
  import io
  from google.colab import files
  uploaded = {}
  while not uploaded:
    print("as the path field is left blank, please upload entity_sentiment_datasheet")
    uploaded = files.upload()
  entity_sentiment = pd.read_csv(io.StringIO(next(iter(uploaded.values())).decode('utf-8')))
  
# For every window, collect per feature one [tfidf_weight, mean_score,
# summed_magnitude] triple for each review that mentions the feature:
#   feature_entities_by_year[window] = {feature label: [triple, ...]}

# Both of these are the same for every review, so build them once.
is_product_entity = entity_sentiment['product_id'] == product_id
feature_column = {label: column for column, label in enumerate(product_features)}

for year_num in range(len(reviews_by_year)):
    feature_entities = {}
    window_tfidf = tfidf_by_year[year_num]
    for review_num in range(len(reviews_by_year[year_num])):
        review_id = reviews_by_year[year_num].iloc[review_num]['review_id']
        current_all_entities = entity_sentiment.loc[
            (entity_sentiment['review_id'] == review_id) & is_product_entity,
            ['name', 'sentiment_score', 'sentiment_magnitude']
        ]

        # feature label -> [tfidf_weight, score, magnitude] of every matching
        # entity mentioned in this review
        current_entities = {}

        for e_index, entity in current_all_entities.iterrows():

            # Columns are read by label; positional entity[0] / entity[1] /
            # entity[2] on a label-indexed Series is deprecated in pandas 2.x.
            features = check_features(entity['name'], product_features)

            if len(features) > 0:

                # Entities without any sentiment magnitude are ignored.
                if entity['sentiment_magnitude'] == 0:
                    continue

                # `is None` instead of `== None`: comparing a sparse matrix
                # with `==` is not a reliable None test.
                if window_tfidf is None:
                    tfidf_weight = 0
                else:
                    # Row = position of the review inside its window, column =
                    # position of the first matched feature in product_features.
                    tfidf_weight = window_tfidf[review_num, feature_column[features[0]]]

                pending_entity = [
                    tfidf_weight,
                    entity['sentiment_score'],
                    entity['sentiment_magnitude'],
                ]

                for single_feature in features:
                    current_entities.setdefault(single_feature, []).append(pending_entity)

        for feature_name in current_entities:
            mentions = current_entities[feature_name]
            adding_entity = [0, 0, 0]
            for meta_feature_entity in mentions:
                adding_entity[0] += meta_feature_entity[0]
                adding_entity[1] += meta_feature_entity[1]
                adding_entity[2] += meta_feature_entity[2]

            # Weight and score become per-review means; the magnitude stays a sum.
            adding_entity[0] = adding_entity[0] / len(mentions)
            adding_entity[1] = adding_entity[1] / len(mentions)

            feature_entities.setdefault(feature_name, []).append(adding_entity)
    feature_entities_by_year.append(feature_entities)
    
    
# Collapse each window's per-review triples ([weight, score, magnitude]) into
# one summary row per feature:
#   [positive_count, positive_sum, negative_count, negative_sum, feature label]
# where the sums add up |score| * weight over reviews with a positive /
# negative score. Reviews with a score of exactly 0 count for neither side.
features_average_by_year = []
for feature_entities in feature_entities_by_year:
    features_average = []
    for feature_name, review_triples in feature_entities.items():
        positive_count = positive_sum = 0
        negative_count = negative_sum = 0
        # The magnitude tallies are accumulated but are not part of the row.
        magnitude_count = magnitude_sum = 0

        for weight, score, magnitude in review_triples:
            if score > 0:
                positive_sum += score * weight
                positive_count += 1
            elif score < 0:
                negative_sum += -score * weight
                negative_count += 1
            magnitude_sum += magnitude * weight
            magnitude_count += 1

        summary_row = [positive_count, positive_sum, negative_count, negative_sum, feature_name]
        features_average.append(summary_row)
    features_average_by_year.append(features_average)
    
# Per window, index the summary rows by their feature label (element 4).
features_out_of_order_by_year = [
    {summary_row[4]: summary_row for summary_row in average_features}
    for average_features in features_average_by_year
]

# Per window, list the rows in product_features order; a feature that has no
# row in that window gets an all-zero row. Existing rows are reused as the
# same list objects, not copied.
features_in_order_by_year = [
    [
        rows_by_label[feature] if feature in rows_by_label else [0, 0, 0, 0, feature]
        for feature in product_features
    ]
    for rows_by_label in features_out_of_order_by_year
]
    
# Turn the per-window numbers into running totals: every numeric column (all
# but the trailing label) of a window has the already accumulated value of
# the previous window added to it, in place.
for previous_rows, current_rows in zip(features_in_order_by_year, features_in_order_by_year[1:]):
    for previous_row, current_row in zip(previous_rows, current_rows):
        for attribute_num in range(len(current_row) - 1):
            current_row[attribute_num] += previous_row[attribute_num]

# The two count columns (0 = positive, 2 = negative) must not stay 0: bump
# them to 1.
for window_rows in features_in_order_by_year:
    for row in window_rows:
        for count_column in (0, 2):
            if row[count_column] == 0:
                row[count_column] += 1

import numpy as np

kano_array = np.array(features_in_order_by_year)

In [0]:
#@title select angles and radius for Time Series Kano
# Slider angle of the boundary line that lies next to the y axis; it is
# converted to the angle from the x axis, which the plotting code feeds to
# tan() to get the line's slope.
positive_angle = 23.1 #@param {type:"slider", min:0, max:45, step:0.1}
positive_angle_degree = 90 - positive_angle
# Slider angle of the boundary line that lies next to the x axis; used as-is
# (in degrees) as the angle from the x axis.
negative_angle = 28.4 #@param {type:"slider", min:0, max:45, step:0.1}
negative_angle_degree = negative_angle
# Radius of the dashed quarter circle drawn around the origin.
corner_radius = 0.3 #@param {type:"slider", min:0, max:1, step:0.1}
# Upper limit applied to both the x axis and the y axis.
x_y_axis_range = 1 #@param {type:"slider", min:0, max:1, step:0.1}
# Upper limit of the z axis: the number of monthly windows counted in the
# data preparation cell.
z_axis_range = month_count

# First and second argument of ax.view_init() in the plotting code.
view_angle_z = 20 #@param {type:"slider", min:0, max:180, step:1}
view_angle_xy = 225 #@param {type:"slider", min:0, max:360, step:1}

# Figure title and the labels of the three axes.
title = "Time Series Kano Classification" #@param {type:"string"}
satisfaction_axis_label = "Customer Satisfaction" #@param {type:"string"}
dissatisfaction_axis_label = "Customer Dissatisfaction" #@param {type:"string"}
time_axis_label = "Time - Month" #@param {type:"string"}

import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

fig = plt.figure()
# Figure.gca() no longer accepts keyword arguments (deprecated in Matplotlib
# 3.4, removed in 3.6); add_subplot() creates the 3-D axes on old and new
# releases alike.
ax = fig.add_subplot(111, projection='3d')

ax.set_title(title)
ax.set_xlabel(dissatisfaction_axis_label)
ax.set_ylabel(satisfaction_axis_label)
ax.set_zlabel(time_axis_label)
ax.set_xlim(0, x_y_axis_range)
ax.set_ylim(0, x_y_axis_range)

ax.set_zlim(0, z_axis_range)

# kano_array is indexed [window, feature, column] with the columns
# positive_count, positive_sum, negative_count, negative_sum, label. The
# numbers are stored as text next to the label, hence the astype(float).
# NOTE(review): x is positive_sum / positive_count but the x axis carries
# dissatisfaction_axis_label (and y, the negative ratio, carries the
# satisfaction label) - confirm that this pairing is intended.
# An array without windows is not 3-dimensional; draw no spokes in that case.
feature_total = kano_array.shape[1] if kano_array.ndim == 3 else 0
for qwk in range(feature_total):
    xy_3d = kano_array[:, qwk, :4].astype(float)

    x_3d = xy_3d[:, 1] / xy_3d[:, 0]
    y_3d = xy_3d[:, 3] / xy_3d[:, 2]
    z_3d = [ym for ym in range(1, 1 + z_axis_range)]

    xyz = np.array([x_3d, y_3d, z_3d])

    # One spoke per window, from the z axis out to (x, y); z_3d counts from 1,
    # so subtracting 1 puts the first window at height 0.
    for ncount in range(len(xyz[0])):
        ax.plot(xs=[0, xyz[0][ncount]], ys=[0, xyz[1][ncount]], zs=np.array([xyz[2][ncount], xyz[2][ncount]]) - 1, marker='1', markersize=5, color=color_array[qwk])


# Dashed guide lines, all drawn slightly below z = 0 (at -2 % of the z range).

# Quarter circle of radius corner_radius around the origin.
theta = np.linspace(0, 0.5 * np.pi, 100)
tz = np.linspace(0, 0, 100)
tr = tz ** 2 + corner_radius
tx = tr * np.sin(theta)
ty = tr * np.cos(theta)
ax.plot(tx, ty, tz - z_axis_range * 0.02, color = '#505050', ls = 'dashed')

# Line y = x * tan(negative_angle_degree), from the quarter circle out to
# x = x_y_axis_range.
bz = np.linspace(0, 0, 100)
br = np.linspace(corner_radius * np.cos(np.radians(negative_angle_degree)), x_y_axis_range, 100)
bx = br
by = br * np.tan(np.radians(negative_angle_degree))
ax.plot(bx, by, bz - z_axis_range * 0.02, color = '#505050', ls = 'dashed')

# Line y = x * tan(positive_angle_degree), from the quarter circle out to the
# point where y reaches x_y_axis_range.
cz = np.linspace(0, 0, 100)
cr = np.linspace(corner_radius * np.cos(np.radians(positive_angle_degree)), x_y_axis_range / np.tan(np.radians(positive_angle_degree)), 100)
cx = cr
cy = cr * np.tan(np.radians(positive_angle_degree))
ax.plot(cx, cy, cz - z_axis_range * 0.02, color = '#505050', ls = 'dashed')


ax.view_init(view_angle_z, view_angle_xy)

# Uncomment to hide the tick labels of an axis:
# plt.setp(ax.get_xticklabels(), visible=False)
# plt.setp(ax.get_yticklabels(), visible=False)
# plt.setp(ax.get_zticklabels(), visible=False)

plt.show()