<a href="https://colab.research.google.com/github/RomanKrajewski/rpg_evaluation/blob/master/rpg_evaluation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
# Mount Google Drive inside the Colab runtime at /content/drive.
from google.colab import drive
drive.mount('/content/drive')


In [0]:
import csv
import numpy as np
from sklearn.metrics import mean_squared_error as mse
import math
import matplotlib.pyplot as plt
import json
import re
from scipy import stats
import pandas


# Switch the working directory to the Drive folder; every file below is opened by a relative path.
%cd '/content/drive/My Drive/Colab Notebooks/rpg_files'
# Results export; it is parsed further down as a tab-separated file with a header row.
resultsfile_path = 'data_textrpg_2020-03-07_12-00.csv'

# 0-based column positions within each row of the results file.
gender_index = 6
age_index = 7
# Columns strictly between age_index and rpg_code_index are read as the item answers.
rpg_code_index = 36
# NOTE(review): not referenced anywhere else in the visible notebook — confirm whether it is still needed.
last_item_index = 44

# Accumulators filled while reading the results file (one entry per data row,
# except item_header which receives the item column names from the header row).
genders = []
ages = []
rpg_codes = []
item_header = []
item_results = []

# Parse the tab-separated results export: the first row carries the column
# names, every following row is one participant.
with open(resultsfile_path, newline='') as resultsfile:
    reader = csv.reader(resultsfile, delimiter='\t', quotechar='"')

    # The item answers sit in the columns strictly between age and RPG code.
    first_item_index = age_index + 1
    header = next(reader)
    item_header += header[first_item_index:rpg_code_index]

    for record in reader:
        genders.append(record[gender_index])
        ages.append(record[age_index])
        rpg_codes.append(record[rpg_code_index])
        item_results.append(record[first_item_index:rpg_code_index])

In [0]:
# The CSV values arrive as strings; convert them to small integer arrays.
ages = np.array(ages, dtype=np.uint8)
item_results_array = np.array(item_results, dtype=np.int8)

In [0]:
questionnaire_order_path = 'items_in_questionnaire_order.txt'
game_order_path = 'items_in_game_order.txt'

# One item text per line, in the order the items appear in the questionnaire.
with open(questionnaire_order_path) as questionnaire_order_file:
    questionnaire_list = [line.strip() for line in questionnaire_order_file]

# Map each item column name to its full question text (line k belongs to column k).
questionnaire_labels = {
    item_header[position]: full_question
    for position, full_question in enumerate(questionnaire_list)
}

# The same item texts, in the order they are encountered in the game.
with open(game_order_path) as game_order_file:
    game_list = [line.strip() for line in game_order_file]

# Filled later, once the game situations have been loaded.
game_labels = {}


In [0]:
# Each answer inside an RPG code is a single digit with an optional leading minus.
game_answer_exp = '-?[0-9]'
answer_pattern = re.compile(game_answer_exp)
# One list of answer strings per participant, in the order they occur in the code.
rpg_answers = [answer_pattern.findall(code) for code in rpg_codes]

In [0]:
with open('game_questions.json') as game_questions_file:
    situations = json.load(game_questions_file)

rpg_answers = np.array(rpg_answers, np.int8)

# Re-arrange the game answers so that column k lines up with questionnaire
# item k; items that never occur in the game keep their zero column.
rpg_answers_ordered = np.zeros((rpg_answers.shape[0], len(questionnaire_list)), np.int8)
for column in range(rpg_answers.shape[1]):
    game_item = game_list[column]
    if game_item not in questionnaire_list:
        continue
    target_column = questionnaire_list.index(game_item)
    rpg_answers_ordered[:, target_column] = rpg_answers[:, column]
    # Remember the situation text shown in the game for this item column.
    game_labels[item_header[target_column]] = situations['situations'][column]['text']


In [0]:
# Columns whose header contains "NEG" end up non-positive, all others non-negative.
negated_columns = np.array(
    [position for position, header_name in enumerate(item_header) if "NEG" in header_name],
    dtype=np.intp,
)

# Start from absolute values (np.abs returns new arrays) ...
rpg_answers_normalized = np.abs(rpg_answers_ordered)
questionnaire_answers_normalized = np.abs(item_results_array)

# ... then flip the sign of the "NEG" columns in both answer sets.
rpg_answers_normalized[:, negated_columns] *= -1
questionnaire_answers_normalized[:, negated_columns] *= -1

In [0]:
# Interleave the column names: each questionnaire item is directly followed
# by its "_GAME" counterpart.
combined_header = []
for header_name in item_header:
    combined_header.append(header_name)
    combined_header.append(header_name + '_GAME')

In [0]:
# Interleave the answer columns in the same order as combined_header:
# questionnaire column k, then game column k. Iterating over the transpose
# yields one column at a time; zip stops at the shorter of the two.
interleaved_columns = []
for questionnaire_column, game_column in zip(questionnaire_answers_normalized.T,
                                             rpg_answers_normalized.T):
    interleaved_columns.append(questionnaire_column)
    interleaved_columns.append(game_column)

# Columns were collected as rows, so transpose back to participants x columns.
combined_results = np.array(interleaved_columns).T

In [0]:
def getTotalDisEmpRes(i, game):
    """Sum one participant's normalized answers per scale.

    Args:
        i: Row index of the participant.
        game: Truthy to use the game answers, falsy to use the
            questionnaire answers.

    Returns:
        Tuple ``(dis, emp, res)``: the sums over the columns whose header in
        ``item_header`` contains "DIS", "EMP" and "RES" respectively.
    """
    answers = rpg_answers_normalized if game else questionnaire_answers_normalized
    participant_row = answers[i]

    def scale_total(tag):
        # Integer positions of the item columns that belong to this scale.
        scale_columns = np.where([tag in ih for ih in item_header])[0]
        return np.sum(participant_row[scale_columns])

    return (scale_total("DIS"), scale_total("EMP"), scale_total("RES"))

# Column names for the scale totals: the three questionnaire totals first,
# then the three game totals.
total_scale_headers = []
for gamestring in ["", "_GAME"]:
    for scale in ["DIS", "EMP", "RES"]:
        total_scale_headers.append("TOTAL_" + scale + gamestring)

# One row per participant: (DIS, EMP, RES) from the questionnaire followed by
# (DIS, EMP, RES) from the game, matching total_scale_headers.
participant_totals = [
    getTotalDisEmpRes(i, False) + getTotalDisEmpRes(i, True)
    for i in range(questionnaire_answers_normalized.shape[0])
]
total_scale_results = np.array(participant_totals).reshape(
    (len(questionnaire_answers_normalized), len(total_scale_headers)))

In [0]:
def save_as_csv():
  """Write per-participant results and summary statistics to 'sorted_results.csv'.

  The tab-separated file contains, in this order:
    * a header row ("Nr", "Gender", "Age", the names in ``combined_header``,
      then the names in ``total_scale_headers``),
    * one row per participant (running number, gender, age, the row of
      ``combined_results`` and the row of ``total_scale_results``),
    * four summary rows labelled "Mode", "Median", "Mean" and "STD", computed
      per column; mean and standard deviation are rounded to two decimals.

  Reads the notebook globals ``genders``, ``ages``, ``combined_header``,
  ``combined_results``, ``total_scale_headers`` and ``total_scale_results``.
  """

  def first_mode(values):
    # stats.mode yields a length-1 array in older SciPy releases and a plain
    # scalar in newer ones for 1-D input; atleast_1d makes both indexable.
    return np.atleast_1d(stats.mode(values)[0])[0]

  def rounded(statistic):
    # Wrap a statistic so that its result is rounded to two decimals.
    return lambda values: np.around(statistic(values), decimals=2)

  summary_statistics = [
      ("Mode", first_mode),
      ("Median", np.median),
      ("Mean", rounded(np.mean)),
      ("STD", rounded(np.std)),
  ]

  def summary_row(label, statistic):
    # The statistic of the ages, then of every result column, left to right.
    row = [label, "-", statistic(ages)]
    for table in (combined_results, total_scale_results):
      row.extend(statistic(table[:, column]) for column in range(table.shape[1]))
    return row

  # newline='' hands line-ending control to the csv module, as its
  # documentation requires; otherwise some platforms emit blank lines.
  with open('sorted_results.csv', mode='w', newline='') as sorted_results_file:
    csv_writer = csv.writer(sorted_results_file, delimiter='\t', quotechar='"',
                            quoting=csv.QUOTE_MINIMAL)

    sorted_results_header = ["Nr", "Gender", "Age"]
    sorted_results_header.extend(combined_header)
    sorted_results_header.extend(total_scale_headers)
    csv_writer.writerow(sorted_results_header)

    for i in range(combined_results.shape[0]):
      row = [i, genders[i], ages[i]]
      row.extend(combined_results[i, :])
      row.extend(total_scale_results[i, :])
      csv_writer.writerow(row)

    for label, statistic in summary_statistics:
      csv_writer.writerow(summary_row(label, statistic))

In [0]:
# Root-mean-square error between each questionnaire item and the game
# situation in the same column.
rmses = []
for column in range(rpg_answers_normalized.shape[1]):
    column_mse = mse(questionnaire_answers_normalized[:, column],
                     rpg_answers_normalized[:, column])
    rmses.append(math.sqrt(column_mse))

In [0]:
def plot_rmses(values, labels):
  """Draw a bar chart of RMSE values, one bar per label.

  Args:
    values: Iterable of bar heights (RMSE per item), one per label.
    labels: Iterable of tick labels; any iterable works, including the
      ``dict.values()`` view of a label mapping.
  """
  # Materialise both inputs so len() and repeated iteration are safe.
  values = list(values)
  labels = list(labels)

  width = 0.1  # spacing between bar centres; the bars are half as wide
  # Integer range scaled afterwards: np.arange with a float step can return
  # one element too many because of rounding.
  x = np.arange(len(labels)) * width

  # Scale the figure width with the number of bars (at least one unit wide).
  fig, ax = plt.subplots(figsize=(max(len(values), 1) / 2, 10))
  ax.bar(x, values, width / 2)

  ax.set_ylabel('RMSE')
  ax.set_title('RMSE between questionnaire item and corresponding game situation')
  ax.set_xticks(x)
  ax.set_xticklabels(labels, rotation=40, ha="right")

  fig.tight_layout()

# plot_rmses(rmses, questionnaire_labels.values())

In [0]:
# Item-level frames: one column per item, one row per participant.
game_columns = [ih + "_GAME" for ih in item_header]
df_game = pandas.DataFrame(rpg_answers_normalized, columns=game_columns)
df_questionnaire = pandas.DataFrame(questionnaire_answers_normalized, columns=item_header)

# Scale-level frames: per-participant totals for the three scales.
scale_names = ["DIS", "EMP", "RES"]
participant_indices = range(questionnaire_answers_normalized.shape[0])

questionnaire_totals = np.array([getTotalDisEmpRes(i, False) for i in participant_indices])
df_scales = pandas.DataFrame(questionnaire_totals, columns=scale_names)

game_totals = np.array([getTotalDisEmpRes(i, True) for i in participant_indices])
df_scales_game = pandas.DataFrame(game_totals, columns=scale_names)


In [0]:
# Spearman rank correlation of every game column with every questionnaire
# column: row index = questionnaire column, column index = game column.
game_matrix = df_game.to_numpy()
questionnaire_matrix = df_questionnaire.to_numpy()
column_count = df_game.shape[1]

correlation_rows = []
for col_b in range(column_count):
    correlation_row = []
    for col_a in range(column_count):
        rho = stats.spearmanr(game_matrix[:, col_a], questionnaire_matrix[:, col_b],
                              nan_policy='raise')[0]
        correlation_row.append(rho)
    correlation_rows.append(correlation_row)
correlations = np.array(correlation_rows)

In [0]:
def plot_corr(df_game, df_questionnaire):
  """Show a heat map of Spearman correlations between game and questionnaire columns.

  Cell (row b, column a) of the plotted matrix is the Spearman correlation of
  game column ``a`` with questionnaire column ``b``; the x axis therefore
  carries the game columns and the y axis the questionnaire columns.

  Args:
    df_game: DataFrame with one column per game variable.
    df_questionnaire: DataFrame with one column per questionnaire variable
      and the same rows (participants) as ``df_game``.
  """
  game_values = df_game.to_numpy()
  questionnaire_values = df_questionnaire.to_numpy()

  # Each axis is sized by its own frame so non-square inputs work as well.
  correlations = np.array([
      [stats.spearmanr(game_values[:, col_a], questionnaire_values[:, col_b],
                       nan_policy='raise')[0]
       for col_a in range(game_values.shape[1])]
      for col_b in range(questionnaire_values.shape[1])
  ])

  fig, ax = plt.subplots(figsize=(20, 20))
  im = ax.imshow(correlations)

  # One tick per matrix column (x) and per matrix row (y) ...
  ax.set_xticks(np.arange(correlations.shape[1]))
  ax.set_yticks(np.arange(correlations.shape[0]))
  # ... labelled with the column names of the frames that were passed in, so
  # the labels always match the plotted data in number and in orientation.
  x_labels = [str(name) for name in df_game.columns]
  y_labels = [str(name) for name in df_questionnaire.columns]
  ax.set_xticklabels(x_labels)
  ax.set_yticklabels(y_labels)
  # Both frames may use identical column names, so name the axes explicitly.
  ax.set_xlabel("Game")
  ax.set_ylabel("Questionnaire")

  plt.colorbar(im)
  # Rotate the x tick labels and anchor them so they stay readable.
  plt.setp(ax.get_xticklabels(), rotation=45, ha="right",
           rotation_mode="anchor")

  ax.set_title("Correlation Matrix of Game Situations and Questionnaire Items")
  fig.tight_layout()
  plt.show()

In [0]:
# Correlate the per-scale totals (DIS, EMP, RES) from the game with those from the questionnaire.
plot_corr(df_scales_game, df_scales)

In [0]:
# Display (number of rows, number of columns) of the questionnaire frame.
df_questionnaire.shape