# In [12]:
'''
Sentiment analysis of IMDB movie reviews: computes Spearman rank correlations between task performance
(mean accuracy and precision over 30 trials) and categorical modularity scores, and writes them to
movies_correlations.csv. Feel free to change file names or use the non-interactive version
abstractness.py to enter file names as command-line arguments.
'''

# imports
from scipy.stats import spearmanr as sr
import csv

# scores: modularity scores, keyed first by k ("2", "3", "4") and then by level ("1", "2", "3");
# every innermost dict starts out empty and maps category name -> list of scores once data is loaded
scores = {k: {level: {} for level in ("1", "2", "3")} for k in ("2", "3", "4")}

# reading in modularity data: our data assumes naming convention [level]_[k].csv within a "data" folder
# each file has a header row whose columns (after the first) are category names; every later row
# holds one score per category in the matching column. The first column of every row is ignored.
for k, levels in scores.items():
    for level, by_category in levels.items():
        path = "data/" + level + "_" + k + ".csv"
        with open(path, newline="") as csvfile:
            # csv.reader yields [] for blank lines; dropping them keeps a trailing
            # newline from surfacing as an IndexError further down
            rows = [row for row in csv.reader(csvfile) if row]
        if not rows:
            raise ValueError(path + " is empty: expected a header row of category names")
        header, data = rows[0], rows[1:]
        # column i of the header names the scores stored in column i of each data row
        for i, cat in enumerate(header[1:], start=1):
            by_category[cat] = [float(row[i]) for row in data]

# metrics: task performance, one entry per data row - "acc" = accuracy, "prec" = precision
metrics = {"acc": [], "prec": []}

# reading in movie task performance data: assumes name movie_metrics.csv in ../task_movies directory
# 2-column data [accuracy],[precision]; the first row is treated as a header and skipped
with open("../task_movies/movie_metrics.csv", newline="") as csvfile:
    reader = csv.reader(csvfile)
    next(reader, None)  # discard the header row (no-op when the file is empty)
    for row in reader:
        if not row:  # csv.reader yields [] for blank lines, e.g. a trailing newline
            continue
        metrics["acc"].append(float(row[0]))
        metrics["prec"].append(float(row[1]))

# writes results to a file: one row per (level, k, category) holding the Spearman rank correlation
# of that category's modularity scores with accuracy and with precision
with open("movies_correlations.csv", "w") as o:
    # csv.writer quotes category names that contain commas or quotes, which plain string
    # concatenation would have emitted as extra columns; "\n" keeps the original line ending
    writer = csv.writer(o, lineterminator="\n")
    writer.writerow(["level", "k", "category", "accuracy", "precision"])  # column headers
    for k, levels in scores.items():  # go through k = 2, 3, 4
        for level, by_category in levels.items():  # for each level 1, 2, 3
            for cat, values in by_category.items():
                acc_corr = sr(values, metrics["acc"]).correlation
                prec_corr = sr(values, metrics["prec"]).correlation
                # str() keeps the number formatting identical to the previous output
                writer.writerow([level, k, cat, str(acc_corr), str(prec_corr)])