In [None]:
## Lifelines

import os
from itertools import groupby

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy import signal
from scipy.stats import sem

def plot_lines(ys, title, wtp_mean, wtp_std):
    """Plot the mean enjoyment curve for one trailer and save it as a PNG.

    Draws the per-sample mean of `ys`, a shaded band of +/- one standard
    error of the mean around it, and a degree-10 polynomial fit of the mean
    as a smoothed curve. The figure is written to
    ``./plots/lifeline_<title>.png``; the ``./plots`` directory is created
    if it does not exist yet.

    Args:
        ys: Iterable of equal-length 1-D numeric sequences (they are combined
            with ``np.stack``).
        title: Trailer name; used in the figure title and in the file name.
        wtp_mean: Value shown after "WTP M=" in the figure title.
        wtp_std: Value shown after "SD=" in the figure title.
    """
    # Stack once and reuse the matrix for both the mean and the standard error.
    stacked = np.stack(ys)
    ys_mean = stacked.mean(axis=0)
    ys_sem = sem(stacked, axis=0)

    # One x position per sample, 100 apart (the x axis is labelled in ms).
    xs = np.arange(len(ys_mean)) * 100

    fig, ax = plt.subplots()
    smooth = np.poly1d(np.polyfit(xs, ys_mean, 10))(xs)
    ax.plot(xs, smooth, color="#3da803", label="Smooth")
    ax.plot(xs, ys_mean, color="#040dc2", label="Original")
    ax.fill_between(xs, ys_mean - ys_sem, ys_mean + ys_sem, alpha=0.2)
    ax.set_ylabel("Enjoyment")
    ax.set_xlabel("Time (ms)")
    ax.set_title("Enjoyment of {} Movie Trailer Over Time\nWTP M={}, SD={}".format(title, str(wtp_mean), str(wtp_std)))

    ax.legend(bbox_to_anchor=(1.35, 1.0))
    fig.tight_layout()

    ax.set_ylim([0, 100])
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)

    # savefig does not create missing directories, so make sure the target exists.
    os.makedirs("./plots", exist_ok=True)
    fig.savefig("./plots/lifeline_{}.png".format(title), dpi=1000)

# Load the raw survey export.
data = pd.read_csv('./lifelines.csv')

## Perform Exclusions
# Drop every row whose `Finished` value is not the string 'True'.
unfinished_rows = data.index[data['Finished'] != 'True']
data = data.drop(index=unfinished_rows)

## All passed at least one of the two attention and comprehension checks
## Drop low connections
low_connection_rows = data.index[data['ResponseId'] == 'R_10ovXcMfOnkIZYm']
data = data.drop(index=low_connection_rows)

# Trailer names; they are used below to build the per-trailer column names.
titles = ['Horror', 'Adventure', 'Drama', 'Biography', 'Action', 'Fantasy', 'SciFi', 'Animation']

# Not referenced anywhere in the visible cells.
lengths = []

def listify(row):
    """Parse a comma-separated value into a list of floats.

    The argument is passed through ``str`` first, so a non-string scalar is
    accepted as well. Raises ``ValueError`` if any comma-separated piece is
    not a valid float literal.
    """
    return [float(piece) for piece in str(row).split(',')]

# Convert every trailer's enjoyment column from comma-separated text into a
# list of floats (see `listify`).
for title in titles:
    enjoyment_col = '{}Enjoyment'.format(title)
    data.loc[:, enjoyment_col] = data.loc[:, enjoyment_col].apply(listify)

## Remove data with too few or many points, and exclude those whose enjoyment stays unchanged for too long
# NOTE(review): the earlier comment spoke of "20 seconds" and the log line of
# "200", but the check itself compares against 300 samples. The threshold is
# kept as coded; confirm which value was intended.
EXPECTED_SAMPLES = 900       # nominal number of samples per trailer
MAX_LENGTH_DEVIATION = 300   # tolerated |len - EXPECTED_SAMPLES|
MAX_FLAT_RUN = 300           # longest tolerated run of identical consecutive samples


def _longest_run(values):
    """Return the length of the longest run of equal consecutive items (0 if empty)."""
    return max((sum(1 for _ in run) for _, run in groupby(values)), default=0)


rms = []
for _, row in data.iterrows():
    for title in titles:
        enjoyment = row['{}Enjoyment'.format(title)]

        if abs(len(enjoyment) - EXPECTED_SAMPLES) > MAX_LENGTH_DEVIATION:
            print("Low data qual {}".format(row['ResponseId']))
            print(len(enjoyment))
            rms.append(row['ResponseId'])

        # Compute the run length once and reuse it for the check and the log line.
        longest_run = _longest_run(enjoyment)
        if longest_run > MAX_FLAT_RUN:
            print("Bigger than {}.. {}".format(MAX_FLAT_RUN, row['ResponseId']))
            print(longest_run)
            rms.append(row['ResponseId'])

# De-duplicate while keeping first-seen order, then drop the flagged responses.
rms = list(dict.fromkeys(rms))
data = data[~data['ResponseId'].isin(rms)]
print(rms)
print(len(rms))

def resample_time(row, num_samples=900):
    """Resample a 1-D series to a fixed number of samples.

    Thin wrapper around ``scipy.signal.resample``.

    Args:
        row: 1-D numeric sequence to resample.
        num_samples: Number of samples in the result. Defaults to 900, the
            length that was previously hard-coded.

    Returns:
        The resampled values as returned by ``signal.resample``.
    """
    return signal.resample(row, num_samples)

## Resample every response to 900 samples, then plot one graph per trailer
for title in titles:
    enjoyment_col = '{}Enjoyment'.format(title)
    data.loc[:, enjoyment_col] = data.loc[:, enjoyment_col].apply(resample_time)

    # The willingness column name ends in '1' for Action/Adventure and in '4' otherwise.
    estr = '1' if title in ['Action', 'Adventure'] else '4'
    wtp = data['{}_willing_{}'.format(title.lower(), estr)].astype(int)

    plot_lines(
        data.loc[:, enjoyment_col],
        title='{}'.format(title),
        wtp_mean=round(wtp.mean(), 2),
        wtp_std=round(wtp.std(), 2),
    )


In [None]:
# Calculate metrics for predicting enjoyments...

# End Value
def get_last(row):
    """Return the final sample of a series.

    Indexes from the end rather than at the fixed position 899, so the result
    is still the last value if the series length is ever different from 900.

    Raises:
        IndexError: if `row` is empty.
    """
    return np.asarray(row)[-1]

# Number of Peaks
def get_num_peaks(row, threshold=100):
    """Count the samples in `row` that are at or above `threshold`.

    The input is converted with ``np.asarray`` so plain lists work as well as
    arrays.

    NOTE(review): this counts samples, so a plateau of k samples at the
    threshold contributes k; confirm whether distinct local maxima were
    intended instead.

    Args:
        row: 1-D numeric sequence.
        threshold: Minimum value for a sample to be counted. Defaults to 100,
            the value that was previously hard-coded.

    Returns:
        The count as an int.
    """
    return int(np.count_nonzero(np.asarray(row) >= threshold))


## TODO: Finish these!

# Number of Valleys
def get_num_valleys(row):
    """Placeholder for the "Number of Valleys" metric; not implemented yet.

    Ignores `row` and always returns None.
    """
    return None

# Number of Extrema
def get_num_extrema(row):
    """Placeholder for the "Number of Extrema" metric; not implemented yet.

    Ignores `row` and always returns None.
    """
    return None

# Integral
def get_integral(row):
    """Placeholder for the "Integral" metric; not implemented yet.

    Ignores `row` and always returns None.
    """
    return None

# 1st Derivative
def get_derivative_first(row):
    """Placeholder for the "1st Derivative" metric; not implemented yet.

    Ignores `row` and always returns None.
    """
    return None

# 1st Derivative Prime
def get_derivative_first_prime(row):
    """Placeholder for the "1st Derivative Prime" metric; not implemented yet.

    Ignores `row` and always returns None.
    """
    return None

# 1st Derivative Asc
def get_derivative_asc(row):
    """Placeholder for the "1st Derivative Asc" metric; not implemented yet.

    Ignores `row` and always returns None.
    """
    return None

# 1st Derivative Desc
def get_derivative_desc(row):
    """Placeholder for the "1st Derivative Desc" metric; not implemented yet.

    Ignores `row` and always returns None.
    """
    return None

# 1st Derivative End
def get_derivative_end(row):
    """Placeholder for the "1st Derivative End" metric; not implemented yet.

    Ignores `row` and always returns None.
    """
    return None

# 2nd Derivative
def get_derivative_sec(row):
    """Placeholder for the "2nd Derivative" metric; not implemented yet.

    Ignores `row` and always returns None.
    """
    return None

# 2nd Derivative Prime
def get_derivative_sec_prime(row):
    """Placeholder for the "2nd Derivative Prime" metric; not implemented yet.

    Ignores `row` and always returns None.
    """
    return None

# 2nd Derivative Asc
def get_derivative_sec_asc(row):
    """Placeholder for the "2nd Derivative Asc" metric; not implemented yet.

    Ignores `row` and always returns None.
    """
    return None

# 2nd Derivative Desc
def get_derivative_sec_desc(row):
    """Placeholder for the "2nd Derivative Desc" metric; not implemented yet.

    Ignores `row` and always returns None.
    """
    return None

# 2nd Derivative End
def get_derivative_sec_end(row):
    """Placeholder for the "2nd Derivative End" metric; not implemented yet.

    Ignores `row` and always returns None.
    """
    return None

# Derive the per-trailer summary columns from each enjoyment series.
for title in titles:
    enjoyment = data.loc[:, '{}Enjoyment'.format(title)]
    data['{}Maximum'.format(title)] = enjoyment.apply(max)
    data['{}Minimum'.format(title)] = enjoyment.apply(min)
    data['{}EndValue'.format(title)] = enjoyment.apply(get_last)
    # These two columns used to be filled with `min` (copy-paste slip); call
    # the matching metric functions instead. `get_num_valleys` is still an
    # unfinished stub that returns None, so that column stays empty until
    # the stub is implemented.
    data['{}NumOfPeaks'.format(title)] = enjoyment.apply(get_num_peaks)
    data['{}NumOfValleys'.format(title)] = enjoyment.apply(get_num_valleys)


data.to_csv('./lifelines_cleaned.csv')

In [None]:
#
#
#
#

In [None]:
# Plot each participant separately

def plot_lines(max_participants=10):
    """Show, for every trailer, one row of per-participant enjoyment plots.

    For each title in `titles` a figure with `max_participants` side-by-side
    panels is created; the first `max_participants` rows of `data` each get
    one panel holding the raw series ("Original") and its degree-10
    polynomial fit ("Smooth"). Panels beyond the number of available rows are
    left empty.

    NOTE: this definition reuses the name of the four-argument `plot_lines`
    defined in the first cell and replaces it once this cell has run.

    Args:
        max_participants: Number of panels per figure. Defaults to 10, the
            value that was previously hard-coded.
    """
    for title in titles:
        # squeeze=False keeps `axis` two-dimensional even for a single panel.
        figure, axis = plt.subplots(1, max_participants, squeeze=False)
        enjoyment = data['{}Enjoyment'.format(title)].head(max_participants)

        # Draw straight onto the subplot axes. Opening a new figure per
        # participant (as before) only produced additional blank figures.
        for panel, ys in zip(axis[0], enjoyment):
            xs = np.arange(len(ys)) * 100
            smooth = np.poly1d(np.polyfit(xs, ys, 10))(xs)
            panel.plot(xs, smooth, color="#3da803", label="Smooth")
            panel.plot(xs, ys, color="#040dc2", label="Original")

        plt.show()


plot_lines()