# Description

Plain analysis and reporting about datacasts from the visual change dataset.

**Remarks**
* Reddit recordings of DOM mutations seem to be broken
* Amazon: one DOM record != complete DOM (records are separated by URL change, which does not seem to be the same as a DOM context change)

### Imports

In [None]:
# Modules
import json
import numpy as np

# Settings

In [None]:
# Defines
# Root folder of the dataset; it holds one sub-folder per participant.
dataset_dir = r'C:/StimuliDiscoveryData/Dataset_visual_change'  # SET ME!
participants = [f'p{number}' for number in range(1, 5)]

# Categories: the sites recorded for each kind of website
shopping = [
    'walmart',
    'amazon',
    'steam',
]
news = [
    'reddit',
    'cnn',
    'guardian',
]
health = [
    'nih',
    'webmd',
    'mayo',
]
cars = [
    'gm',
    'nissan',
    'kia',
]

# Category name -> list of its sites (the very same list objects as above)
categories = dict(shopping=shopping, news=news, health=health, cars=cars)

# Extraction

In [None]:
# Aggregate statistics over all recorded sessions. One session is one JSON file
# stored at <dataset_dir>/<participant>/<site>.json.
total_mutation_count = 0  # 'add' + 'remove' + 'change' items, any timestamp
total_mutation_count_session = 0  # same, but only items with jsVideoTs >= 0
mutations_count_in_sessions = []  # one in-session mutation count per file
total_scroll_count = 0
total_moves_count = 0
total_clicks_count = 0
total_gaze_point_count = 0
total_url_count = 0  # sum over files of the distinct URLs seen in that file

# DOM item types that are tallied separately. Any other value of item['type']
# is counted as 'change' (the original analysis made the same assumption).
dom_types = ('dom', 'add', 'remove', 'change')
# 'dom' items are DOM creations and are not counted as mutations.
mutation_types = ('add', 'remove', 'change')
# State types whose 'url' field contributes to the per-file URL set.
url_state_types = ('initialUrl', 'loadStarted', 'urlChanged', 'mainDOMContentLoaded')

# All session files, ordered by category, then site, then participant.
session_files = [
    dataset_dir + '/' + p + '/' + s + '.json'
    for sites in categories.values()
    for s in sites
    for p in participants
]

# Go over all available sessions
for file_path in session_files:

    # Load file. JSON text is UTF-8 by specification; without an explicit
    # encoding Python would use the locale's preferred encoding instead
    # (for example cp1252 on Windows). The file is closed right after parsing.
    with open(file_path, encoding='utf-8') as f:
        print('Working on: ' + file_path)
        data = json.load(f)

    # Get DOM records
    dom_records = data['DOMRecords']
    print(f'Count of DOM Records: {len(dom_records)}')

    # Per-file counters by DOM item type: overall and during the session
    dom_counts = dict.fromkeys(dom_types, 0)
    dom_counts_session = dict.fromkeys(dom_types, 0)

    # Go over entries in DOM records (the record names are not needed)
    for dom_record in dom_records.values():
        for item in dom_record:
            # Items with a non-negative jsVideoTs count as "during session"
            during_session = int(item['jsVideoTs']) >= 0

            dom_type = item['type']
            if dom_type not in dom_types:
                dom_type = 'change'  # fallback for every other type

            dom_counts[dom_type] += 1
            if during_session:
                dom_counts_session[dom_type] += 1

    total_mutation_count += sum(dom_counts[t] for t in mutation_types)
    mutations_count_session = sum(dom_counts_session[t] for t in mutation_types)
    total_mutation_count_session += mutations_count_session
    mutations_count_in_sessions.append(mutations_count_session)
    print(
        f"Creation: {dom_counts['dom']}"
        f", Add: {dom_counts['add']}"
        f", Remove: {dom_counts['remove']}"
        f", Change: {dom_counts['change']}")
    print(
        f"Creation during Session: {dom_counts_session['dom']}"
        f", Add during Session: {dom_counts_session['add']}"
        f", Remove during Session: {dom_counts_session['remove']}"
        f", Change during Session: {dom_counts_session['change']}")

    # Go over events; other event types are ignored
    for event in data['Events']:
        event_type = event['type']
        if event_type == 'jsScroll':
            total_scroll_count += 1
        elif event_type == 'move':
            total_moves_count += 1
        elif event_type == 'click':
            total_clicks_count += 1

    # Get gaze points
    total_gaze_point_count += len(data['Gaze'])

    # Collect URLs from the states. A set is used because a URL is sometimes
    # loaded multiple times and should be counted once per file.
    urls = {
        state['url']
        for state in data['States']
        if state['type'] in url_state_types
    }
    total_url_count += len(urls)

    print()

print(f'Total Mutation Count: {total_mutation_count}')
print(f'Total Mutation Count during Session: {total_mutation_count_session}')
print(
    'Mutations mean / stddev during Session: '
    f'{np.mean(mutations_count_in_sessions):.2f}'
    f'±{np.std(mutations_count_in_sessions):.2f}')
print(f'Total Scroll Count: {total_scroll_count}')
print(f'Total Move Count: {total_moves_count}')
print(f'Total Click Count: {total_clicks_count}')
print(f'Total Gaze Point Count: {total_gaze_point_count}')
print(f'Total URL Count: {total_url_count}')