In [1]:

import numpy as np
import pprint
import math
import matplotlib
import matplotlib.pyplot as plt
from glob import glob
import re
from re import split


In [2]:
mlen = 2  # number of states to transition to/from
tlen = 3  # number of ways to measure transition time:
          # length of time stable, length of time spent transitioning, or both combined

# Regex separator for the times arrays as the text is currently formatted:
# any run of '[' characters followed by the word 'list'.
# Written as a raw string so '\[' is a regex escape, not an (invalid) Python string escape.
sttr = r'\[*list'

In [3]:
def end_append(times1, times2):
    """Combine two parallel collections of time arrays, entry by entry.

    Each entry of ``times1`` is paired with the entry at the same position in
    ``times2`` and the pair is merged with ``array_smash``.

    Parameters:
        times1, times2: sequences of time arrays of matching layout
            (presumably one array per way of measuring time -- confirm with caller).

    Returns:
        An object array shaped like ``np.shape(times1)`` holding the merged entries.
    """
    merged = np.empty(np.shape(times1), dtype=object)
    # Walk both inputs in lock-step and join each pair at the end of its lists.
    for position, (first, second) in enumerate(zip(times1, times2)):
        merged[position] = array_smash((first, second))
    return merged

def array_smash(timepair):
    """Concatenate two 2-D arrays of sequences cell by cell.

    Parameters:
        timepair: a pair ``(time1, time2)`` of 2-D arrays indexable as
            ``arr[r, c]``, where every cell holds a sequence of times.

    Returns:
        A new object array with the row/column counts of ``time1`` in which
        cell ``[r, c]`` is ``time1[r, c]`` followed by ``time2[r, c]``.
    """
    first, second = timepair
    n_rows = len(first)
    n_cols = len(first[0])
    combined = np.empty((n_rows, n_cols), dtype=object)
    # Visit every (row, column) position in row-major order.
    for row, col in np.ndindex(n_rows, n_cols):
        combined[row, col] = np.concatenate((first[row, col], second[row, col]))
    return combined
            
# Smallest and largest value of an array, as a (min, max) pair
def minmax(arr):
    """Return ``(np.min(arr), np.max(arr))`` for the given array-like."""
    lowest = np.min(arr)
    highest = np.max(arr)
    return lowest, highest

# Shared implementation for plotter and eig_plotter
def _plot_with_range_lines(xs, yls, cols, xlabel, rangeys=None):
    """Draw each series as a solid line with dotted min/max guide lines.

    Parameters:
        xs: x values shared by every series.
        yls: iterable of y series, one per line to draw.
        cols: colours, indexed in step with ``yls``.
        xlabel: label for the x axis.
        rangeys: optional data, indexed in step with ``yls``, whose min and max
            set the dotted horizontal lines. When None, each series' own
            min and max are used.
    """
    _fig, ax = plt.subplots()
    for index, y in enumerate(yls):
        colour = cols[index]
        ax.plot(xs, y, c=colour, ls='-')
        # Dotted horizontal lines mark the extremes of the chosen range data.
        low, high = minmax(y if rangeys is None else rangeys[index])
        ax.axhline(y=low, c=colour, ls=':')
        ax.axhline(y=high, c=colour, ls=':')
    ax.set_xlabel(xlabel, fontsize=18)
    ax.set_ylabel('observed probability of transition', fontsize=16)


# Plot the graphs
def plotter(xs, yls, cols, xlabel):
    """Plot every series in ``yls`` against ``xs`` on a new figure.

    Each series is drawn in its colour from ``cols`` with dotted horizontal
    lines at that series' own minimum and maximum.
    """
    _plot_with_range_lines(xs, yls, cols, xlabel)


# Plot the graphs, taking the min/max guide lines from separate range data
def eig_plotter(xs, yls, cols, xlabel, rangeys):
    """Plot every series in ``yls`` against ``xs`` on a new figure.

    Same as ``plotter`` except that the dotted horizontal lines for series
    ``i`` are placed at the minimum and maximum of ``rangeys[i]``.
    """
    _plot_with_range_lines(xs, yls, cols, xlabel, rangeys)


# Euclidean distance between two vectors
def vector_dist(v1, v2):
    """Return the Euclidean distance between points ``v1`` and ``v2``.

    Components are paired by index over ``range(len(v1))``, so ``v2`` must be
    at least as long as ``v1``; any extra components of ``v2`` are ignored.
    """
    squared_diffs = []
    for i in range(len(v1)):
        delta = v1[i] - v2[i]
        squared_diffs.append(delta ** 2)
    return math.sqrt(np.sum(squared_diffs))

# Scale a row so that it sums to one
# Rows of a matrix may sum to zero (no transitions); those are passed through untouched
def ratio(row):
    """Return ``row`` divided by its sum, or ``row`` itself if the sum is not positive."""
    total = np.sum(row)
    if not total > 0:
        # Nothing to normalise by: hand back the input unchanged.
        return row
    return row / total

# Express every column as percentages of that column's sum
def col_norm(array):
    """Normalise each column of ``array`` with ``ratio`` and scale it by 100.

    Returns a new array with the same orientation as the input.
    """
    scaled_columns = [ratio(column) * 100 for column in array.T]
    return np.array(scaled_columns).T

# Express every row as percentages of that row's sum
def row_norm(array):
    """Normalise each row of ``array`` with ``ratio`` and scale it by 100."""
    scaled_rows = []
    for row in array:
        scaled_rows.append(ratio(row) * 100)
    return np.array(scaled_rows)

# Flatten one level of nesting: a sequence of sequences becomes a single array
def flattener(array_of_lists):
    """Return a NumPy array holding the items of every inner sequence, in order."""
    flat = []
    for inner in array_of_lists:
        flat.extend(inner)
    return np.array(flat)

# Read data from its text file
def parser_arr(filename):
    """Parse a results text file into thresholds, per-video arrays and state counts.

    The file is scanned line by line. A line containing 'Processing' supplies
    the video number; a 'T threshold' line immediately followed by a
    'C threshold' line starts one record, which is followed by a transition
    array, ``tlen`` times arrays and a line of state counts.

    Parameters:
        filename: path of the text file to read.

    Returns:
        A list ``[thresholds, vidarrs, statecounts]`` where
        - ``thresholds`` is an array with one entry per distinct consecutive
          threshold set (the numbers found on the T and C threshold lines),
        - ``vidarrs`` is an object array with, per threshold set, three lists:
          transition arrays, video numbers and times arrays,
        - ``statecounts`` is a list with, per threshold set, one list of
          floats per record.
    """
    number_separators = r' |\[|]'  # split on spaces and square brackets
    thresholds = []
    vidarrs = []
    statecounts = []
    with open(filename) as textfile:
        line = textfile.readline()
        while line:
            if 'Processing' in line:  # collect the video number from the text
                vidnum = line.split('pp_')[-1]
                vidnum = vidnum.split('.avi')[0]
                vidnum = vidnum.split('_')[0]
                vidnum = int(vidnum)
                line = textfile.readline()

            # Collect transition and stability thresholds
            if 'T threshold' in line:
                # Numbers on the 'T threshold' line
                thresh_set = [float(word) for word in line.split() if int_from_text(word)]

                line = textfile.readline()
                if 'C threshold' in line:
                    # Numbers on the 'C threshold' line
                    thresh_set.extend(float(word) for word in line.split() if int_from_text(word))

                    # Initialise results arrays when the threshold set changes
                    if len(thresholds) == 0 or thresholds[-1] != thresh_set:
                        thresholds.append(thresh_set)
                        vidarrs.append([[] for i in range(3)])  # one list each for transitions, video num, and times
                        statecounts.append([])  # num recorded frames in each state

                    # Collect transition data
                    line = textfile.readline()
                    trarr = read_square_arr(line, textfile)
                    vidarrs[-1][0].append(trarr)
                    # vidnum comes from the most recent 'Processing' line
                    vidarrs[-1][1].append(vidnum)

                    # Collect time data: each times array is preceded and
                    # followed by one line that is read here
                    tiarrs = []
                    for i in range(tlen):
                        line = textfile.readline()
                        tiarr = read_square_arr_list(line, textfile)
                        tiarrs.append(tiarr)
                        line = textfile.readline()
                    vidarrs[-1][2].append(tiarrs)

                    # Collect state data from the line read after the last times array
                    statecounts[-1].append(
                        [float(f) for f in filter(int_from_text, split(number_separators, line))])
                else:
                    print("missing C thresh")

            line = textfile.readline()

    # vidarrs mixes arrays, ints and nested lists, so it is ragged: request an
    # object array explicitly (NumPy >= 1.24 refuses to infer one).
    return [np.array(thresholds), np.array(vidarrs, dtype=object), statecounts]

# Decide if text can be converted to a float
def int_from_text(text):
    """Return True if ``float(text)`` succeeds, otherwise False.

    Despite the name, the check is for float convertibility. Only the
    conversion errors (TypeError, ValueError) are treated as "not a number";
    anything else, such as KeyboardInterrupt, is allowed to propagate.
    """
    try:
        float(text)
    except (TypeError, ValueError):
        return False
    return True
    
# Pull the integers out of each text string in a list
def int_from_text_list(textlist):
    """Return, for every string in ``textlist``, the list of integers it contains.

    Every run of digits in a string becomes one integer; a string without
    digits yields an empty list.
    """
    return [list(map(int, re.findall(r'\d+', chunk))) for chunk in textlist]

# Read one of the transition arrays
def read_square_arr(line, textfile):
    """Read an ``mlen`` x ``mlen`` numeric array printed over ``mlen`` lines.

    Parameters:
        line: the already-read first line of the array; it must contain '[['.
        textfile: open file object from which the remaining rows are read.

    Returns:
        A float array of shape ``(mlen, mlen)``, or None (after printing a
        warning) when ``line`` does not start an array.
    """
    if '[[' not in line:
        print("missing trans array")  # just a warning, in case things go south
        return None

    arr = np.zeros((mlen, mlen))
    for row in range(mlen):
        if row > 0:
            # The first row is in the line passed in; later rows follow in the file.
            line = textfile.readline()
        # Split on spaces and square brackets, keep only the numeric tokens.
        arr[row, :] = [float(f) for f in filter(int_from_text, split(r' |\[|]', line))]
    return arr
  
        
# Continue reading lines until the end string is reached
def grab_full_lines(line, string, textfile):
    """Extend ``line`` with following lines until it contains ``string``.

    Parameters:
        line: text read so far.
        string: terminator that marks the end of the row.
        textfile: open file object to read continuation lines from.

    Returns:
        ``line`` with every continuation line appended. If the file ends
        before ``string`` is found, whatever was accumulated is returned.
    """
    while string not in line:  # the row has not yet terminated, so add the next line
        continuation = textfile.readline()
        if not continuation:
            # End of file: readline() would return '' forever, so stop here.
            break
        line += continuation
    return line

# Read one of the times-to-transition arrays
def read_square_arr_list(line, textfile):
    """Read a two-row array whose cells are lists of integers.

    Each row may be wrapped over several lines in the file; a row is complete
    once the text contains ')]'. The row is split at the ``sttr`` separator
    and the integers in each piece form one cell.

    Parameters:
        line: the already-read first line of the array; it must contain '[['.
        textfile: open file object from which the rest of the array is read.

    Returns:
        A NumPy array built from the two parsed rows (an object array when the
        cells have differing lengths), or None (after printing a warning) when
        ``line`` does not start an array.
    """
    if '[[' not in line:
        print("missing times array")
        return None

    rows = []
    for row_index in range(2):  # the array is read as exactly two rows
        if row_index > 0:
            line = textfile.readline()  # next row of array
        line = grab_full_lines(line, ')]', textfile)
        # Take out newlines, split at separator, keep only pieces starting with non-whitespace
        pieces = [x for x in split(sttr, line.replace('\n', '')) if re.match(r'\S', x)]
        rows.append(int_from_text_list(pieces))

    try:
        return np.array(rows)
    except ValueError:
        # Cells of differing length make the data ragged; NumPy >= 1.24 only
        # builds such arrays when an object dtype is requested explicitly.
        return np.array(rows, dtype=object)
