In [None]:
# Import libraries
import numpy as np
import pandas as pd
import pickle as pkl

from thefuzz import process

In [None]:
# Load the per-video tracking tables, then label every frame with its zeitgeber hour (ZT) and a Day/Night bin
# NOTE: pickle.load can execute arbitrary code, so only load pickle files from a trusted source
# The with-block closes the file handle even if unpickling fails
with open("/home/moonmoon/FD/_Output/sorted_dist.pkl", "rb") as myfile:
    # Dictionary keyed by video ID (values are presumably one pandas DataFrame per video -- confirm against the script that writes this file)
    sorted_dist = pkl.load(myfile)

# Extract Albertson and Keene video IDs from stored dictionary keys
DictKeys = list(sorted_dist.keys())

# Species name is the part of each video ID before the first underscore; keep each name once
SpeciesList = np.unique([name.split("_", maxsplit = 1)[0] for name in DictKeys])

# Make list of species names of all Keene videos
KeeneList = ['CALLIPTERA', 'KUMWERA', 'LABROSUS', 'STUARTGRANTI']

# Every species that is not a Keene species is treated as an Albertson species
AlbertsonList = [species for species in SpeciesList if species not in KeeneList]

# Loop through all key, value pairs in sorted_dist dictionary (each table is modified in place)
for key, value in sorted_dist.items():
    # Extract current species name from key
    currentName = key.split("_", maxsplit = 1)[0]
    # Delete original id (space saving)
    del value['id']

    if currentName in KeeneList:
        # Keene videos: round the frame timestamp down to the hour (Keene videos start at ZT 1, an hour after lights on)
        value['ZT'] = value['frame'].dt.floor('h').dt.hour
    else:
        # Albertson videos (everything not in KeeneList): round the frame timestamp up to the hour (Albertson videos start at ZT 0, at lights on)
        value['ZT'] = value['frame'].dt.ceil('h').dt.hour

    # ZT 0-13 (inclusive) is labeled Day, ZT 14-23 is labeled Night; the rule is the same for both video sources
    value['Bin'] = pd.cut(value['ZT'], bins = [0, 13, 23], labels = ['Day', 'Night'], include_lowest = True)

In [None]:
# Pool the Day rows and the Night rows of every non-KUMWERA species into one table per species.
# Video IDs of the same species are expected to be adjacent in DictKeys: a species is finished
# as soon as a different species name shows up.

# Create empty dictionaries to store day and night values separately
dValues = {}
nValues = {}

# Name of the species currently being collected (None until the first non-KUMWERA video is seen)
startName = None
# Day / Night pieces of the current species; concatenated once per species instead of
# re-copying the growing table on every chunk
dayParts = []
nightParts = []

# Loop over a snapshot of the keys so entries can be deleted from sorted_dist mid-loop
for key in DictKeys:
    # Store value pair to current key
    value = sorted_dist[key]
    # Extract current species name from key
    currentName = key.split('_', maxsplit = 1)[0]

    # KUMWERA videos are left in sorted_dist for the next cell
    if 'KUMWERA' in currentName:
        continue

    if startName is None:
        # First species: print name to indicate the start of a new block
        print(f'Start {currentName}')
    elif currentName == startName:
        # Print chunk to indicate next track of the same species is being processed
        print('chunk')
    else:
        # Species changed: store the finished species under its name
        dValues[startName] = pd.concat(dayParts)
        nValues[startName] = pd.concat(nightParts)
        # Print end startName and begin currentName, indicating next species has begun
        print(f'end {startName}; begin {currentName}')
        print(f'Start {currentName}')
        # Start collecting the new species from scratch
        dayParts = []
        nightParts = []

    startName = currentName
    # Boolean filtering already returns new tables, so no extra copy is needed
    dayParts.append(value[value['Bin'] == 'Day'])
    nightParts.append(value[value['Bin'] == 'Night'])
    # delete entry from original dictionary (space saving)
    del sorted_dist[key]

# Store the last species (skipped when no non-KUMWERA video was found at all)
if startName is None:
    print('No non-KUMWERA videos found')
else:
    # Print end startName
    print(f'end {startName}')
    dValue = pd.concat(dayParts)
    nValue = pd.concat(nightParts)
    # Store day values in dictionary with startName as key
    dValues[startName] = dValue
    # Store night values in dictionary with startName as key
    nValues[startName] = nValue

In [None]:
# Pool the Day rows and the Night rows of all remaining KUMWERA videos into one table each.
# Requires dValues / nValues from the previous cell; every video whose species name contains
# 'KUMWERA' is pooled under the first such name encountered.

# Overwrite DictKeys with updated list of Species Keys
DictKeys = list(sorted_dist.keys())

# Name under which the KUMWERA tables are stored (None until the first KUMWERA video is seen)
startName = None
# Day / Night pieces, concatenated once after the loop
dayParts = []
nightParts = []

# Loop over a snapshot of the keys so entries can be deleted from sorted_dist mid-loop
for key in DictKeys:
    # Store value pair to current key
    value = sorted_dist[key]
    # Extract current species name from key
    currentName = key.split('_', maxsplit = 1)[0]

    # Only KUMWERA videos are handled here
    if 'KUMWERA' not in currentName:
        continue

    if startName is None:
        # Store currentName as startName
        startName = currentName
        # print startName to indicate the start of a new block
        print(f'Start {startName}')
    else:
        # Print chunk to indicate next track is being processed
        print('chunk')

    # Boolean filtering already returns new tables, so no extra copy is needed
    dayParts.append(value[value['Bin'] == 'Day'])
    nightParts.append(value[value['Bin'] == 'Night'])
    # delete entry from original dictionary (space saving)
    del sorted_dist[key]

# Store the pooled tables once; nothing is stored when there was no KUMWERA video,
# so values left over from the previous cell are never re-used by accident
if startName is None:
    print('No KUMWERA videos found')
else:
    # Print end startName
    print(f'end {startName}')
    dValue = pd.concat(dayParts)
    nValue = pd.concat(nightParts)
    # Store day values in dictionary with startName as key
    dValues[startName] = dValue
    # Store night values in dictionary with startName as key
    nValues[startName] = nValue

In [None]:
# Rename the keys of dValues / nValues to the best fuzzy match in the 'Species' column of the mbuna key

# Read in mbuna key
key = pd.read_csv('/home/moonmoon/FD/mbunaKey.csv')
# Sort rows by Mbuna x Species, reset index and drop
key = key.sort_values(['Mbuna', 'Species']).reset_index(drop = True)

# Store dValues keys in pkeys (not used further in this cell)
pkeys = pd.DataFrame(dValues.keys())

# Make empty lists to store keys and their matches
keys = []
matches = []

# Loop through each pkey in dValues keys
for pkey in dValues.keys():
    # extractOne returns the best match first: (matched species, score, ...)
    match = process.extractOne(pkey, key['Species'])
    # Append first position of match (best match) to matches list
    matches.append(match[0])
    # Append pkey to keys list
    keys.append(pkey)

# Old key -> new key lookup
renames = dict(zip(keys, matches))

# Two different old keys matching the same species would silently overwrite each other; report it
claimed = {}
for oldkey, newkey in renames.items():
    if newkey in claimed:
        print(f'WARNING: {claimed[newkey]} and {oldkey} both match {newkey}; keeping {oldkey}')
    claimed[newkey] = oldkey

# Rebuild both dictionaries in one pass instead of popping / inserting in place,
# so a new name that equals another not-yet-renamed old key cannot clobber that entry.
# Keys without a rename (only possible in nValues) are kept as they are.
dValues = {renames.get(oldkey, oldkey): frame for oldkey, frame in dValues.items()}
nValues = {renames.get(oldkey, oldkey): frame for oldkey, frame in nValues.items()}

In [None]:
# Reload the per-video tracking tables (earlier cells delete entries from sorted_dist while pooling),
# then label every frame with its zeitgeber hour (ZT) and a Day/Night bin
# NOTE: pickle.load can execute arbitrary code, so only load pickle files from a trusted source
# The with-block closes the file handle even if unpickling fails
with open("/home/moonmoon/FD/_Output/sorted_dist.pkl", "rb") as myfile:
    # Dictionary keyed by video ID (values are presumably one pandas DataFrame per video -- confirm against the script that writes this file)
    sorted_dist = pkl.load(myfile)

# Extract Albertson and Keene video IDs from stored dictionary keys
DictKeys = list(sorted_dist.keys())

# Species name is the part of each video ID before the first underscore; keep each name once
SpeciesList = np.unique([name.split("_", maxsplit = 1)[0] for name in DictKeys])

# Make list of species names of all Keene videos
KeeneList = ['CALLIPTERA', 'KUMWERA', 'LABROSUS', 'STUARTGRANTI']

# Every species that is not a Keene species is treated as an Albertson species
AlbertsonList = [species for species in SpeciesList if species not in KeeneList]

# Loop through all key, value pairs in sorted_dist dictionary (each table is modified in place)
for key, value in sorted_dist.items():
    # Extract current species name from key
    currentName = key.split("_", maxsplit = 1)[0]
    # Delete original id (space saving)
    del value['id']

    if currentName in KeeneList:
        # Keene videos: round the frame timestamp down to the hour (Keene videos start at ZT 1, an hour after lights on)
        value['ZT'] = value['frame'].dt.floor('h').dt.hour
    else:
        # Albertson videos (everything not in KeeneList): round the frame timestamp up to the hour (Albertson videos start at ZT 0, at lights on)
        value['ZT'] = value['frame'].dt.ceil('h').dt.hour

    # ZT 0-13 (inclusive) is labeled Day, ZT 14-23 is labeled Night; the rule is the same for both video sources
    value['Bin'] = pd.cut(value['ZT'], bins = [0, 13, 23], labels = ['Day', 'Night'], include_lowest = True)

In [None]:
# For every non-KUMWERA species: percentage of frames (and frame count) spent in each cell of a
# 6 x 3 spatial grid, computed per video for Day and Night and then averaged over the species' videos.
# Video IDs of the same species are expected to be adjacent in sorted_dist.

# Create empty dictionaries to store day and night percentages (henceforth referred to as probabilities) separately
dProbs = {}
nProbs = {}

# Columns holding the spatial bin of every frame
binCols = ['x_Bins', 'y_Bins']


def _speciesMean(tables):
    """Average a list of per-video bin tables into one table per species.

    tables: list of DataFrames with columns x_Bins, y_Bins, proportion and count.
    Returns a DataFrame with one row per (x_Bins, y_Bins) combination holding the
    mean proportion and mean count, rounded to four decimal places.
    """
    pooled = pd.concat(tables)
    # observed = False keeps every bin combination, also the ones no video visited
    averaged = pooled.groupby(binCols, observed = False)[['proportion', 'count']].mean().round(decimals = 4)
    # Move the bin intervals from the index back into columns
    return averaged.reset_index()


# Name of the species currently being collected (None until the first non-KUMWERA video is seen)
startName = None
# Per-video Day / Night tables of the current species
dayTables = []
nightTables = []

# Loop through all key, value pairs in original sorted_dist dictionary
for key, value in sorted_dist.items():
    # Extract current species name from key
    currentName = key.split('_', maxsplit = 1)[0]

    # Bin x into 6 and y into 3 intervals; stored on every video (KUMWERA included) because the next cell reads these columns
    value['x_Bins'] = pd.cut(value['x'], bins = [0, 0.166, 0.333, 0.5, 0.666, 0.833, 1])
    value['y_Bins'] = pd.cut(value['y'], bins = [0, 0.166, 0.333, 0.5])

    # KUMWERA videos are summarized in the next cell
    if 'KUMWERA' in currentName:
        continue

    # Build the Day and the Night table of this video
    trackTables = {}
    for period in ('Day', 'Night'):
        cells = value.loc[value['Bin'] == period, binCols]
        # Share of this period's frames in each bin, as a percentage
        table = (cells.value_counts(normalize = True, sort = False) * 100).reset_index()
        # Raw frame count in each bin (same row order as the percentages)
        table['count'] = cells.value_counts(sort = False).reset_index()['count']
        trackTables[period] = table
    dProb = trackTables['Day']
    nProb = trackTables['Night']

    if startName is None:
        # First species: print name to indicate the start of a new block
        print(f'Start {currentName}')
    elif currentName == startName:
        # Print chunk to indicate next track of the same species is being processed
        print('chunk')
    else:
        # Species changed: store the averages of the finished species under its name
        dProbs[startName] = _speciesMean(dayTables)
        nProbs[startName] = _speciesMean(nightTables)
        # Report the finished species BEFORE startName is moved on to the new one
        print(f'end {startName}; begin {currentName}')
        # Start collecting the new species from scratch
        dayTables = []
        nightTables = []

    startName = currentName
    dayTables.append(dProb)
    nightTables.append(nProb)

# Store the last species (skipped when no non-KUMWERA video was found at all)
if startName is None:
    print('No non-KUMWERA videos found')
else:
    # Print end startName
    print(f'end {startName}')
    sdProb = _speciesMean(dayTables)
    snProb = _speciesMean(nightTables)
    # Store sdProb in dictionary with startName as key
    dProbs[startName] = sdProb
    # Store snProb in dictionary with startName as key
    nProbs[startName] = snProb

In [None]:
# KUMWERA: percentage of frames (and frame count) spent in each spatial bin, computed per video for
# Day and Night and then averaged over all KUMWERA videos.
# Requires dProbs / nProbs and the x_Bins / y_Bins columns created by the preceding cell.

# Name under which the KUMWERA tables are stored (None until the first KUMWERA video is seen)
startName = None
# Per-video Day / Night tables
dayTables = []
nightTables = []

# Loop through all key, value pairs in original sorted_dist dictionary
for key, value in sorted_dist.items():
    # Extract current species name from key
    currentName = key.split('_', maxsplit = 1)[0]

    # Only videos whose species name contains KUMWERA are handled here
    if 'KUMWERA' not in currentName:
        continue

    # Build the Day and the Night table of this video
    for period, tables in (('Day', dayTables), ('Night', nightTables)):
        cells = value.loc[value['Bin'] == period, ['x_Bins', 'y_Bins']]
        # Share of this period's frames in each bin, as a percentage
        table = (cells.value_counts(normalize = True, sort = False) * 100).reset_index()
        # Raw frame count in each bin; NOT multiplied by 100 (only the percentages are scaled)
        table['count'] = cells.value_counts(sort = False).reset_index()['count']
        tables.append(table)

    if startName is None:
        # Set startName to currentName
        startName = currentName
        # Print Start startName
        print(f'Start {startName}')
    else:
        # Print chunk currentName to indicate next track is being processed
        print(f'Chunk {currentName}')

# Average over videos and store; nothing is stored when there was no KUMWERA video,
# so tables left over from the previous cell are never re-used by accident
if startName is None:
    print('No KUMWERA videos found')
else:
    # Print end startName
    print(f'end {startName}')
    averagedTables = []
    for tables in (dayTables, nightTables):
        pooled = pd.concat(tables)
        # Mean percentage and mean count per bin, rounded to four decimal places;
        # observed = False keeps every bin combination, also the ones no video visited
        averaged = pooled.groupby(['x_Bins', 'y_Bins'], observed = False)[['proportion', 'count']].mean().round(decimals = 4)
        # Move the bin intervals from the index back into columns
        averagedTables.append(averaged.reset_index())
    sdProb, snProb = averagedTables
    # Store sdProb in dictionary with startName as key
    dProbs[startName] = sdProb
    # Store snProb in dictionary with startName as key
    nProbs[startName] = snProb

In [None]:
# Rename the keys of dProbs / nProbs to the best fuzzy match in the 'Species' column of the mbuna key

# Read in mbuna key
key = pd.read_csv('/home/moonmoon/FD/mbunaKey.csv')
# Sort rows by Mbuna x Species, reset index and drop
key = key.sort_values(['Mbuna', 'Species']).reset_index(drop = True)

# Store dProbs keys in pkeys (not used further in this cell)
pkeys = pd.DataFrame(dProbs.keys())

# Make empty lists to store keys and their matches
keys = []
matches = []

# Loop through each pkey in dProbs keys
for pkey in dProbs.keys():
    # extractOne returns the best match first: (matched species, score, ...)
    match = process.extractOne(pkey, key['Species'])
    # Append first position of match (best match) to matches list
    matches.append(match[0])
    # Append pkey to keys list
    keys.append(pkey)

# Old key -> new key lookup
renames = dict(zip(keys, matches))

# Two different old keys matching the same species would silently overwrite each other; report it
claimed = {}
for oldkey, newkey in renames.items():
    if newkey in claimed:
        print(f'WARNING: {claimed[newkey]} and {oldkey} both match {newkey}; keeping {oldkey}')
    claimed[newkey] = oldkey

# Rebuild both dictionaries in one pass instead of popping / inserting in place,
# so a new name that equals another not-yet-renamed old key cannot clobber that entry.
# Keys without a rename (only possible in nProbs) are kept as they are.
dProbs = {renames.get(oldkey, oldkey): table for oldkey, table in dProbs.items()}
nProbs = {renames.get(oldkey, oldkey): table for oldkey, table in nProbs.items()}

In [None]:
# Classify each species' DAY tank usage along three spatial contrasts and save the labels to csv.
# The row numbers below refer to the rows of each dProbs table (18 rows: the x bin changes every
# three rows, the y bin cycles within them).

# Store indexes of bin number for each categorical variable grouping
cornersindex = [0, 2, 15, 17]
notcornerindex = [1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16]

topindex = [2, 5, 8, 11, 14, 17]
botindex = [0, 3, 6, 9, 12, 15]

sidesindex = [0, 1, 2, 15, 16, 17]
centerindex = [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]

# One entry per output column: (rows of first group, rows of second group,
# label when the first group's mean is larger, label when the second group's mean is larger)
contrasts = [
    (cornersindex, notcornerindex, 'Corner', 'Middle'),
    (topindex, botindex, 'Top', 'Bottom'),
    (sidesindex, centerindex, 'Side', 'Center'),
]

# Create list to store day categoricals
dcategoricals = []

# Loop through key, value pairs in dProbs
for key, value in dProbs.items():
    proportion = value['proportion']
    # Row starts with the species name, followed by one label per contrast
    categorical = [key]

    for firstRows, secondRows, firstLabel, secondLabel in contrasts:
        # Mean percentage in the first group minus mean percentage in the second group
        difference = proportion.loc[firstRows].mean() - proportion.loc[secondRows].mean()
        if difference > 0:
            categorical.append(firstLabel)
        elif difference < 0:
            categorical.append(secondLabel)
        else:
            # Exact tie or NaN (e.g. no frames in this period): keep the slot empty so the
            # remaining labels stay in their own columns instead of shifting left
            categorical.append(None)

    # Append individual categoricals to day categoricals
    dcategoricals.append(categorical)

# Save day categoricals as dataframe with appropriate column names
dcategoricals = pd.DataFrame(dcategoricals, columns = ['Species', 'Corner/NotCorner', 'Top/Bottom', 'Side/Center'])
# Save day categoricals to csv
dcategoricals.to_csv("/home/moonmoon/FD/_Output/Heatmaps/dayCategoricals.csv")

In [None]:
# Classify each species' NIGHT tank usage along three spatial contrasts and save the labels to csv.
# The row numbers below refer to the rows of each nProbs table (18 rows: the x bin changes every
# three rows, the y bin cycles within them).

# Store indexes of bin number for each categorical variable grouping
cornersindex = [0, 2, 15, 17]
notcornerindex = [1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16]

topindex = [2, 5, 8, 11, 14, 17]
botindex = [0, 3, 6, 9, 12, 15]

sidesindex = [0, 1, 2, 15, 16, 17]
centerindex = [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]

# One entry per output column: (rows of first group, rows of second group,
# label when the first group's mean is larger, label when the second group's mean is larger)
contrasts = [
    (cornersindex, notcornerindex, 'Corner', 'Middle'),
    (topindex, botindex, 'Top', 'Bottom'),
    (sidesindex, centerindex, 'Side', 'Center'),
]

# Create list to store night categoricals
ncategoricals = []

# Loop through key, value pairs in nProbs
for key, value in nProbs.items():
    proportion = value['proportion']
    # Row starts with the species name, followed by one label per contrast
    categorical = [key]

    for firstRows, secondRows, firstLabel, secondLabel in contrasts:
        # Mean percentage in the first group minus mean percentage in the second group
        difference = proportion.loc[firstRows].mean() - proportion.loc[secondRows].mean()
        if difference > 0:
            categorical.append(firstLabel)
        elif difference < 0:
            categorical.append(secondLabel)
        else:
            # Exact tie or NaN (e.g. no frames in this period): keep the slot empty so the
            # remaining labels stay in their own columns instead of shifting left
            categorical.append(None)

    # Append individual categoricals to night categoricals
    ncategoricals.append(categorical)

# Save night categoricals as dataframe with appropriate column names
ncategoricals = pd.DataFrame(ncategoricals, columns = ['Species', 'Corner/NotCorner', 'Top/Bottom', 'Side/Center'])
# Save night categoricals to csv
ncategoricals.to_csv("/home/moonmoon/FD/_Output/Heatmaps/nightCategoricals.csv")

In [None]:
# Day tank-usage variance: one [species, variance] pair per entry of dProbs.
# The variance is taken over the per-bin 'proportion' values of the species
# and rounded to two decimals.
var = [
    [species, np.var(usage['proportion']).round(decimals = 2)]
    for species, usage in dProbs.items()
]

# Turn the pairs into a two-column table and write it out
dayvar = pd.DataFrame(var, columns = ['Species', 'TankUsageVariance'])
dayvar.to_csv("/home/moonmoon/FD/_Output/Heatmaps/dayTankUsageVariance.csv")

In [None]:
# Night tank-usage variance: one [species, variance] pair per entry of nProbs.
# The variance is taken over the per-bin 'proportion' values of the species
# and rounded to two decimals.
var = [
    [species, np.var(usage['proportion']).round(decimals = 2)]
    for species, usage in nProbs.items()
]

# Turn the pairs into a two-column table and write it out
nightvar = pd.DataFrame(var, columns = ['Species', 'TankUsageVariance'])
nightvar.to_csv("/home/moonmoon/FD/_Output/Heatmaps/nightTankUsageVariance.csv")

In [None]:
# Day-minus-night comparison: for each species, subtract the night table from the day table bin by
# bin, then save (a) the variance of that difference and (b) a label per spatial contrast to csv.
# The row numbers below refer to the rows of each dProbs / nProbs table (18 rows: the x bin changes
# every three rows, the y bin cycles within them).

# Store indexes of bin number for each categorical variable grouping
cornersindex = [0, 2, 15, 17]
notcornerindex = [1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16]

topindex = [2, 5, 8, 11, 14, 17]
botindex = [0, 3, 6, 9, 12, 15]

sidesindex = [0, 1, 2, 15, 16, 17]
centerindex = [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]

# One entry per output column: (rows of first group, rows of second group,
# label when the first group's mean is larger, label when the second group's mean is larger)
contrasts = [
    (cornersindex, notcornerindex, 'Corner', 'Middle'),
    (topindex, botindex, 'Top', 'Bottom'),
    (sidesindex, centerindex, 'Side', 'Center'),
]

# Create list to store daynight categoricals and variance
categoricals = []
var = []

# Loop through keys in dProbs (nProbs is expected to hold the same keys)
for entry in dProbs.keys():
    # Store day probabilities as dvalue
    dvalue = dProbs[entry]
    # Store night probabilities as nvalue
    nvalue = nProbs[entry]

    # Day minus night, row by row, for the percentage and the frame count
    pvalue = dvalue[['proportion', 'count']] - nvalue[['proportion', 'count']]
    difference = pvalue['proportion']

    # Append key and variance rounded to two decimals to var list
    var.append([entry, np.var(difference).round(decimals = 2)])

    # Row starts with the species name, followed by one label per contrast
    categorical = [entry]

    for firstRows, secondRows, firstLabel, secondLabel in contrasts:
        # Mean day-night difference in the first group minus that in the second group
        contrast = difference.loc[firstRows].mean() - difference.loc[secondRows].mean()
        if contrast > 0:
            categorical.append(firstLabel)
        elif contrast < 0:
            categorical.append(secondLabel)
        else:
            # Exact tie or NaN (e.g. no frames in one period): keep the slot empty so the
            # remaining labels stay in their own columns instead of shifting left
            categorical.append(None)

    # Append individual categoricals to daynight categoricals
    categoricals.append(categorical)

# Save daynight variance as dataframe with appropriate column names
var = pd.DataFrame(var, columns = ['Species', 'TankUsageVariance'])
# Save daynight variance to csv
var.to_csv("/home/moonmoon/FD/_Output/Heatmaps/daynightTankUsageVariance.csv")
# Save daynight categoricals as dataframe with appropriate column names
categoricals = pd.DataFrame(categoricals, columns = ['Species', 'Corner/NotCorner', 'Top/Bottom', 'Side/Center'])
# Save daynight categoricals to csv
categoricals.to_csv("/home/moonmoon/FD/_Output/Heatmaps/daynightCategoricals.csv")