In [31]:
import pandas as pd
import os

# Root folder that holds every input and output file of this notebook.
root = './data'

# Logical name -> sub-folder name. The loop below replaces each value with the
# full path underneath `root`, so later cells can use directories['csv'] etc.
directories = dict(
    working='working',
    csv='csv',
    excel='excel',
    neighbors='neighbors'
)

# os.makedirs(..., exist_ok=True) is a no-op when the folder already exists,
# which avoids the gap between an os.path.exists() check and os.mkdir() and
# makes the cell safe to re-run.
os.makedirs(root, exist_ok=True)
for k, v in directories.items():
    directories[k] = os.path.join(root, v)
    os.makedirs(directories[k], exist_ok=True)

In [32]:
# Load the prepared data set from the CSV folder.
data = pd.read_csv(os.path.join(directories['csv'], 'prepped.csv'))
# Collect the labels of every column whose name contains 'Locus' and drop
# those columns. The frame is rebound rather than mutated in place, so
# re-running this cell always starts again from the file contents.
locuses = data.filter(like='Locus').columns
data = data.drop(locuses, axis=1)
# Group the remaining rows by enclosure (region); later cells iterate over
# these groups.
grouped = data.groupby('Enclosure')

In [33]:
# Count the observations in each enclosure and store the table as CSV.
abundance_group = grouped.size()
# One (enclosure, count) pair per group, in group order.
abundance_items = list(abundance_group.items())
abundance = pd.DataFrame(abundance_items, columns=['Enclosure', 'Count'])
abundance_path = os.path.join(directories['csv'], 'abundance.csv')
abundance.to_csv(abundance_path)

In [34]:
# Write the rows of every enclosure to their own CSV in the working folder;
# the file is named after the enclosure.
for enclosure, rows in grouped:
    path = os.path.join(directories['working'], enclosure + '.csv')
    rows.to_csv(path)
    # print('Saved {} as {}'.format(enclosure, path))

In [35]:
# Sample pull for single item
# from neighbor import Neighbor
# import operator
# sample = pd.read_csv(os.path.join(directories['working'], '41A_n.csv'))
# a = sample.iloc()[0]
# a_data = a.filter(like='Nr').values.tolist()
# # against
# b = sample.iloc()[6]
# b_data = b.filter(like='Nr').values.tolist()
# # show
# print(a_data, 'vs', b_data)
# # try
# n = Neighbor(a_data)
# print(n.diff(b_data))
# n.check(b_data)

In [53]:
import operator
import numpy
from neighbor import Neighbor

# For each region, pull its data and build the adjacency matrix.
working_dir = directories['working']
# os.listdir returns every entry in the folder, including checkpoint
# folders or OS metadata files that pd.read_csv cannot parse. Only the
# '.csv' files written by the per-enclosure export cell are region files.
regions = sorted(f for f in os.listdir(working_dir) if f.endswith('.csv'))
matrices = dict()
for region in regions:
    # Basic info for the region. A dedicated name is used for the frame so the
    # notebook-level `data` loaded earlier is not overwritten by this loop.
    region_name = os.path.splitext(region)[0]
    region_data = pd.read_csv(os.path.join(working_dir, region))
    total_rows = len(region_data.index)
    labels = region_data['Label'].tolist()

    # Extract the values of the columns whose name contains 'Nr' once per row
    # instead of once per (row, row) pair inside the nested loop.
    nr_values = [
        region_data.iloc[idx].filter(like='Nr').values.tolist()
        for idx in range(total_rows)
    ]

    adjacency = numpy.zeros((total_rows, total_rows), int)
    for idx in range(total_rows):
        # Neighbor comes from the project-local `neighbor` module.
        # NOTE(review): the meaning of the second argument (1) is not visible
        # here -- confirm against neighbor.py. Fresh list copies are passed in
        # case Neighbor keeps or modifies the lists it receives.
        neighbor = Neighbor(list(nr_values[idx]), 1)
        # Compare the base row against every row of the region (itself included).
        for idy in range(total_rows):
            adjacency[idx, idy] = neighbor.check(list(nr_values[idy]))
    # Rows and columns of the matrix are both labelled with the 'Label' column.
    matrices[region_name] = pd.DataFrame(adjacency, labels, labels)

# Save every matrix as <region>.csv in the neighbors folder.
for region_name, matrix in matrices.items():
    path = os.path.join(directories['neighbors'], region_name + '.csv')
    matrix.to_csv(path)
    print('Saved {} to {}'.format(region_name, path))

41A_n 	 10
41A_w 	 12
43A_e 	 15
43A_n 	 6
44_e 	 40
44_n 	 26
44_s 	 20
46_e 	 7
46_n 	 7
48B_e 	 11
48B_s 	 10
54B_e 	 28
54B_w 	 11
56B_n 	 13
56B_s 	 10
58E_e 	 16
58E_n 	 27
58E_w 	 8
59_e 	 7
59_n 	 20
59_s 	 17
60D_e 	 16
60D_n 	 12
60D_s 	 17
61A_e 	 11
61A_n 	 16
61A_s 	 14
61B_e 	 8
61B_n 	 21
61B_s 	 27
Saved 	61B_n	./data/neighbors/61B_n.csv
Saved 	54B_w	./data/neighbors/54B_w.csv
Saved 	61A_e	./data/neighbors/61A_e.csv
Saved 	61A_s	./data/neighbors/61A_s.csv
Saved 	61B_s	./data/neighbors/61B_s.csv
Saved 	61B_e	./data/neighbors/61B_e.csv
Saved 	56B_n	./data/neighbors/56B_n.csv
Saved 	60D_s	./data/neighbors/60D_s.csv
Saved 	58E_e	./data/neighbors/58E_e.csv
Saved 	58E_n	./data/neighbors/58E_n.csv
Saved 	46_n	./data/neighbors/46_n.csv
Saved 	43A_n	./data/neighbors/43A_n.csv
Saved 	44_n	./data/neighbors/44_n.csv
Saved 	44_e	./data/neighbors/44_e.csv
Saved 	61A_n	./data/neighbors/61A_n.csv
Saved 	59_e	./data/neighbors/59_e.csv
Saved 	58E_w	./data/neighbors/58E_w.csv
Saved 	41A_w

In [43]:
# create dataframes for each matrix?
# use labels from each region to build Series for index/col