# Scratch pad

In [1]:
import numpy as np
import pandas as pd
import geopandas as gpd
from itertools import combinations

### Test summing the distance of clubs in an area
Get the average normalized distance between every pair of clubs in an area.

In [2]:
dist = pd.read_csv('club_distance_matrix.csv')
dist.set_index('club_no', inplace=True)
# Make sure the columns are integers too
dist.columns = dist.columns.astype(int)
dist.head()

Unnamed: 0_level_0,5509,7036,9682,584009,1100434,718,4819,9790,5069647,7575630,...,1783,596735,4700632,5258000,2690,8569,3929213,4822437,5569,1565753
club_no,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
5509,0.0,0.0,0.309451,0.31531,0.0,0.309451,0.338685,0.297754,0.347098,0.311623,...,0.752496,0.737553,0.748308,0.748308,0.748079,0.748308,0.748308,0.749572,0.748308,0.748308
7036,0.0,0.0,0.309451,0.31531,0.0,0.309451,0.338685,0.297754,0.347098,0.311623,...,0.752496,0.737553,0.748308,0.748308,0.748079,0.748308,0.748308,0.749572,0.748308,0.748308
9682,0.360002,0.360002,0.0,0.033326,0.360002,0.0,0.035332,0.024728,0.100703,0.011137,...,0.896155,0.892741,0.894353,0.894353,0.893528,0.894353,0.894353,0.895944,0.894353,0.894353
584009,0.378986,0.378986,0.034432,0.0,0.378986,0.034432,0.047424,0.024879,0.070442,0.045275,...,0.927359,0.924798,0.926084,0.926084,0.925535,0.926084,0.926084,0.927159,0.926084,0.926084
1100434,0.0,0.0,0.309451,0.31531,0.0,0.309451,0.338685,0.297754,0.347098,0.311623,...,0.752496,0.737553,0.748308,0.748308,0.748079,0.748308,0.748308,0.749572,0.748308,0.748308


In [3]:
# Get the distance between two clubs
dist.loc[(5509, 9682)]

0.3094514953565958

In [4]:
# Hypothetical area of four clubs by club number
area1 = [5509, 9682, 584009, 718]
list(combinations(area1, 2))

[(5509, 9682),
 (5509, 584009),
 (5509, 718),
 (9682, 584009),
 (9682, 718),
 (584009, 718)]

In [5]:
# Write a function that returns the average distances between clubs in an area
def area_dist(clubs, distance_matrix):
    return sum([distance_matrix.loc[pair] for pair in list(combinations(clubs, 2))]) / len(clubs)

area_dist(area1, dist)

0.25049283527727934

### Test club quality distribution

In [7]:
ideal = [0.0, 0.25, 0.5, 0.75, 1.0]
alt1 = [0.0, 0.0, 0.5, 1.0, 1.0]
alt2 = [0.0, 0.5, 0.5, 0.5, 1.0]
alt3 = [0.0, 1.0]
alt4 = [1.0, 1.0]
alt5 = [0.0, 0.33, 0.67, 1.0]
alt6 = [0.1, 0.35, 0.6, 0.85, 1.0]

In [8]:
print(f'Ideal: {np.std(ideal)}')
print(f'Alt 1: {np.std(alt1)}')
print(f'Alt 2: {np.std(alt2)}')
print(f'Alt 3: {np.std(alt3)}')
print(f'Alt 4: {np.std(alt4)}')
print(f'Alt 5: {np.std(alt5)}')
print(f'Alt 6: {np.std(alt6)}')

Ideal: 0.3535533905932738
Alt 1: 0.4472135954999579
Alt 2: 0.31622776601683794
Alt 3: 0.5
Alt 4: 0.0
Alt 5: 0.37343004699675686
Alt 6: 0.32649655434629016


### Test setting up an initial population

In [9]:
clubs = pd.read_csv('clubs_to_realign.csv')
clubs.head()

Unnamed: 0,club_no,n_quality
0,5509,0.677709
1,7036,0.695792
2,9682,0.614715
3,584009,0.4759
4,1100434,0.768516


In [10]:
len(clubs)/5

33.6

In [11]:
def chunks(clubs, n):
    """Yield successive n-sized chunks from l."""
    for i in range(0, len(clubs), n):
        yield clubs[i:i + n]

list(chunks(list(clubs['club_no']), 5))

[[5509, 7036, 9682, 584009, 1100434],
 [718, 4819, 9790, 5069647, 7575630],
 [2427, 1171849, 4801055, 7463287, 4154],
 [7817, 7881, 1412885, 7031829, 4182],
 [9354, 1526701, 1535564, 9469, 9598],
 [607240, 3812934, 4095, 942489, 1581643],
 [6891000, 3074518, 3408653, 5112712, 5042512],
 [5553533, 7022029, 7479372, 7554675, 4015],
 [730163, 1463775, 2556863, 7274, 8363],
 [1165752, 3240871, 6071, 1595518, 6754191],
 [6970706, 5869106, 6644914, 6887806, 713],
 [1190, 3549, 6975086, 7384295, 2364],
 [4108, 4110, 8412, 2912, 1176566],
 [8041, 8631, 2876291, 8552, 1588444],
 [4718634, 1171779, 6613239, 1291183, 2923054],
 [7532701, 6380, 7533, 9161, 7327347],
 [6523, 1047602, 6990556, 1331602, 1995527],
 [4750107, 5736, 630505, 3356972, 3372438],
 [4721, 695532, 845547, 1207, 6142],
 [6654663, 5055, 1176575, 2189079, 7306126],
 [4858, 5928, 9019, 5918, 8941],
 [1408278, 3063370, 7402713, 437, 584516],
 [4157985, 7479409, 6661, 3395235, 3401898],
 [8853, 583467, 2038660, 3431353, 6590],
 [74

### Make some sample districts
Make dummy districts with odd numbers to test having 1, 2, or 3 clubs left at the end of grouping. Use existing club data for the scores.

In [24]:
# Make a list of club numbers
not_club_no = pd.DataFrame(range(1, 160))
not_club_no['quality'] = clubs['n_quality'].copy()
not_club_no.rename(columns = {0: 'club_no'}, inplace=True)
len(not_club_no)

159

In [26]:
d_3_left = not_club_no[:158].copy()
len(d_3_left) % 5

3

In [27]:
d_2_left = not_club_no[:157].copy()
len(d_2_left) % 5

2

In [28]:
d_1_left = not_club_no[:156].copy()
len(d_1_left) % 5

1

In [32]:
d0 = not_club_no[:155].copy()
len(d0) % 5

0

Save these

In [33]:
d_3_left.to_csv('d3.csv', index=False)
d_2_left.to_csv('d2.csv', index=False)
d_1_left.to_csv('d1.csv', index=False)
d0.to_csv('d0.csv', index=False)

### Testing list structures
I seem to be having an issue with passing a list of lists when the toolbox wants to create additional nested lists.

In [4]:
l1 = [1, 2, 3, 4]
for x in l1:
    print(x)

1
2
3
4


In [5]:
1, 2, 3, 4

(1, 2, 3, 4)

In [9]:
tuple(list(tuple((1, 2, 3, 4))))

(1, 2, 3, 4)

In [10]:
[1, 2, 3], [4, 5, 6]

([1, 2, 3], [4, 5, 6])