# Cluster configuration

### References:
* http://gpdb.docs.pivotal.io/4340/utility_guide/admin_utilities/gpexpand.html
* https://support.pivotal.io/hc/en-us/articles/201202707-gpexpand-example-working-with-one-host-


In [149]:
# Primaries per host before (old) and after (new) the expansion;
# every host carries the same number of mirrors
oldDensity = 12
newDensity = 16

# Host names are numbered 001 through 070
hosts = {'csia4gpw{segment:03d}'.format(segment=n) for n in range(1, 71)}

# This host was excluded from the cluster
hosts = hosts - {'csia4gpw030'}

In [150]:
numExistingSegments = oldDensity*len(hosts)

print "Currenly", numExistingSegments, "total segments across", len(hosts), "total hosts"
print "Resulting in", 2*oldDensity, "existing segments per host :", 
print oldDensity, "primaries and", oldDensity, "mirrors"

Currenly 828 total segments across 69 total hosts
Resulting in 24 existing segments per host : 12 primaries and 12 mirrors


In [151]:
numAdditionalSegments = (newDensity - oldDensity)*len(hosts)

print "Adding", numAdditionalSegments, "total segments across", len(hosts), "total hosts"
print "Resulting in", 2*numNewSegmentsPerHost, "additional segments per host :", 
print numNewSegmentsPerHost, "primaries and", numNewSegmentsPerHost, "mirrors"

Adding 276 total segments across 69 total hosts
Resulting in 8 additional segments per host : 4 primaries and 4 mirrors


In [152]:
# First new db-id: the existing segment count plus 2
# (one to skip the master segment, one to step past
# the last existing segment)
# NOTE(review): numExistingSegments is oldDensity*len(hosts), i.e. it
# counts primaries only. If the existing mirrors hold db-ids of their
# own, this base may overlap them -- confirm against the live catalog.
dbIbBase = 2 + numExistingSegments

# Content ids are zero-based, so the existing
# count is already the first unused content id
contentIdBase = numExistingSegments

# Primaries only; each host also takes
# the same number of mirrors
numNewSegmentsPerHost = newDensity - oldDensity

# Two db-ids per new content id (primary + mirror)
segmentDbIds = set(range(dbIbBase, dbIbBase + numAdditionalSegments*2))

# One content id per primary+mirror pair
contentIds = list(range(contentIdBase, contentIdBase + numAdditionalSegments))

# Port bases start just past the existing per-host
# allocations; replication ports sit 1000 above
primaryPortBase = oldDensity + 40000
primaryRepPortBase = 1000 + primaryPortBase
mirrorPortBase = oldDensity + 50000
mirrorRepPortBase = 1000 + mirrorPortBase

# One consecutive port per new segment on a host, for each port kind
thesePrimaryPorts = list(range(primaryPortBase, numNewSegmentsPerHost + primaryPortBase))
thesePrimaryRepPorts = list(range(primaryRepPortBase, numNewSegmentsPerHost + primaryRepPortBase))
theseMirrorPorts = list(range(mirrorPortBase, numNewSegmentsPerHost + mirrorPortBase))
theseMirrorRepPorts = list(range(mirrorRepPortBase, numNewSegmentsPerHost + mirrorRepPortBase))

In [153]:
from random import randrange
import numpy as np
import pandas as pd

# Column labels for the generated rows, in output order
fieldLabels = [
    "Host", "Interface", "Port", "fselocation",
    "dbid", "Content", "Role", "ReplicationPort",
]

# Starts empty; printHostsLines adds rows to it
sortedAssignments = pd.DataFrame(columns=fieldLabels)

# Pick k unique items from the available set;
# return the new available set (excluding the
# choices) followed by the set of k chosen items
def getKIndicies(k, availableSet):
    """Randomly choose k distinct members of availableSet.

    k            -- number of items to draw; zero or less draws nothing
    availableSet -- set to draw from (not modified)

    Returns a tuple (remaining, chosen): `remaining` is a new set holding
    the members that were not drawn, `chosen` is the set of drawn members.

    Raises ValueError when k exceeds the number of available items.
    """
    pool = list(availableSet)
    if k > len(pool):
        raise ValueError("cannot pick %d items from a set of %d" % (k, len(pool)))

    theseChoices = set()
    for _ in range(k):
        # Swap a random entry to the end and pop it, so every draw
        # yields a fresh item and the loop runs exactly k times
        pick = randrange(len(pool))
        pool[pick], pool[-1] = pool[-1], pool[pick]
        theseChoices.add(pool.pop())

    return availableSet.difference(theseChoices), theseChoices

# This keeps track of which db-ids have been assigned
assigned = set()

# One entry per host: a list of (db-id, content id) pairs
assignments = list()

# Draw from a private copy of the db-id pool so that
# re-running this cell starts from the full pool again
# instead of finding segmentDbIds already emptied
remainingDbIds = set(segmentDbIds)

# Shorten the variable name
nsph = numNewSegmentsPerHost

# For each host, generate a collection of new
# primary and mirror segments, respecting the
# relationship between them (content id)
for i in range(len(hosts)):
    # Get a collection of random, unique db ids:
    # two per content id handled in this group
    remainingDbIds, thisAssignment = getKIndicies(2*nsph, remainingDbIds)

    # Track that these db ids have been allocated
    assigned = assigned.union(thisAssignment)

    # Take this group's slice of content ids:
    # half as many as db ids, since a content id
    # is shared by a pair of db ids
    theseContentIds = list(contentIds[nsph*i:nsph*i+nsph])

    # Pair every db id with a content id; the content ids are
    # repeated once so each appears in exactly two pairs, at
    # positions j and j+nsph. Stored as a list because later
    # code slices it.
    assignments.append(list(zip(thisAssignment, theseContentIds*2)))

# This builds a group of entries for one host, including
# the interface assignment, and adds them to the global
# sortedAssignments frame
def printHostsLines(host, fields, mounts):
    """Append up to four rows for `host` to the global `sortedAssignments`.

    host   -- host name; also the prefix of the interface name
    fields -- iterable of (dbid, contentId) pairs; consumed once
    mounts -- mount numbers, one per row, substituted into /data<N>/...

    The rows are produced by zipping against fixed four-element role
    (p, p, m, m) and interface (1, 2, 1, 2) lists, so at most four rows
    are emitted per call: the first two are primaries, the last two mirrors.
    """
    global sortedAssignments

    # unpack (unzip) the pairs into respective fields
    dbIds, contentIds = zip(*fields)

    # Candidate ports are the configured port lists shifted down by one.
    # mounts does not enter this computation; it only triggers the +1
    # bump in the loop below.
    # NOTE(review): only the first four candidates are consumed by the
    # zip below, so the mirror port lists are never reached and the
    # lowest port used is primaryPortBase - 1 -- confirm that this does
    # not collide with a port of an existing segment.
    ports = np.array(list(thesePrimaryPorts) + list(theseMirrorPorts)) - 1
    repPorts = np.array(list(thesePrimaryRepPorts) + list(theseMirrorRepPorts)) - 1

    # Build a string lookup table with
    # formatting specifiers to generate
    # the fselocation field
    loc = dict(p='/data{}/primary/gp{}', m='/data{}/mirror/gp{}')

    # Fixed per-row role and interface-suffix sequences
    roles = ['p']*2 + ['m']*2
    interfaces = [1, 2]*2

    # Collect the rows first and add them to the frame in one step,
    # instead of rebuilding the whole frame for every row
    rows = []
    for (d, c, p, rP, r, i, m) in zip(dbIds, contentIds, ports, repPorts, roles, interfaces, mounts):
        # Shift main and replication ports by one for
        # mounts 3 and 4 so they do not collide with
        # the ports given to mounts 1 and 2
        if m in [3, 4]:
            p += 1
            rP += 1
        thisTuple = (host, '-'.join([host, str(i)]), p, loc[r].format(m, c), d, c, r, rP)
        rows.append(dict(zip(fieldLabels, thisTuple)))

    if rows:
        sortedAssignments = pd.concat(
            [sortedAssignments, pd.DataFrame(rows, columns=fieldLabels)],
            ignore_index=True)

# Freeze the host set into a list so
# hosts can be addressed by position
hostList = list(hosts)

# Hosts are processed in pairs: host h together with host h + half,
# where half = len(hosts)//2. Each pair shares two assignment groups,
# assignments[h] and assignments[len(hosts)-h-1]. The first host gets
# the leading nsph entries of each group; the second host gets the
# trailing entries in reverse order. printHostsLines labels the first
# two rows of a call as primaries and the last two as mirrors, so with
# nsph == 4 the reversal gives every content id one primary and one
# mirror, placed on different hosts.
# NOTE(review): when len(hosts) is odd, the last host in hostList and
# the middle entry of assignments are never used by this loop.
for h in range(len(hosts)//2):
    # Shorten the variable name
    nsph = numNewSegmentsPerHost

    firstHost = hostList[h]
    secondHost = hostList[h + len(hosts)//2]
    frontGroup = assignments[h]
    backGroup = assignments[len(hosts) - h - 1]

    # First host: leading entries of the front group,
    # alternating mount points /data1, /data2
    printHostsLines(firstHost, frontGroup[:nsph], [1, 2]*2)
    # First host: leading entries of the unrelated back group,
    # alternating mount points /data3, /data4
    printHostsLines(firstHost, backGroup[:nsph], [3, 4]*2)

    # Second host: trailing entries of the front group, reversed
    printHostsLines(secondHost, reversed(frontGroup[nsph:]), [1, 2]*2)
    # Second host: trailing entries of the back group, reversed
    printHostsLines(secondHost, reversed(backGroup[nsph:]), [3, 4]*2)
    

In [154]:
# Order the rows by db-id, then display the frame
# NOTE(review): newer pandas releases replace DataFrame.sort with
# sort_values -- confirm which pandas version this notebook targets.
sortedAssignments = sortedAssignments.sort(columns='dbid')
sortedAssignments

Unnamed: 0,Host,Interface,Port,fselocation,dbid,Content,Role,ReplicationPort
92,csia4gpw045,csia4gpw045-1,40012,/data3/primary/gp1083,830,1083,p,41012
205,csia4gpw032,csia4gpw032-2,40013,/data4/primary/gp1054,831,1054,p,41013
496,csia4gpw023,csia4gpw023-1,40011,/data1/primary/gp952,832,952,p,41011
33,csia4gpw037,csia4gpw037-2,40012,/data2/primary/gp837,833,837,p,41012
81,csia4gpw069,csia4gpw069-2,40012,/data2/primary/gp849,834,849,p,41012
385,csia4gpw040,csia4gpw040-2,40012,/data2/primary/gp925,835,925,p,41012
41,csia4gpw022,csia4gpw022-2,40012,/data2/primary/gp838,836,838,p,41012
294,csia4gpw046,csia4gpw046-1,40014,/data3/mirror/gp1030,837,1030,m,41014
455,csia4gpw026,csia4gpw026-2,40015,/data4/mirror/gp991,838,991,m,41015
272,csia4gpw047,csia4gpw047-1,40011,/data1/primary/gp896,839,896,p,41011


In [None]:
# Write one colon-separated line per segment. The row index and the
# header line are left out: the gpexpand input format consists of the
# eight data fields only, so either would make the file unparseable.
sortedAssignments.to_csv('gpexpandInput.txt', sep=":", index=False, header=False)