NSGS Procedure for selecting a population with the largest mean

In [1]:
import numpy as np
import pandas as pd
import random as rd
from scipy import stats as st
import itertools

In [2]:
df = pd.read_excel (r'rinott.xlsx')
#print (df)

In [3]:
# set parameters from file
P1 = 0.95
n0 = 50
h = df.iloc[2,1]
deltastar = 0.15
P0 = 0.98

In [4]:
# set data points
data = df.iloc[10:,:]
#data

In [5]:
def calc_t(k, dfr, P0):
    return(st.t.ppf(P0**(1/(k-1)), dfr))

In [6]:
def check_largest(list, val):  
    # traverse in the list 
    for x in list: 
  
        # compare with all the values 
        # with val 
        if val <= x: 
            return (False)
    return (True)

In [7]:
def NSGS(P1, P0, n0, h, deltastar, data):
    
    # Stage 1

    # step (i)
    n0 = n0
    P0 = P0
    P1 = P1
    Pstar = (P0 + P1) - 1
    dfr = n0 - 1
    h = h
    k = data.shape[1]-1

    # step (ii)
    # define a frame that holds all samples (n0 rows, k batches)
    samples = np.empty([n0, data.shape[1]])
    # collect samples
    for i in np.arange(0, data.shape[1]-1, step = 1):
        samples[:,i] = data.iloc[:n0,i+1]#data.iloc[:,i+1].sample(n=n0, random_state=1)

    # step (iii)
    # summary statistics calculations
    stats = np.empty([data.shape[1]-1,4])
    for i in np.arange(0, data.shape[1]-1, step = 1):
        stats[i,0] = np.mean(samples[:,i])
        stats[i,1] = np.var(samples[:,i], ddof = 1)

    # step (iv)
    # calculate W's for pairwise sample differences
    w_rec = np.empty([data.shape[1]-1, data.shape[1]-1])
    for i,j in itertools.product(np.arange(0, data.shape[1]-1, step = 1), 
                               np.arange(0, data.shape[1]-1, step = 1)): 
        if i == j:
            w_rec[i,j] = 0
        elif i != j:
            w_rec[i,j] = calc_t(k, dfr, P0) * np.sqrt((stats[i,1] + stats[j,1])/(n0))

    # step (v)
    # define I
    I = []
    for i in np.arange(0,data.shape[1]-1, step = 1):
        gaps = []
        for j in np.arange(0,data.shape[1]-1, step = 1):
            if i != j:
                gaps.append(stats[j,0] - max(0, w_rec[i,j] - deltastar))
        if(check_largest(gaps, stats[i,0])):
            I.append(i)

    # step (v)
    if len(I) == 1:
        print("Sample", I[0], "is the rv associated with the largest mu of", round(max(stats[:,0]),3)) 
        return(stats)
    else:
        notI = np.delete(np.arange(0,data.shape[1]-1, step = 1), I, axis=0)
        print("Eliminated samples", notI+1, "in first stage")
        
        # Stage 2

        # step (i)
        I = np.array(I)
        samples_red = samples[:,I+1]
        stats_red = stats[I,:]
        for i in np.arange(0, samples_red.shape[1], step = 1):
            stats_red[i,2] = max(n0, np.ceil(((h*np.sqrt(stats_red[i,1]))/deltastar)**2))
            if stats_red[i,2] > n0:
                sample_ext = data.iloc[:,i+1].sample(n = int(stats_red[i,2] - n0), random_state = 2)

            # step (ii)        
                stats_red[i,3] = np.mean(np.concatenate((samples_red[:,i], sample_ext), axis = 0))
            else:
                stats_red[i,3] = np.mean(samples_red[:,i])

        # step (iii)
        istar = np.argmax(stats_red[:,3]) + 1

        # step (iv)
        print("Sample", istar, "is the rv associated with the largest mu of", round(max(stats_red[:,3]),3)) 
        return(stats_red)

In [8]:
results = NSGS(P1, P0, n0, h, deltastar, data)
results = np.round(results,3)
results = pd.DataFrame(results) # transform into data frame
results = results.rename(columns={0: "Ybar^0_i", 1: "S_i^2", 2: "n_i", 3: "Ybar_i"})
results = results.rename(index = {0: 1, 1: 2, 2: 3, 3: 6, 4: 7, 5: 8, 6: 9, 7: 10})
results.to_csv(index=True, path_or_buf = 'results.csv')
results

Eliminated samples [4 5] in first stage
Sample 6 is the rv associated with the largest mu of 4.202


Unnamed: 0,Ybar^0_i,S_i^2,n_i,Ybar_i
1,4.06,0.807,488.0,4.131
2,4.102,1.303,788.0,4.149
3,4.158,0.882,533.0,4.153
6,4.176,0.119,72.0,3.4
7,3.17,9.97,6024.0,4.106
8,4.132,12.469,7534.0,4.202
9,4.58,7.077,4276.0,4.003
10,4.389,11.788,7122.0,-2.605024e+246
