In [177]:
import numpy as np

## First try

### initial array

In [183]:
arr = [0.27903358, 0.66813685, 0.72073981, 0.54324703, 0.19464269, 
		   0.41102104, 0.19986179, 0.35450891, 0.22924373, 0.27188447, 0.17604976]

arr = np.array(arr)

In [184]:
def sample_std_dev(arr):
	return np.sqrt(np.sum(((arr-np.mean(arr))**2))/(len(arr)-1))


In [185]:
np.mean(arr)

0.36803360545454544

In [186]:
arr.std()

0.18557685530349188

In [187]:
sample_std_dev(arr)

0.1946346478578942

### let's insert a new value

In [188]:
new_val = 11
new_arr = np.array(list(arr)+[11])

In [189]:
new_arr.std()

2.9438869348898646

In [190]:
sample_std_dev(new_arr)

3.0747896221453903

In [191]:
old_mean = np.mean(arr)
old_population_std_dev = np.std(arr)
old_len = len(arr)

In [192]:
new_mean = ((old_mean*old_len)+new_val)/(old_len+1) # mean = sum(arr)/(len(arr))
new_mean

1.254030805

In [193]:
new_mean - np.mean(new_arr) # ok it worked

0.0

In [195]:
n = old_len+1
new_sample_variance = ((n-2)*old_population_std_dev**2 + (new_val-new_mean)*(new_val-old_mean))/(n-1)
n = n+1
new_std_variance = ((n-2)*old_population_std_dev**2 + (new_val-new_mean)*(new_val-old_mean))/(n-1)

new_sample_std_dev = np.sqrt(new_sample_variance)
new_population_std_dev = np.sqrt(new_std_variance)

In [196]:
new_population_std_dev

2.943886934889864

In [197]:
new_sample_std_dev

3.0742804724440442

In [198]:
# they're not exact but it's pretty close

# Function
Now we can organize this method and turn it into a function

In [199]:
def get_sample_std_dev(arr):
	return np.sqrt(np.sum(((arr-np.mean(arr))**2))/(len(arr)-1))

In [200]:
def get_new_mean(old_len, old_mean, new_val):
	return ((old_mean*old_len)+new_val)/(old_len+1) # mean = sum(arr)/(len(arr))

def get_new_dev(old_len, old_mean, new_val, old_dev, sample=False):
	new_mean = get_new_mean(old_len, old_mean, new_val)
	if sample:
		n = old_len+1
		return np.sqrt(((n-2)*old_dev**2 + (new_val-new_mean)*(new_val-old_mean))/(n-1))
	else:
		n = old_len+2 # 
		return np.sqrt(((n-2)*old_dev**2 + (new_val-new_mean)*(new_val-old_mean))/(n-1))

In [201]:
old_arr = [0.27903358, 0.66813685, 0.72073981, 0.54324703, 0.19464269, 
		   0.41102104, 0.19986179, 0.35450891, 0.22924373, 0.27188447, 0.17604976]
old_len = len(old_arr) # O(1) operation

old_mean = np.mean(old_arr) # O(n) operation
old_dev = np.std(old_arr) # O(n) operation

new_val = 11

### stdev

In [202]:
old_dev = np.std(old_arr) # O(n) operation
get_new_dev(old_len, old_mean, new_val, old_population_std_dev, sample=False)


2.943886934889864

In [203]:
new_arr = np.array(list(old_arr)+[new_val])
new_arr.std()

2.9438869348898646

### sample dev

In [204]:
old_sample_std_dev = get_sample_std_dev(old_arr)
get_new_dev(old_len, old_mean, new_val, old_sample_std_dev, sample=True)

3.07478962214539

In [205]:
new_arr = np.array(list(old_arr)+[new_val])
get_sample_std_dev(new_arr)

3.0747896221453903

# Testing

In [206]:
array_sizes = [10, 100, 10000]
insertion_sizes = [1, 10, 100, 1000, 10000]
#array_sizes = [10]
#insertion_sizes = [3]

for array_size in array_sizes:
	for insertion_size in insertion_sizes:
		
		# bootstrap
		old_arr = np.random.rand(array_size)
		
		new_values = np.random.rand(insertion_size)
		
		old_mean = np.mean(old_arr)
		old_population_std_dev = np.std(old_arr)
		old_sample_std_dev = sample_std_dev(old_arr)
		old_len = len(old_arr)

		# assume we don't have the old_arr anymore, we are only keeping it
		# to compute the error between np.std()/get_sample_std_dev and our method

		# update vlaues using our method
		for new_val in new_values[0:-1]:
			
			# only used to compute the error as well
			new_arr = np.array(list(old_arr)+[new_val])

			new_mean = get_new_mean(old_len, old_mean, new_val)
			new_population_std_dev = get_new_dev(old_len, old_mean, new_val, old_population_std_dev, sample=False)
			new_sample_std_dev = get_new_dev(old_len, old_mean, new_val, old_sample_std_dev, sample=True)

			old_arr = new_arr
			old_mean = new_mean
			old_population_std_dev = new_population_std_dev
			old_sample_std_dev = new_sample_std_dev
			old_len = old_len+1

		new_val = new_values[-1]
		
		new_arr = np.array(list(old_arr)+[new_val])

		new_mean = get_new_mean(old_len, old_mean, new_val)
		new_population_std_dev = get_new_dev(old_len, old_mean, new_val, old_population_std_dev, sample=False)
		new_sample_std_dev = get_new_dev(old_len, old_mean, new_val, old_sample_std_dev, sample=True)

		new_population_std_dev_error_pct = 100*(new_population_std_dev - np.std(new_arr))/np.std(new_arr)
		new_sample_std_dev_error_pct = 100*(new_sample_std_dev - get_sample_std_dev(new_arr))/get_sample_std_dev(new_arr)
		new_mean_error_pct = 100*(new_mean - np.mean(new_arr))/np.mean(new_arr)

		print("array_size: {}, insertion_size: {}, new_population_std_dev_error_pct: {:.4f}%, new_sample_std_dev_error_pct: {:.4f}%, new_mean_error_pct: {:.4f}%".\
			format(array_size, insertion_size, new_population_std_dev_error_pct, new_sample_std_dev_error_pct, new_mean_error_pct))
		
			
			

array_size: 10, insertion_size: 1, new_population_std_dev_error_pct: 0.0000%, new_sample_std_dev_error_pct: 0.0000%, new_mean_error_pct: 0.0000%
array_size: 10, insertion_size: 10, new_population_std_dev_error_pct: 0.0000%, new_sample_std_dev_error_pct: 0.0000%, new_mean_error_pct: 0.0000%
array_size: 10, insertion_size: 100, new_population_std_dev_error_pct: -0.0000%, new_sample_std_dev_error_pct: 0.0000%, new_mean_error_pct: 0.0000%
array_size: 10, insertion_size: 1000, new_population_std_dev_error_pct: 0.0000%, new_sample_std_dev_error_pct: 0.0000%, new_mean_error_pct: -0.0000%
array_size: 10, insertion_size: 10000, new_population_std_dev_error_pct: -0.0000%, new_sample_std_dev_error_pct: -0.0000%, new_mean_error_pct: 0.0000%
array_size: 100, insertion_size: 1, new_population_std_dev_error_pct: 0.0000%, new_sample_std_dev_error_pct: 0.0000%, new_mean_error_pct: 0.0000%
array_size: 100, insertion_size: 10, new_population_std_dev_error_pct: -0.0000%, new_sample_std_dev_error_pct: 0.00