# Median Maintenance
<br>

The goal of this problem is to implement the "Median Maintenance" algorithm (covered in the Week 3 lecture on heap applications). The text file contains a list of the integers from 1 to 10000 in unsorted order; you should treat this as a stream of numbers, arriving one by one. 


## General Utilities

### Imports

In [1]:
#try to get this module (heapdict is not part of the standard library)
import heapdict
import heapq
import numpy as np
import sys
import os
#import pandas as pd
#import random as rnd
import copy
#defaultdict makes it easier to build dictionaries of lists, see https://stackoverflow.com/questions/26367812/appending-to-list-in-python-dictionary
from collections import defaultdict
from collections import Counter
#import resource
import threading

In [2]:
# Set the interpreter's recursion limit to 4000.
# NOTE(review): nothing visible in this notebook recurses (the median
# maintenance code below is iterative), so this looks carried over from
# another assignment -- confirm before removing.
sys.setrecursionlimit(4000)
#hardlimit = resource.getrlimit(resource.RLIMIT_STACK)[1]

### Import Input Data
<br>
Note:
**OPTIONAL EXERCISE:** Compare the performance achieved by heap-based and search-tree-based implementations of the algorithm.


In [3]:
# Load the assignment input: a text file of integers, consumed later as a
# stream by the median-maintenance code.
inputFile = "Median.txt"

with open(inputFile, 'r') as data:
    # str.split() with no argument splits on any run of whitespace, so blank
    # lines (or an empty file) no longer make the int() conversion fail.
    line = data.read().split()
inputList = list(map(int, line))

In [4]:
inputList[:10]

[6331, 2793, 1640, 9290, 225, 625, 6195, 2303, 5685, 1354]

### Test Cases
<br> from Github repo

In [5]:
# Display the current working directory; the test-case folder used in the
# following cells is located relative to it.
import os
os.getcwd()

'D:\\Fabio\\Documents\\GitRps\\PrincetonAlgorithms'

In [6]:
# Collect a directory entry for every file found in the 'testMedian' folder
# under the current working directory.
# os.path.join replaces the hard-coded '\\' separator so the path also
# resolves on non-Windows systems.
path = os.path.join(os.getcwd(), 'testMedian')
with os.scandir(path) as listOfEntries:
    # Sub-directories are skipped: only entries that are files are kept.
    listNames = [entry for entry in listOfEntries if entry.is_file()]

In [7]:
import fnmatch

# Split the file names of the 'testMedian' folder into test inputs and
# expected outputs, based on their name prefix.
# os.path.join replaces the hard-coded '\\' separator so the path also
# resolves on non-Windows systems.
path = os.path.join(os.getcwd(), 'testMedian')
# os.listdir returns names in arbitrary order; sort them so every run sees
# the cases in the same order.
listOfFiles = sorted(os.listdir(path))
pattern1 = "input_random_*"
pattern2 = "output_random_*"
# The two prefixes are mutually exclusive, so two independent filters give
# the same split as an if/elif chain.
listInputsT = fnmatch.filter(listOfFiles, pattern1)
listOutputsT = fnmatch.filter(listOfFiles, pattern2)

In [8]:
import re

# Extract the case id (e.g. '10_40') that follows the 'input_random_' prefix
# of each input file name. The pattern is a raw string: in a normal string
# literal '\w' is an invalid escape sequence and triggers a warning.
_CASE_ID = re.compile(r"(?<=input_random_)\w+")
# Names without a match are skipped; otherwise the first match is kept.
listCases = [
    found.group()
    for found in map(_CASE_ID.search, listInputsT)
    if found
]

In [9]:
listCases

['10_40',
 '11_40',
 '12_40',
 '13_80',
 '14_80',
 '15_80',
 '16_80',
 '17_160',
 '18_160',
 '19_160',
 '1_10',
 '20_160',
 '21_320',
 '22_320',
 '23_320',
 '24_320',
 '25_640',
 '26_640',
 '27_640',
 '28_640',
 '29_1280',
 '2_10',
 '30_1280',
 '31_1280',
 '32_1280',
 '33_2560',
 '34_2560',
 '35_2560',
 '36_2560',
 '37_5120',
 '38_5120',
 '39_5120',
 '3_10',
 '40_5120',
 '41_10000',
 '42_10000',
 '43_10000',
 '44_10000',
 '4_10',
 '5_20',
 '6_20',
 '7_20',
 '8_20',
 '9_40']

In [10]:
# Load every test case into memory, keyed by its case id.
# Both file names are rebuilt from the same id, so an input is always paired
# with its own expected answer regardless of directory listing order.
testInputs = {}
testAnswers = {}

for j in listCases:
    print('importing :',j)
    # os.path.join replaces the hard-coded '\\' separator so the paths are
    # not tied to Windows.
    with open(os.path.join(path, 'output_random_'+j+'.txt'), 'r') as data:
        # Parsed as comma-separated integers; the checks further down only
        # read the first value.
        testAnswers[j] = list(map(int,data.read().strip().split(",")))
    with open(os.path.join(path, 'input_random_'+j+'.txt'), 'r') as data:
        # split() splits on any whitespace run, so blank lines are tolerated.
        testInputs[j] = list(map(int,data.read().split()))

importing : 10_40
importing : 11_40
importing : 12_40
importing : 13_80
importing : 14_80
importing : 15_80
importing : 16_80
importing : 17_160
importing : 18_160
importing : 19_160
importing : 1_10
importing : 20_160
importing : 21_320
importing : 22_320
importing : 23_320
importing : 24_320
importing : 25_640
importing : 26_640
importing : 27_640
importing : 28_640
importing : 29_1280
importing : 2_10
importing : 30_1280
importing : 31_1280
importing : 32_1280
importing : 33_2560
importing : 34_2560
importing : 35_2560
importing : 36_2560
importing : 37_5120
importing : 38_5120
importing : 39_5120
importing : 3_10
importing : 40_5120
importing : 41_10000
importing : 42_10000
importing : 43_10000
importing : 44_10000
importing : 4_10
importing : 5_20
importing : 6_20
importing : 7_20
importing : 8_20
importing : 9_40


In [11]:
testAnswers['10_40']

[695]

In [12]:
testInputs['10_40']

[28,
 7,
 32,
 13,
 21,
 3,
 4,
 26,
 12,
 36,
 18,
 9,
 23,
 1,
 25,
 16,
 11,
 5,
 34,
 29,
 38,
 37,
 10,
 33,
 19,
 30,
 14,
 2,
 27,
 8,
 17,
 31,
 40,
 35,
 15,
 24,
 20,
 39,
 6,
 22]

## Working Code
<Br> Remember the algorithm using heap<br>
Why  Heap? Seems easier and with less code.<br>
We can work with **heapq**: although it only provides a min-heap, we can negate the values to simulate a max-heap. We need the minimum of the larger half and the maximum of the smaller half.
<br>
Store the smaller half as negated values.<br>
How should the input be handled, though?
<br> Remember that the input is a stream, so the two heaps can be kept in global variables that change as each number arrives.


In [27]:
def medMaint(newN):
    """Feed one number of the stream to the heaps and return the median.

    State lives in two module-level lists managed with heapq:
      * lowH  -- the smaller half, stored negated so that the min-heap
                 behaves like a max-heap (its largest value is -lowH[0]);
      * highH -- the larger half, stored as-is.
    After every call lowH holds either as many items as highH or exactly
    one more, so the returned value -lowH[0] is the middle element for an
    odd count and the lower of the two middle elements for an even count.
    """
    global lowH, highH

    # Nothing seen yet: the first number is its own median.
    if not lowH:
        heapq.heappush(lowH, -newN)
        return -lowH[0]

    current_median = -lowH[0]
    if newN > current_median:
        # Belongs to the larger half.
        heapq.heappush(highH, newN)
        # The upper heap may never outgrow the lower one: hand its
        # smallest value over when it does.
        if len(lowH) < len(highH):
            smallest_high = heapq.heappop(highH)
            heapq.heappush(lowH, -smallest_high)
    else:
        # Belongs to the smaller half (stored negated).
        heapq.heappush(lowH, -newN)
        # The lower heap may lead by one item at most: hand its largest
        # value over when the gap grows beyond that.
        if len(lowH) - len(highH) > 1:
            largest_low = -heapq.heappop(lowH)
            heapq.heappush(highH, largest_low)

    return -lowH[0]

### Test Cases
<br>
In the box below you should type the sum of these 10000 medians, modulo 10000 (i.e., only the last 4 digits). That is, letting $m_k$ denote the median of the first $k$ numbers of the stream, you should compute $(m_1 + m_2 + m_3 + \cdots + m_{10000}) \bmod 10000$.


In [28]:
#This is tupavov test array from forum
# Small 40-number test array taken from the course forum; the heaps are
# printed after every step so the algorithm can be followed by eye.
tup=[23,9,35,4,13,24,2,5,27,1,34,8,15,39,32,22,29,21,19,20,36,33,7,31,14,17,26,16,38,6,30,40,25,28,11,37,3,10,18,12]
# Start from empty heaps. No heapify call is needed (an empty list is
# already a valid heap) and no `global` statement is needed at module level.
lowH = []
highH = []
result = []
running_sum = 0
for num in tup:
    result.append(medMaint(num))
    # Keep a running total instead of re-summing the whole list each step.
    running_sum += result[-1]
    print('The median ',result[-1],'Sum of median ', running_sum)
    print('Low Heap', lowH, 'High Heap', highH)

The median  23 Sum of median  23
Low Heap [-23] High Heap []
The median  9 Sum of median  32
Low Heap [-9] High Heap [23]
The median  23 Sum of median  55
Low Heap [-23, -9] High Heap [35]
The median  9 Sum of median  64
Low Heap [-9, -4] High Heap [23, 35]
The median  13 Sum of median  77
Low Heap [-13, -4, -9] High Heap [23, 35]
The median  13 Sum of median  90
Low Heap [-13, -4, -9] High Heap [23, 35, 24]
The median  13 Sum of median  103
Low Heap [-13, -4, -9, -2] High Heap [23, 35, 24]
The median  9 Sum of median  112
Low Heap [-9, -5, -4, -2] High Heap [13, 23, 24, 35]
The median  13 Sum of median  125
Low Heap [-13, -9, -4, -2, -5] High Heap [23, 27, 24, 35]
The median  9 Sum of median  134
Low Heap [-9, -5, -4, -2, -1] High Heap [13, 23, 24, 35, 27]
The median  13 Sum of median  147
Low Heap [-13, -5, -9, -2, -1, -4] High Heap [23, 27, 24, 35, 34]
The median  9 Sum of median  156
Low Heap [-9, -5, -8, -2, -1, -4] High Heap [13, 27, 23, 35, 34, 24]
The median  13 Sum of median  

In [33]:
#Test cases
# NOTE(review): the enlarged recursion limit and thread stack size are kept
# as they were, although nothing visible here recurses -- confirm whether
# they are still needed.
sys.setrecursionlimit(800000)
threading.stack_size(67108864)


def main():
    """Check medMaint against every loaded test case.

    For each case id in listCases the shared heaps are reset, the case's
    numbers are streamed through medMaint, and the sum of the returned
    medians modulo 10000 is compared with the first expected answer.
    The outcome of each comparison is printed.
    """
    global lowH
    global highH
    for j in listCases:
        # medMaint keeps its state in these module-level heaps, so they
        # have to be emptied before each case (an empty list is already a
        # valid heap).
        lowH = []
        highH = []
        print('Test Case ',j)
        result = [medMaint(l) for l in testInputs[j]]
        print('Correct?',sum(result)%10000==testAnswers[j][0])


thread = threading.Thread(target=main)
thread.start()
# Wait for the worker to finish: it mutates the global heaps, so letting a
# later cell run while it is still active would corrupt both computations.
thread.join()

Test Case  10_40
Correct? True
Test Case  11_40
Correct? True
Test Case  12_40
Correct? True
Test Case  13_80
Correct? True
Test Case  14_80
Correct? True
Test Case  15_80
Correct? True
Test Case  16_80
Correct? True
Test Case  17_160
Correct? True
Test Case  18_160
Correct? True
Test Case  19_160
Correct? True
Test Case  1_10
Correct? True
Test Case  20_160
Correct? True
Test Case  21_320
Correct? True
Test Case  22_320
Correct? True
Test Case  23_320
Correct? True
Test Case  24_320
Correct? True
Test Case  25_640
Correct? True
Test Case  26_640
Correct? True
Test Case  27_640
Correct? True
Test Case  28_640
Correct? True
Test Case  29_1280
Correct? True
Test Case  2_10
Correct? True
Test Case  30_1280
Correct? True
Test Case  31_1280
Correct? True
Test Case  32_1280
Correct? True
Test Case  33_2560
Correct? True
Test Case  34_2560
Correct? True
Test Case  35_2560
Correct? True
Test Case  36_2560
Correct? True
Test Case  37_5120
Correct? True
Test Case  38_5120
Correct? True
Test Case

### Real Case

<br>
Remember


In [34]:
#Real input
# NOTE(review): the enlarged recursion limit and thread stack size are kept
# as they were, although nothing visible here recurses -- confirm whether
# they are still needed.
sys.setrecursionlimit(800000)
threading.stack_size(67108864)


def main():
    """Stream inputList through medMaint and print the final answer.

    The shared heaps are reset, every number of inputList is fed to
    medMaint, and the sum of the returned medians modulo 10000 is printed.
    """
    global lowH
    global highH
    # medMaint keeps its state in these module-level heaps; start empty
    # (an empty list is already a valid heap).
    lowH = []
    highH = []
    result = [medMaint(l) for l in inputList]
    print(sum(result)%10000)


thread = threading.Thread(target=main)
thread.start()
# Wait for the worker to finish: it mutates the global heaps, so nothing
# else should touch them while it is still running.
thread.join()

1213
