In [467]:
import numpy as np
import pandas as pd
from heapq import heappush,heappop,heapify
def computeSchedule(df):
    """Return the sum of weighted completion times for jobs run in row order.

    Parameters
    ----------
    df : pandas.DataFrame
        Must have numeric ``'Weight'`` and ``'Duration'`` columns. Rows are
        processed top to bottom, so the frame must already be sorted into the
        desired schedule.

    Returns
    -------
    number
        ``sum(weight_i * C_i)`` where ``C_i`` is the running total of
        ``'Duration'`` up to and including row ``i``. ``0`` for an empty frame.
    """
    if len(df) == 0:
        return 0

    # Work on the raw arrays so the (possibly shuffled or duplicated) index
    # left behind by sort_values cannot influence the arithmetic.
    weights = df['Weight'].to_numpy()
    completionTimes = df['Duration'].to_numpy().cumsum()
    return (weights * completionTimes).sum()

def buildDiGraph(data):
    """Build an adjacency-list dict from rows of ``[u, v, weight]``.

    Every edge is stored in both directions, so the result describes an
    undirected graph: ``G[u]`` contains ``(v, weight)`` and ``G[v]`` contains
    ``(u, weight)``.

    Parameters
    ----------
    data : array-like
        Sequence of rows whose first three entries are the two endpoints and
        the edge weight. The input is only read, never modified.

    Returns
    -------
    dict
        Maps each node to a list of ``(neighbour, weight)`` tuples. For each
        node the entries from rows where it is the first endpoint come first,
        followed by the entries from rows where it is the second endpoint.
    """
    rows = np.asarray(data)
    G = {}

    # Pass 1: edges as listed (u -> v).
    for row in rows:
        G.setdefault(row[0], []).append((row[1], row[2]))

    # Pass 2: the same edges reversed (v -> u). Done as a separate pass, without
    # swapping columns of the caller's array, so the input stays untouched while
    # key order and per-node edge order remain forward-then-reverse.
    for row in rows:
        G.setdefault(row[1], []).append((row[0], row[2]))

    return G


def prims(G):
    """Return the total edge weight of a minimum spanning tree of ``G``.

    Parameters
    ----------
    G : dict
        Adjacency list mapping each node to a list of ``(neighbour, weight)``
        tuples. Every neighbour is expected to also be a key of ``G``.
        Negative weights are allowed.

    Returns
    -------
    number
        Sum of the weights of the tree edges. ``0`` for an empty or
        single-node graph, ``np.inf`` when the graph is disconnected and
        therefore has no spanning tree.
    """
    if not G:
        return 0

    # Grow the tree from the first key of the dict.
    start = next(iter(G))
    visited = {start}

    # Candidate edges leaving the tree, kept as (weight, tieBreak, node).
    # The counter means ties on weight never fall through to comparing nodes.
    frontier = []
    tieBreak = 0
    for neighbour, weight in G[start]:
        heappush(frontier, (weight, tieBreak, neighbour))
        tieBreak += 1

    MST = 0
    while frontier and len(visited) < len(G):
        weight, _, node = heappop(frontier)
        if node in visited:
            # Stale entry: the node was already reached through a cheaper edge.
            continue
        visited.add(node)
        MST += weight
        for neighbour, edgeWeight in G[node]:
            if neighbour not in visited:
                heappush(frontier, (edgeWeight, tieBreak, neighbour))
                tieBreak += 1

    # Nodes left unreached means there is no spanning tree.
    if len(visited) < len(G):
        return np.inf
    return MST

import sys
class minHeap:
    """Binary min-heap stored in a 1-indexed list.

    ``Heap[0]`` is an unused placeholder so that the children of index ``i``
    sit at ``2*i`` and ``2*i + 1``. Real elements occupy
    ``Heap[1 : length + 1]``. Elements only need to support ``<`` and ``>``
    among themselves (numbers, tuples, ...); they are never compared with the
    placeholder.
    """

    def __init__(self):
        self.length = 0                  # number of stored elements
        self.Heap = [-sys.maxsize]       # index 0 is a placeholder, never an element
        self.StartIndex = 1              # index of the root

    def parent(self, index):
        """Index of the parent of ``index``."""
        return index // 2

    def leftChild(self, index):
        """Index of the left child of ``index``."""
        return 2 * index

    def rightChild(self, index):
        """Index of the right child of ``index``."""
        return 2 * index + 1

    def isLeaf(self, index):
        """True when ``index`` is a stored element that has no children."""
        return self.length // 2 < index <= self.length

    def swap(self, index1, index2):
        """Exchange the elements at the two indices."""
        self.Heap[index1], self.Heap[index2] = self.Heap[index2], self.Heap[index1]

    def insert(self, element):
        """Append ``element`` and bubble it up to its place."""
        self.length += 1
        self.Heap.append(element)

        current = self.length
        # Stop at the root explicitly instead of comparing with the placeholder:
        # that comparison fails for non-numeric elements and gives the wrong
        # answer for values below the placeholder.
        while current > self.StartIndex:
            parent = self.parent(current)
            if not self.Heap[current] < self.Heap[parent]:
                break
            self.swap(current, parent)
            current = parent

    def top(self):
        """Return the smallest element without removing it, or None if empty."""
        if self.length > 0:
            return self.Heap[self.StartIndex]
        return None

    def heapify(self, index):
        """Sift the element at ``index`` down until the heap property holds below it."""
        if index < self.StartIndex:
            return
        while True:
            left = self.leftChild(index)
            if left > self.length:
                return  # no children
            right = self.rightChild(index)
            # Pick the smaller child; on a tie the right child is chosen.
            child = left
            if right <= self.length and not self.Heap[left] < self.Heap[right]:
                child = right
            if not self.Heap[index] > self.Heap[child]:
                return
            self.swap(index, child)
            index = child

    def remove(self):
        """Remove and return the smallest element, or None if the heap is empty."""
        if self.length == 0:
            return None
        popped = self.Heap[self.StartIndex]
        last = self.Heap.pop()
        self.length -= 1
        if self.length:
            # Move the former last element to the root and restore the order.
            self.Heap[self.StartIndex] = last
            self.heapify(self.StartIndex)
        return popped

    def minHeap(self):
        """Re-establish the heap property over all stored elements (bottom-up)."""
        for index in range(self.length // 2, 0, -1):
            self.heapify(index)

    def viewHeap(self):
        """Return the stored elements in array order, without the placeholder."""
        return self.Heap[1:]

## 1)

The file `jobs.txt` describes a set of jobs with positive and integral weights and lengths.  It has the format

[number_of_jobs]

[job_1_weight] [job_1_length]

[job_2_weight] [job_2_length]

...

For example, the third line of the file is "74 59", indicating that the second job has weight 74 and length 59.

You should NOT assume that job weights or lengths are distinct.

Your task in this problem is to run the greedy algorithm that schedules jobs in decreasing order of the difference (weight - length).  Recall from lecture that this algorithm is not always optimal.  IMPORTANT: if two jobs have equal difference (weight - length), you should schedule the job with higher weight first.  Beware: if you break ties in a different way, you are likely to get the wrong answer.  You should report the sum of weighted completion times of the resulting schedule --- a positive integer --- in the box below. 

In [468]:
# Greedy rule 1: order jobs by decreasing (weight - length); on ties the heavier job goes first.
weightDuration = np.loadtxt('jobs.txt', skiprows=1)
priority = weightDuration[:, 0] - weightDuration[:, 1]
df = pd.DataFrame({
    'Weight': weightDuration[:, 0],
    'Duration': weightDuration[:, 1],
    'Priority': priority,
}).sort_values(by=['Priority', 'Weight'], ascending=False)
df

Unnamed: 0,Weight,Duration,Priority
448,99.0,1.0,98.0
684,100.0,3.0,97.0
4245,100.0,3.0,97.0
249,99.0,2.0,97.0
3757,99.0,2.0,97.0
...,...,...,...
9812,1.0,97.0,-96.0
5609,3.0,100.0,-97.0
6382,3.0,100.0,-97.0
8950,3.0,100.0,-97.0


In [469]:
# Sum of weighted completion times for the current `df` row order
# (here: sorted by weight - length, heavier job first on ties).
computeSchedule(df)

69119377652.0

In [470]:
# Greedy rule 2: order jobs by decreasing ratio weight / length; on ties the heavier job goes first.
# Reuses `weightDuration` loaded from jobs.txt in the earlier cell.
priority = weightDuration[:, 0] / weightDuration[:, 1]
df = pd.DataFrame({
    'Weight': weightDuration[:, 0],
    'Duration': weightDuration[:, 1],
    'Priority': priority,
}).sort_values(by=['Priority', 'Weight'], ascending=False)
df
        

Unnamed: 0,Weight,Duration,Priority
448,99.0,1.0,99.000000
703,98.0,1.0,98.000000
2259,95.0,1.0,95.000000
9545,95.0,1.0,95.000000
1024,93.0,1.0,93.000000
...,...,...,...
5185,1.0,96.0,0.010417
512,1.0,97.0,0.010309
4665,1.0,97.0,0.010309
8370,1.0,97.0,0.010309


In [471]:
# Sum of weighted completion times for the current `df` row order
# (here: sorted by weight / length, heavier job first on ties).
computeSchedule(df)

67311454237.0

In [472]:
# Load the edge list (the first line of the file is a header) as an integer array of
# [node, node, weight] rows. ndmin=2 keeps the array two-dimensional even when the
# file holds a single edge.
edges = np.loadtxt('edges.txt', skiprows=1, ndmin=2).astype(int)
# Pass a copy so `edges` keeps its original column order no matter what the
# graph builder does with its argument.
G = buildDiGraph(edges.copy())

In [473]:
# Total edge weight of the minimum spanning tree of the graph built from edges.txt.
MST = prims(G)
MST

-3612829

In [457]:
# Cross-check: load the same edge file with networkx so the MST weight can be
# computed by an independent implementation.
# NOTE(review): in document order this rebinds `G` from the adjacency dict to a
# networkx graph; re-running the prims(G) cell afterwards would receive the wrong type.
# NOTE(review): unlike the np.loadtxt cell, no header line is skipped here — confirm
# how read_weighted_edgelist treats the first line of edges.txt.
import networkx as nx
G = nx.read_weighted_edgelist('edges.txt')

In [458]:
# Reference answer: total edge weight of the spanning tree networkx finds.
G2 = nx.minimum_spanning_tree(G)
data = [attrs for _, _, attrs in G2.edges(data=True)]  # one attribute dict per tree edge
mst = sum(attrs['weight'] for attrs in data)
mst

-3612829.0

In [42]:
def primsHeap(G):
    """Heap-based Prim's algorithm: total edge weight of a minimum spanning tree.

    Parameters
    ----------
    G : dict
        Adjacency list mapping each node to a list of ``(neighbour, weight)``
        tuples. Every neighbour is expected to also be a key of ``G``.
        Negative weights are allowed.

    Returns
    -------
    number
        Sum of the weights of the tree edges. ``0`` for an empty or
        single-node graph, ``np.inf`` when the graph is disconnected.
    """
    if not G:
        return 0

    inf = np.inf

    # Cheapest known edge connecting each node to the growing tree.
    bestEdgeWeight = {node: inf for node in G}

    # Start from the first key; it joins the tree at zero cost.
    start = next(iter(G))
    bestEdgeWeight[start] = 0

    # Heap entries are (weight, tieBreak, node). heapq has no decrease-key, so
    # an improved weight is pushed as a new entry and the outdated ones are
    # skipped when popped. The counter keeps ties from comparing nodes.
    notVisited = [(0, 0, start)]
    tieBreak = 1
    visited = set()
    MST = 0

    while notVisited and len(visited) < len(G):
        weight, _, node = heappop(notVisited)
        if node in visited:
            continue  # outdated entry for a node already in the tree
        visited.add(node)
        MST += weight
        for neighbour, edgeWeight in G[node]:
            if neighbour not in visited and edgeWeight < bestEdgeWeight[neighbour]:
                bestEdgeWeight[neighbour] = edgeWeight
                heappush(notVisited, (edgeWeight, tieBreak, neighbour))
                tieBreak += 1

    # Nodes that were never reached means there is no spanning tree.
    if len(visited) < len(G):
        return inf
    return MST