# Graph II - AdjListGraph #

<img src="../images/ch17/listrepr.png" width="640"/>

In [1]:
import sys
class Vertex:
    def __init__(self, node):
        self.id = node
        self.adjacent = {} # {Vertex:weigh}
        # Set distance to infinity for all nodes
        self.distance = sys.maxsize
        # Mark all nodes unvisited        
        self.visited = False  
        # Predecessor
        self.previous = None

    def addNeighbor(self, neighbor, weigh = 0):
        self.adjacent[neighbor] = weigh
        
    def getConnections(self):
        return self.adjacent.keys()
        
    def getVertexID(self):
        return self.id
        
    def getWeight(self, neighbor):
        return self.adjacent[neighbor]
        
    def setPrevious(self, prev):
        self.previous = prev

    def setVisited(self):
        self.visited = True
        
    def __str__(self):
        return self.id + ' adjacent: ' + str([x for x in self.adjacent.keys()])
        
    
    def __lt__(self, other):
         return self.id < other.id
        

class Graph:
    def __init__(self, directed=False):
        # key is string, vertex id
        # value is Vertex
        self.vertDictionary = {} # {id:Vertex}
        self.numVertices = 0
        self.directed = directed
        
    def __iter__(self):
        return iter(self.vertDictionary.values())

    def isDirected(self):
        return self.directed
    
    def vectexCount(self):
        return self.numVertices

    def addVertex(self, node):
        newVertex = Vertex(node)
        self.numVertices += 1
        self.vertDictionary[node] = newVertex
        return newVertex

    def getVertex(self, n):
        if n in self.vertDictionary:
            return self.vertDictionay[n]
        else:
            return None

    def addEdge(self, frm, to, cost=0):
        if frm not in self.vertDictionary:
            self.addVertex(frm)
        if to not in self.vertDictionary:
            self.addVertex(to)
        self.vertDictionary[frm].addNeighbor(self.vertDictionary[to], cost)
        if not self.directed:
            self.vertDictionary[to].addNeighbor(self.vertDictionary[frm], cost)

    def getVertices(self):
        return self.vertDictionary.keys()

    def setPrevious(self, current, prev):
        if current in self.vertDictionary:
            current.setPrevious(prev)

    def getPrevious(self, current):
        if current in self.vertDictionary:
            return current.getPrevious()

    def getEdges(self):
        edges = []
        for keys, vertex in self.vertDictionary.items():
            for neighbor in vertex.getConnections():
                edges.append((vertex.getVertexID(), neighbor.getVertexID(), vertex.getWeight(neighbor))) #tuple
        return edges
    
    def getNeighbors(self, v):
        if v in self.vertDictionary:
            vertex = self.vertDictionary[v]
            return vertex.getConnections()
        
G = Graph(True)
G.addVertex('a')
G.addVertex('b')
G.addVertex('c')
G.addVertex('d')
G.addVertex('e')
G.addVertex('f')
G.addEdge('a', 'b', 1)  
G.addEdge('a', 'c', 1)
G.addEdge('b', 'd', 1)
G.addEdge('b', 'e', 1)
G.addEdge('c', 'd', 1)
G.addEdge('c', 'e', 1)
G.addEdge('d', 'e', 1)
G.addEdge('e', 'a', 1)
print (G.getEdges())
for k in G.getEdges():
    print(k)

[('a', 'b', 1), ('a', 'c', 1), ('b', 'd', 1), ('b', 'e', 1), ('c', 'd', 1), ('c', 'e', 1), ('d', 'e', 1), ('e', 'a', 1)]
('a', 'b', 1)
('a', 'c', 1)
('b', 'd', 1)
('b', 'e', 1)
('c', 'd', 1)
('c', 'e', 1)
('d', 'e', 1)
('e', 'a', 1)


In [2]:
# DFS:
def DFS(S, G, visited, parents):
    if S == G:
        return
    for n in S.getConnections():
        if n not in visited:
            visited.add(n.getVertexID())
            parents[n] = S
            DFS(n, G, visited, parents)

In [3]:
# DFS_traversal:
def DFSTraversal(S, G):
    visited = []
    parents = {}
    stack = [S]
    while stack:
        cur = stack.pop()
        for neighbor in cur.getNeighbor():
            if neighbor not in visited:
                visited.add(n.getVertexID())
                parents[neighbor] = cur.getVertexID()
                stack.append(neighbor)

# Dijkstra Algorithm #

In [4]:
from AdjListGraph import Graph
from AdjListGraph import Vertex
import heapq 

def dijkstra(G, source, destination):
    """
    G: Graph
    source: source vertex
    destination: destination vertex
    """
    source.setDistance(0) # the distance of source to source is 0
    unvisitedQueue = [(v.getDistance(), v) for v in G] # use the distance as standard, popping the shortest distance vertex every times
    heapq.heapify(unvisitedQueue)
    
    # 如果有一个点和任何一个点都没有联系会如何？
    while len(unvisitedQueue):
        # pop a vertex with smallest distance
        uv = heapq.heappop(unvisitedQueue)
        cur = uv[1] # get the vertex
        cur.setVisited() # visited means that the distance of cur has been shortest
        
        for nx in cur.adjacent: # find all of neighbor
            if nx.visited:
                continue
            newDistance = cur.getDistance() + cur.getWeight(nx)
            
            if newDistance < nx.getDistance():
                nx.setDistance(newDistance)
                nx.setPrevious(cur)
                print('Updated : current = %s next = %s newDist = %s' \
                        % (cur.getVertexID(), nx.getVertexID(), nx.getDistance()))
            else:
                print('Not updated : current = %s next = %s newDist = %s' \
                        % (cur.getVertexID(), nx.getVertexID(), nx.getDistance()))
        
def shortest(v, path):
    if v.previous:
        path.append(v.previous.getVertexID())
        shortest(v.previous, path)
    return 

G = Graph(True)
G.addVertex('a')
G.addVertex('b')
G.addVertex('c')
G.addVertex('d')
G.addVertex('e')
G.addEdge('a', 'b', 4)  
G.addEdge('a', 'c', 1)
G.addEdge('c', 'b', 2)
G.addEdge('b', 'e', 4)
G.addEdge('c', 'd', 4)
G.addEdge('d', 'e', 4)

for v in G:
    for w in v.getConnections():
        vid = v.getVertexID()
        wid = w.getVertexID()
        print('( %s , %s, %3d)' % (vid, wid, v.getWeight(w)))

source = G.getVertex('a')
destination = G.getVertex('e')    
dijkstra(G, source, destination) 

for v in G.vertDictionary.values():
    print(source.getVertexID(), " to ", v.getVertexID(), "-->", v.getDistance())

path = [destination.getVertexID()]
shortest(destination, path)
print ('The shortest path from a to e is: %s' % (path[::-1]))

( a , b,   4)
( a , c,   1)
( b , e,   4)
( c , b,   2)
( c , d,   4)
( d , e,   4)
Updated : current = a next = b newDist = 4
Updated : current = a next = c newDist = 1
Updated : current = c next = b newDist = 3
Updated : current = c next = d newDist = 5
Updated : current = b next = e newDist = 7
Not updated : current = d next = e newDist = 7
a  to  a --> 0
a  to  b --> 3
a  to  c --> 1
a  to  d --> 5
a  to  e --> 7
The shortest path from a to e is: ['a', 'c', 'b', 'e']


# Graph Maze #

### <a id='Ex1'>Ex.1 The Maze</a>

There is a ball in a maze with empty spaces and walls. The ball can go through empty spaces by rolling up, down, left or right.

Given the ball's start position, the destination and the maze, determine whether the ball could stop at the destination.

The maze is represented by a binary 2D array. 1 means the wall and 0 means the empty space. You may assume that the borders of the maze are all walls. The start and destination coordinates are represented by row and column indexes.


<img src="../images/ch17/maze1.png" width="560"/>

In [1]:
def dfs(matrix, start, dest):
    visited = [[False]*len(matrix[0]) for _ in range(len(matrix))]
    return dfsHelper(matrix, start, dest, visited)
    
def dfsHelper(matrix, start, dest, visited):
    if start[0] == dest[0] and start[1] == dest[1]:
        return True
    
    direction = [[0,1], [0,-1], [1,0], [-1,0]]
    
    for d in direction:
        x = start[0] + d[0]
        y = start[1] + d[1]
        if x < 0 or x > len(matrix) - 1 or y < 0 or y > len(matrix[0]) - 1:
            continue
        if matrix[x][y] == 1:
            continue
        if visited[x][y]:
            continue
        visited[x][y] = True
        if(dfsHelper(matrix, [x, y], dest, visited)):
            return True
    
    return False


matrix = [
    [0, 0, 1, 0, 0],
    [0, 0, 0, 0, 0],
    [0, 0, 0, 1, 0],
    [1, 1, 1, 1, 1],
    [0, 0, 0, 0, 0]
]

start = (0, 0)
dest  = (4, 4)
print(dfs(matrix, start, dest))

matrix = [
    [0, 0, 1, 0, 0],
    [0, 0, 0, 0, 0],
    [0, 0, 0, 1, 0],
    [1, 1, 0, 1, 1],
    [0, 0, 0, 0, 0]
]

start = (0, 0)
dest  = (4, 4)
dfs(matrix, start, dest)
print(dfs(matrix, start, dest))

False
True


In [28]:
def dfsIterative(matrix, start, dest):
    visited = [[False]*len(matrix[0]) for _ in range(len(matrix))]
    stack = []
    stack.append(start)
    visited[start[0]][start[1]] = True
    dirction = [[0,1], [0,-1], [1,0], [-1,0]]
    
    while len(stack) > 0:
        cur = stack.pop()
        if cur[0] == dest[0] and cur[1] == dest[1]:
            return True
        for dirc in dirction:
            x = cur[0] + dirc[0]
            y = cur[1] + dirc[1]

            if x < 0 or x >= len(matrix) or y < 0 or y >= len(matrix[0]):
                continue
            if visited[x][y] is True:
                continue
            if matrix[x][y] == 1:
                continue
            visited[x][y] = True

            stack.append((x,y))
    return False

matrix = [
    [0, 0, 1, 0, 0],
    [0, 0, 0, 0, 0],
    [0, 0, 0, 1, 0],
    [1, 1, 1, 1, 1],
    [0, 0, 0, 0, 0]
]

start = (0, 0)
dest  = (4, 4)
print(dfsIterative(matrix, start, dest))

matrix = [
    [0, 0, 1, 0, 0],
    [0, 0, 0, 0, 0],
    [0, 0, 0, 1, 0],
    [1, 1, 0, 1, 1],
    [0, 0, 0, 0, 0]
]

start = (0, 0)
dest  = (4, 4)
dfs(matrix, start, dest)
print(dfsIterative(matrix, start, dest))
        
    

False
True


### <a id='Ex2'>Ex.2 The Maze II</a>

There is a ball in a maze with empty spaces and walls. The ball can go through empty spaces by rolling up, down, left or right. <font color='Red'>but it won't stop rolling until hitting a wall. When the ball stops, it could choose the next direction.</font>

Given the ball's start position, the destination and the maze, determine whether the ball could stop at the destination.

The maze is represented by a binary 2D array. 1 means the wall and 0 means the empty space. You may assume that the borders of the maze are all walls. The start and destination coordinates are represented by row and column indexes.

<img src="../images/ch17/maze2.png" width="640"/>
<img src="../images/ch17/maze3.png" width="640"/>

In [32]:
def dfs2(matrix, start, dest):
    visited = [[False]*len(matrix[0]) for _ in range(len(matrix))]
    return dfs2Helper(matrix, start, dest, visited)

def dfs2Helper(matrix, start, dest, visited):
    if matrix[start[0]][start[1]] == 1:
        return False
    if visited[start[0]][start[1]]:
        return False
    if start[0] == dest[0] and start[1] == dest[1]:
        return True
    
    visited[start[0]][start[1]] = True
    
    r = start[1] + 1
    l = start[1] - 1
    u = start[0] - 1
    d = start[0] + 1
    
    while r < len(matrix[0]) and matrix[start[0]][r] == 0:
        r += 1
    newNeighbor = (start[0], r - 1)
    if(dfs2Helper(matrix, newNeighbor, dest, visited)):
        return True
    
    while l > 0 and matrix[start[0]][l] == 0:
        l -= 1
    newNeighbor = (start[0], l + 1)
    if(dfs2Helper(matrix, newNeighbor, dest, visited)):
        return True
    
    while u > 0 and matrix[u][start[1]] == 0:
        u -= 1
    newNeighbor = (u + 1, start[1])
    if(dfs2Helper(matrix, newNeighbor, dest, visited)):
        return True
    
    while d < len(matrix) and matrix[d][start[1]] == 0:
        d += 1
    newNeighbor = (d - 1, start[1])
    if(dfs2Helper(matrix, newNeighbor, dest, visited)):
        return True
    
    return False

matrix = [
    [0, 0, 1, 0, 0],
    [0, 0, 0, 0, 0],
    [0, 0, 0, 1, 0],
    [1, 1, 0, 1, 1],
    [0, 0, 0, 0, 0]
]

start = (0, 4)
dest  = (4, 4)
print(dfs2(matrix, start, dest))



True


In [37]:
def dfs2(matrix, start, dest):
    visited = [[False]*len(matrix[0]) for _ in range(len(matrix))]
    return dfs2Helper(matrix, start, dest, visited)

def dfs2Helper(matrix, start, dest, visited):
    if start[0] == dest[0] and start[1] == dest[1]:
        return True
    
    direction = [[0,1], [0,-1], [1,0], [-1,0]]
    
    for d in direction:
        x = start[0]
        y = start[1]
        while 0 <= x + d[0] < len(matrix) and 0 <= y + d[1] < len(matrix[0]) and matrix[x+d[0]][y+d[1]] == 0:
            x = x + d[0]
            y = y + d[1]
        print(x, y)
        if x < 0 or x > len(matrix) - 1 or y < 0 or y > len(matrix[0]) - 1:
            continue
        if matrix[x][y] == 1:
            continue
        if visited[x][y]:
            continue
        visited[x][y] = True
        if dfs2Helper(matrix, (x, y), dest, visited):
            return True
    return False

matrix = [
    [0, 0, 1, 0, 0],
    [0, 0, 0, 0, 0],
    [0, 0, 0, 1, 0],
    [1, 1, 0, 1, 1],
    [0, 0, 0, 0, 0]
]

start = (0, 4)
dest  = (4, 4)
print(dfs2(matrix, start, dest))

0 4
0 4
0 3
0 4
0 3
1 3
1 4
1 4
1 0
1 4
1 0
2 0
2 2
2 2
2 0
4 2
4 4
True


### <a id='Ex3'>Ex.3 The Maze III</a>

There is a ball in a maze with empty spaces and walls. The ball can go through empty spaces by rolling up, down, left or right, but it won't stop rolling until hitting a wall. When the ball stops, it could choose the next direction.

Given the ball's start position, the destination and the maze, <font color='red'>find the shortest distance for the ball to stop at the destination</font>. The distance is defined by the number of empty spaces traveled by the ball from the start position (excluded) to the destination (included). If the ball cannot stop at the destination, return -1.

The maze is represented by a binary 2D array. 1 means the wall and 0 means the empty space. You may assume that the borders of the maze are all walls. The start and destination coordinates are represented by row and column indexes.

<img src="../images/ch17/maze4.png" width="640"/>
<img src="../images/ch17/maze5.png" width="640"/>

In [42]:
def shortestDistance(matrix, start, destination):
    heap = [(0, start)]
    visited = set()
    
    while heap:
        dist, node = heapq.heappop(heap)
        if node in visited: # if node in visited, that means node alread has a shortest distance to start node
            continue
        visited.add(node)
        if node == destination:
            return dist
        for dirc in [(0, 1), (0, -1), (1, 0), (-1, 0)]:
            x = node[0]
            y = node[1]
            neighborDist = dist
            while 0 <= x + dirc[0] < len(matrix) and 0 <= y + dirc[1] < len(matrix[0]) and matrix[x+dirc[0]][y+dirc[1]] == 0:
                x = x + dirc[0]
                y = y + dirc[1]
                neighborDist += 1
            print(x, y, neighborDist)
            if (x, y) not in visited:# 首次错误：把(x, y)写成node
                heapq.heappush(heap, (neighborDist, (x, y)))

    return -1

matrix = [
    [0, 0, 1, 0, 0],
    [0, 0, 0, 0, 0],
    [0, 0, 0, 1, 0],
    [1, 1, 0, 1, 1],
    [0, 0, 0, 0, 0]
]

start = (0, 4)
dest  = (4, 4)
shortestDistance(matrix, start, dest)

0 4 0
0 3 1
2 4 2
0 4 0
0 4 2
0 3 1
1 3 2
0 3 1
1 4 3
1 0 5
1 3 2
0 3 3
2 4 2
2 4 2
2 4 2
0 4 4
1 4 3
1 0 7
2 4 4
0 4 4
1 4 9
1 0 5
2 0 6
0 0 6
0 1 7
0 0 6
2 0 8
0 0 6
2 2 8
2 0 6
2 0 6
0 0 8
0 1 7
0 0 8
2 1 9
0 1 7
2 2 8
2 0 10
4 2 10
1 2 9
1 4 11
1 0 11
4 2 12
1 2 9
2 2 10
2 0 10
2 1 9
0 1 11
4 4 12
4 0 12
4 2 10
1 2 13
4 4 16
4 0 12
4 0 12
4 0 12


12

### <a id='Ex4'>Ex.4 The Maze IV</a>

There is a ball in a maze with empty spaces and walls. The ball can go through empty spaces by rolling up (u), down (d), left (l) or right (r), but it won't stop rolling until hitting a wall. When the ball stops, it could choose the next direction. There is also a hole in this maze. <font color="red">The ball will drop into the hole if it rolls on to the hole</font>.

Given the ball position, the hole position and the maze, find out how the ball could drop into the hole by moving the shortest distance. The distance is defined by the number of empty spaces traveled by the ball from the start position (excluded) to the hole (included). Output the moving directions by using 'u', 'd', 'l' and 'r'. Since there could be several different shortest ways, you should output the lexicographically smallest way. If the ball cannot reach the hole, output "impossible".

The maze is represented by a binary 2D array. 1 means the wall and 0 means the empty space. You may assume that the borders of the maze are all walls. The ball and the hole coordinates are represented by row and column indexes.

In [43]:
def findShortestWay(maze, ball, hole):
    dirs = {'u' : (-1, 0), 'r' : (0, 1), 'l' : (0, -1), 'd': (1, 0)}
    heap = [(0, '', ball)]  #(distance, path, node)
    visited = set()
    
    while heap:
        dist, path, node = heapq.heappop(heap)
        if node in visited:
            continue
        if node == hole:
            return path
        visited.add(node)
        
        for d, step in dirs.items():
            x = node[0]
            y = node[1]
            neighborDist = dist
            while 0 <= x+step[0] < len(maze) and 0 <= y+step[1] < len(maze[0]) and maze[x+step[0]][y+step[1]] == 0:
                x = x + step[0]
                y = y + step[1]
                neighborDist += 1
            if (x, y) not in visited:
                heapq.heappush(heap, (neighborDist, path+d, (x, y)))
    
    return 'impossible'

matrix = [
    [0, 0, 1, 0, 0],
    [0, 0, 0, 0, 0],
    [0, 0, 0, 1, 0],
    [1, 1, 0, 1, 1],
    [0, 0, 0, 0, 0]
]

start = (0, 0)
dest  = (1, 4)
findShortestWay(matrix, start, dest)

'drur'

### <a id='Ex1'>Ex.1 Flood Fill</a>

An image is represented by a 2-D array of integers, each integer representing the pixel value of the image (from 0 to 65535).

Given a coordinate (sr, sc) representing the starting pixel (row and column) of the flood fill, and a pixel value newColor, "flood fill" the image.

To perform a "flood fill", consider the starting pixel, plus any pixels connected 4-directionally to the starting pixel of the same color as the starting pixel, plus any pixels connected 4-directionally to those pixels (also with the same color as the starting pixel), and so on. Replace the color of all of the aforementioned pixels with the newColor.

At the end, return the modified image.

In [2]:
def floodFill(image, sr, sc, newColor):
    rows, cols, originColor = len(image), len(image[0]), image[sr][sc]
    floodFill_dfs(image, rows, cols, originColor, newColor)
    return image

def floodFill_dfs(image, row, col, originColor, newColor):
    if 0 <= row < len(image) and 0 <= col < len(image) and image[row][col] == originColor:
        image[row][col] = newColor
        for (x, y) in [(0, 1), (0, -1), (1, 0), (-1, 0)]:
            floodFill_dfs(image, row + x, col + y, originColor, newColor)        

image = [
    [1,1,1],
    [1,1,0],
    [1,0,1]
]
sr = 1
sc = 1
newColor = 2
floodFill(image, sr, sc, newColor)

[[1, 1, 1], [1, 1, 0], [1, 0, 1]]

### <a id='Ex2'>Ex.2 Friend Circles</a>

There are N students in a class. Some of them are friends, while some are not. Their friendship is transitive in nature. For example, if A is a direct friend of B, and B is a direct friend of C, then A is an indirect friend of C. And we defined a friend circle is a group of students who are direct or indirect friends.

Given a N*N matrix M representing the friend relationship between students in the class. If M[i][j] = 1, then the ith and jth students are direct friends with each other, otherwise not. And you have to output the total number of friend circles among all the students.

Input: 

[[1,1,0],

[1,1,0],

[0,0,1]]

Output: 2

Explanation:The 0th and 1st students are direct friends, so they are in a friend circle. 

The 2nd student himself is in a friend circle. So return 2.

# Solutions:
每一行都表示其中一个人，因此我们可以针对每个人来统计一下朋友圈的个数。先找到A，然后找到与A有关的B，然后去找与B有关的C，以此类推。调查过的朋友圈全部记成0。

In [46]:
def findCircleNum(M):
    circles = 0
    stack = []
    for i in range(len(M)):
        if M[i][i] == 0:
            continue
        stack = [i]
        while stack:
            cur = stack.pop()
            if M[cur][cur] == 0:
                continue
            M[cur][cur] = 0
            for j in range(len(M)):
                if M[cur][j] != 0 and M[j][j] != 0:
                    stack.append(j)
        circles += 1
    return circles

M = [
     [1,1,0],
     [1,1,0],
     [0,0,1]]
findCircleNum(M)
        

2

In [50]:
def findCircleNum(M):
    circles = 0
    for i in range(len(M)):
        if M[i][i] == 0:
            continue
        if(findCircleNum_dfs(M, i)):
            circles += 1
    return circles

def findCircleNum_dfs(M, cur):
    if M[cur][cur] == 0:
        return False
    M[cur][cur] = 0
    for j in range(len(M)):
        if M[cur][j] != 0 and M[j][j]:
            findCircleNum_dfs(M, j)
    return True

M = [
     [1,1,0],
     [1,1,0],
     [0,0,1]]
findCircleNum(M)


2

### <a id='Ex3'>Ex.3 Number of Islands</a>

Given a 2d grid map of '1's (land) and '0's (water), count the number of islands. An island is surrounded by water and is formed by connecting adjacent lands horizontally or vertically. You may assume all four edges of the grid are all surrounded by water.

In [53]:
def numOfLands(grid):
    if not grid:
        return 0
    count = 0
    for i in range(len(grid)):
        for j in range(len(grid[0])):
            if grid[i][j] == 1:
                dfs(grid, i, j)
                count += 1
    return count
                
def dfs(grid, i, j):
    if grid[i][j] == 0:
        return 0
    grid[i][j] = 0
    for (x, y) in [(0, 1), (0, -1), (1, 0), (-1, 0)]:
        if 0 <= i + x < len(grid) and 0 <= j + y < len(grid[0]) and grid[i+x][j+y] == 1:
            dfs(grid, i + x, j + y)

M = [
     [1,1,0],
     [1,1,0],
     [0,0,1]
]
numOfLands(M)

2

### <a id='Ex4'>Ex.4 Max Area of Island</a>

Given a non-empty 2D array grid of 0's and 1's, an island is a group of 1's (representing land) connected 4-directionally (horizontal or vertical.) You may assume all four edges of the grid are surrounded by water.

Find the maximum area of an island in the given 2D array. (If there is no island, the maximum area is 0.)

[0,0,1,0,0,0,0,1,0,0,0,0,0],

 [0,0,0,0,0,0,0,1,1,1,0,0,0], 
 
 [0,1,1,0,1,0,0,0,0,0,0,0,0], 
 
 [0,1,0,0,1,1,0,0,1,0,1,0,0],
 
 [0,1,0,0,1,1,0,0,1,1,1,0,0], 
 
 [0,0,0,0,0,0,0,0,0,0,1,0,0],
 
 [0,0,0,0,0,0,0,1,1,1,0,0,0],
 
 [0,0,0,0,0,0,0,1,1,0,0,0,0]
 
Given the above grid, return 6. Note the answer is not 11, because the island must be connected 4-directionally.

In [55]:
def maxAreaOfIsland(grid):
    rows, cols = len(grid), len(grid[0])
    result = 0
    for i in range(rows):
        for j in range(cols):
            result = max(result, dfs(grid, i, j))
    return result

def dfs(grid, i, j):
    if 0 <= i < len(grid) and 0 <= j < len(grid[0]) and grid[i][j] == 1:
        grid[i][j] = 0 # 记得标记已经visit过的
        return 1 + dfs(grid, i + 1, j) + dfs(grid, i - 1, j) + dfs(grid, i, j + 1) + dfs(grid, i, j - 1)
    return 0

matrix = [
    [0, 0, 1, 0, 0],
    [0, 0, 0, 0, 0],
    [0, 0, 0, 1, 0],
    [1, 1, 0, 1, 1],
    [0, 0, 0, 0, 0]
]

maxAreaOfIsland(matrix)


3

### <a id='Ex5'>Ex.5 Employee Importance</a>

You are given a data structure of employee information, which includes the employee's unique id, his importance value and his direct subordinates' id.

For example, employee 1 is the leader of employee 2, and employee 2 is the leader of employee 3. They have importance value 15, 10 and 5, respectively. Then employee 1 has a data structure like [1, 15, [2]], and employee 2 has [2, 10, [3]], and employee 3 has [3, 5, []]. Note that although employee 3 is also a subordinate of employee 1, the relationship is not direct.

Now given the employee information of a company, and an employee id, you need to return the total importance value of this employee and all his subordinates.

Input: [[1, 5, [2, 3]], [2, 3, []], [3, 3, []]], 1

Output: 11

Explanation:

Employee 1 has importance value 5, and he has two direct subordinates: employee 2 and employee 3. They both have importance value 3. So the total importance value of employee 1 is 5 + 3 + 3 = 11.

# Solutions:
1. 创建一个abstract data type去存储数据
2. 创建一个dict，用于根据id来寻找特定的node
3. 根据给定的id，使用dfs，返回value的和

In [59]:
class Employee:
    def __init__(self, id, importance, subordinates):
        self.id = id
        self.importance = importance
        self.subordinates = subordinates

def getImportance(employees, id):
    table = {emp.id : emp for emp in employees}
    return dfs(table, id)

def dfs(table, id):
    value = table[id].importance
    for sub in table[id].subordinates:
        value += dfs(table, sub)
    return value

e3 = Employee(3, 3, [])
e2 = Employee(2, 3, [])
e1 = Employee(1, 5, [2, 3])
emps = [e1, e2, e3]
getImportance(emps, 1)

11

In [62]:
def getImportance(employees, id):
    stack = [id]
    table = {emp.id : emp for emp in employees}
    value = 0
    while stack:
        cur = stack.pop()
        value += table[cur].importance
        for sub in table[cur].subordinates: # first error: write table[cur] as table[id]
            stack.append(sub)
    return value

e3 = Employee(3, 3, [])
e2 = Employee(2, 3, [])
e1 = Employee(1, 5, [2, 3])
emps = [e1, e2, e3]
getImportance(emps, 1)

11

# Graph Practice III #

### <a id='Ex1'>Ex.1 Is Graph Bipartite?</a>

Given an undirected graph, return true if and only if it is bipartite.

Recall that a graph is bipartite if we can split it's set of nodes into two independent subsets A and B such that every edge in the graph has one node in A and another node in B.

The graph is given in the following form: graph[i] is a list of indexes j for which the edge between nodes i and j exists.  Each node is an integer between 0 and graph.length - 1.  There are no self edges or parallel edges: graph[i] does not contain i, and it doesn't contain any element twice.

<img src="../images/ch17/bipartite1.png" width="440"/>
<img src="../images/ch17/bipartite2.png" width="540"/>

# Solutions
1. 创建一个visited dict, key为当前点的id(0,1,2,3...), value为1(第一组)和-1(第二组)
2. 遍历所有点，从第一个点开始，先把点标记为1，其neighbor标记为-1
3. dfs: 如果自身未visited，组别与cur取反。如果自身已visited，判断与cur是否相反，若相同返回False

In [3]:
def isBipartite(graph):
    visited = {}
    for i in range(len(graph)):
        if i in visited:
            continue
        # 注意这里，由于dfs会先针对前面的点进行遍历，那么如果这层循环中有一个点不在visited也就表示这个点与dfs遍历的所有点没有联系
        # 因此直接把其加入group1即可，那么就可能如下的图像
        # x
        # x
        # 发现上下并没有联系
        visited[i] = 1 
        if not dfs(graph, visited, i):
            return False
    return True

def dfs(graph, visited, node):
    neighbors = graph[node]
    for j in neighbors:
        if j in visited:
            if visited[j] == visited[node]:
                return False
        else:
            visited[j] = visited[node] * -1
            dfs(graph, visited, j)
    return True

graph = [[1,3], [0,2], [1,3], [0,2]]
print(isBipartite(graph))

graph = [[1,2,3], [0,2], [0,1,3], [0,2]]
print(isBipartite(graph))

True
False


### <a id='Ex2'>Ex.2 Pacific Atlantic Water Flow</a>

Given an m x n matrix of non-negative integers representing the height of each unit cell in a continent, the "Pacific ocean" touches the left and top edges of the matrix and the "Atlantic ocean" touches the right and bottom edges.

Water can only flow in four directions (up, down, left, or right) from a cell to another one with height equal or lower.

Find the list of grid coordinates where water can flow to both the Pacific and Atlantic ocean.

<img src="../images/ch17/ocean.png" width="740"/>

# Solution:
如果根据每一个点找一次他们是否能到达pacific和atlantic，那么会需要n*n*n^2，即O(n^4)的时间复杂读，因此可以从反向考虑
1. 分别从Pacific和Atlantic边缘上的点，往高处走。
2. 对于pacific边缘点往上能走到的点，录入进p_visited中
3. 对于Atlantic边缘点上能走到的点，录入进a_visited中
4. 将p_visited和a_visited做一次交集运算

In [4]:
def pacificAtlantic(matrix):
    if not matrix:
        return []
    
    rows, cols = len(matrix), len(matrix[0])
    P_visited = set()
    A_visited = set()
    for i in range(rows):
        dfs(matrix, (i, 0), P_visited)
        dfs(matrix, (i, cols - 1), A_visited)
    for j in range(cols):
        dfs(matrix, (0, j), P_visited)
        dfs(matrix, (rows - 1, j), A_visited)
    print(P_visited)
    print(A_visited)
    return P_visited & A_visited

def dfs(matrix, pos, visited):
    rows, cols = len(matrix), len(matrix[0])
    if pos in visited:
        return
    visited.add(pos)
    for dirc in [(0, 1), (0, -1), (1, 0), (-1, 0)]:
        x = pos[0] + dirc[0]
        y = pos[1] + dirc[1]
        if 0 <= x < rows and 0 <= y < cols and (x, y) not in visited and matrix[x][y] >= matrix[pos[0]][pos[1]]:
            dfs(matrix, (x, y), visited)
            
matrix = [
    [1,2,2,3,4],
    [3,2,3,4,4],
    [2,4,5,3,1],
    [6,7,1,4,5],
    [5,1,1,2,4]
]
print(pacificAtlantic(matrix))                

{(0, 1), (1, 2), (0, 0), (1, 3), (3, 0), (2, 2), (1, 0), (3, 1), (1, 4), (2, 1), (2, 0), (1, 1), (0, 4), (0, 3), (0, 2), (4, 0)}
{(4, 1), (1, 3), (3, 2), (3, 3), (3, 0), (2, 2), (4, 4), (3, 1), (1, 4), (2, 3), (4, 3), (0, 4), (4, 2), (3, 4), (2, 4), (4, 0)}
{(1, 3), (3, 0), (3, 1), (1, 4), (0, 4), (2, 2), (4, 0)}


In [74]:
# BFS
from collections import deque
def pacificAtlantic(matrix):
    rows, cols = len(matrix), len(matrix[0])
    P_visited = set([(i, 0) for i in range(len(matrix))] + [(j, 0) for j in range(len(matrix[0]))])
    A_visited = set([(i, cols - 1) for i in range(len(matrix))] + [(rows - 1, j) for j in range(len(matrix[0]))])
    return bfs(P_visited) & bfs(A_visited)

def bfs(ocean):
    q = deque(ocean)
    while q:
        (x, y) = q.popleft()
        for (i, j) in [(0, 1), (0, -1), (1, 0), (-1, 0)]:
            m = x + i  
            n = y + j
            if 0 <= m < len(matrix) and 0 <= n < len(matrix) and (m, n) not in ocean and matrix[m][n] >= matrix[x][y]:
                ocean.add((m, n))
                q.append((m, n))
                bfs(ocean)
    return ocean

pacificAtlantic(matrix)

{(0, 4), (1, 3), (1, 4), (2, 2), (3, 0), (3, 1), (4, 0)}

### <a id='Ex3'>Ex.3 Longest Increasing Path in a Matrix</a>

Given an integer matrix, find the length of the longest increasing path.

From each cell, you can either move to four directions: left, right, up or down. You may NOT move diagonally or move outside of the boundary (i.e. wrap-around is not allowed).

<img src="../images/ch17/longest1.png" width="100"/>
<img src="../images/ch17/longest2.png" width="100"/>

# Solution:
针对每一个点找最长升序路径，不过可以使用DP进行优化。

In [78]:
def longestIncreasingPath(matrix):
    if not matrix:
        return 0
    rows, cols = len(matrix), len(matrix[0])
    dp = [[-1 for i in range(cols)] for j in range(rows)] # 注意是先写cols然后写rows
    res = 0
    
    for i in range(rows):
        for j in range(cols):
            res = max(dfs(i, j, matrix, dp, rows, cols), res)
    return res

def dfs(i, j, matrix, dp, rows, cols):
    if dp[i][j] != -1: # also act as visited 
        return dp[i][j]
    res = 1
    for (x, y) in [(0, 1), (0, -1), (1, 0), (-1, 0)]:
        m = i + x
        n = j + y
        if 0 <= m < rows and 0 <= n < cols and matrix[m][n] > matrix[i][j]: # 由于条件设定为不可能越界，因此最小返回1
            length = 1 + dfs(m, n, matrix, dp, rows, cols)
            res = max(res, length)
    dp[i][j] = res # first error: 之前写在了for循环里面，dp[m][n]. 
    return res

nums = [
  [9,9,4],
  [6,6,8],
  [2,1,1]
]
longestIncreasingPath(nums)
            

4

### <a id='Ex4'>Ex.4 01 Matrix</a>

Given a matrix consists of 0 and 1, find the distance of the nearest 0 for each cell.

The distance between two adjacent cells is 1.

Example 1: 

Input:

0 0 0

0 1 0

0 0 0

Output:

0 0 0

0 1 0

0 0 0

Example 2: 

Input:

0 0 0

0 1 0

1 1 1

Output:

0 0 0

0 1 0

1 2 1

# Solution:
一个方法是遍历每一个点，找出每个点距离最近的0，返回距离，但是时间复杂度为O(n^4)

另一个方法利用DP的思维：
创建一个DP矩阵
先将所有0存进一个数组中，遍历该数组，更新数组中的每一个元素的上下左右，并更新DP中的当前元素。

由于是从所有0元素开始的，因此所有0被遍历完后，0旁边的元素一定是有一个最小值的了，然后对0旁边的旁边的元素遍历...

In [83]:
def updateMatrix(matrix):
    if not matrix:
        return []
    rows, cols = len(matrix), len(matrix[0])
    dp = [[0 for i in range(cols)] for j in range(rows)]
    arr = []
    for i in range(rows):
        for j in range(cols):
            if matrix[i][j] != 0:
                dp[i][j] = 0x7fffffff
            else:
                arr.append((i, j))
    
    for (x, y) in arr:
        for (i, j) in [(0, 1), (0, -1), (1, 0), (-1, 0)]:
            m = x + i
            n = y + j
            if 0 <= m < rows and 0 <= n < cols:
                if dp[m][n] > 1 + dp[x][y]:
                    dp[m][n] = 1 + dp[x][y]
                    arr.append((m, n))       # 逻辑错误：这里只有当dp[m][n] > 1 + dp[x][y]才进行append，否则会一直append，导致死循环
    
    return dp

matrix = [
    [0, 0, 0],
    [0, 1, 0],
    [0, 0, 0],
]
updateMatrix(matrix)

[[0, 0, 0], [0, 1, 0], [0, 0, 0]]

# Solution2:
初始化一个dp矩阵用来表示每个元素距离最近0的距离，如果Matrix是0，dp存0，否则存无穷大

先从左上到右下计算出每个元素上下左右元素的距离0的最小值。当然此时由于前面的元素本身不一定是最优解，所以其上下左右的也不一定是最优解。这只能保证，如果一个元素是最优解的话，那么其右和其下是最优解

然后以同样的方式从右下计算到左上，这一轮可以确保一个最优解元素的上和左是最优解。

两轮循环后，一个元素的上下左右都会是最优解了。

In [99]:
def updateMatrix2(matrix):
    rows, cols = len(matrix), len(matrix[0])
    dp = [[0 if matrix[i][j] == 0 else 0x7fffffff for i in range(cols)] for j in range(rows)]
    
    for i in range(rows):
        for j in range(cols):
            DP(i, j, dp, matrix, rows, cols)
    
    for i in range(rows - 1, -1, -1):
        for j in range(cols - 1, -1, -1):
            DP(i, j, dp, matrix, rows, cols)
    
    return dp

def DP(i, j, dp, matrix, rows, cols):
    for (x, y) in [(0, 1), (0, -1), (1, 0), (-1, 0)]:
        m = x + i
        n = y + j # first error：write j as i
        if 0 <= m < rows and 0 <= n < cols:
            dp[m][n] = min(dp[m][n], 1 + dp[i][j])

matrix = [
    [0, 0, 0],
    [0, 1, 0],
    [0, 0, 0],
]
updateMatrix2(matrix)



[[0, 0, 0], [0, 1, 0], [0, 0, 0]]

# Graph Practice IV #

### <a id='Ex1'>Ex.1 Accounts Merge</a>

Given a list accounts, each element accounts[i] is a list of strings, where the first element accounts[i][0] is a name, and the rest of the elements are emails representing emails of the account.

Now, we would like to merge these accounts. Two accounts definitely belong to the same person if there is some email that is common to both accounts. Note that even if two accounts have the same name, they may belong to different people as people could have the same name. A person can have any number of accounts initially, but all of their accounts definitely have the same name.

After merging the accounts, return the accounts in the following format: the first element of each account is the name, and the rest of the elements are emails in sorted order. The accounts themselves can be returned in any order.

Example 1:

Input: 


In [100]:
accounts = [
    ["John", "johnsmith@mail.com", "john00@mail.com"], 
    ["John", "johnnybravo@mail.com"], 
    ["John", "johnsmith@mail.com", "john_newyork@mail.com"], 
    ["Mary", "mary@mail.com"]
]

Output = [
    ["John", 'john00@mail.com', 'john_newyork@mail.com', 'johnsmith@mail.com'],  
    ["John", "johnnybravo@mail.com"], 
    ["Mary", "mary@mail.com"]
]

Explanation: 

The first and third John's are the same person as they have the common email "johnsmith@mail.com".

The second John and Mary are different people as none of their email addresses are used by other accounts.

We could return these lists in any order.

** Solution **

We give each account an ID, based on the index of it within the list of accounts.


In [101]:
[
["John", "johnsmith@mail.com", "john00@mail.com"], # Account 0
["John", "johnnybravo@mail.com"], # Account 1
["John", "johnsmith@mail.com", "john_newyork@mail.com"],  # Account 2
["Mary", "mary@mail.com"] # Account 3
]
print()




Next, build an emails_accounts_map that maps an email to a list of accounts, which can be used to track which email is linked to which account. This is essentially our graph.

In [102]:
# emails_accounts_map of email to account ID
{
  "johnsmith@mail.com": [0, 2],
  "john00@mail.com": [0],
  "johnnybravo@mail.com": [1],
  "john_newyork@mail.com": [2],
  "mary@mail.com": [3]
}

{'johnsmith@mail.com': [0, 2],
 'john00@mail.com': [0],
 'johnnybravo@mail.com': [1],
 'john_newyork@mail.com': [2],
 'mary@mail.com': [3]}

Next we do a DFS on each account in accounts list and look up emails_accounts_map to tell us which accounts are linked to that particular account via common emails. This will make sure we visit each account only once. This is a recursive process and we should collect all the emails that we encounter along the way.

Lastly, sort the collected emails and add it to final results, res along with the name.

我们可以发现，这题的关键在于找到accounts之间的联系，然后合并。找到联系的方式是通过共用的email。因此我们需要建立一个email->accounts的dict。

首先需要遍历所有的account，针对每一个account使用DFS去寻找共通的account。

使用DFS的方式是：先建立一个email set，其中存储可能存在一个account中的所有email。根据当前account（标记当前account是visited），通过里面的email和email_account_map去寻找下一个account。以此类推

In [119]:
def accountsMerge(accounts):
    emails_accounts_map = {}
    for i, account in enumerate(accounts):
        for email in account[1:]:
            if email not in emails_accounts_map:
                emails_accounts_map[email] = [i] # first error: write i as account
            else:
                emails_accounts_map[email].append(i)
    
    mergedAccounts = []
    visited = set()
    for i, account in enumerate(accounts):
        name = account[0]
        emails = set()
#         visited = set() Third error
        print(visited)
        if i in visited:
            continue
        dfs(i, emails, visited, emails_accounts_map, accounts)
        mergedAccounts.append([name] + list(emails)) # second error: write () as []
    return mergedAccounts

def dfs(i, emails, visited, emails_accounts_map, accounts):
    if i in visited:
        return
    visited.add(i)
    for email in accounts[i][1:]:
        emails.add(email)
        for neighbor in emails_accounts_map[email]:
            dfs(neighbor, emails, visited, emails_accounts_map, accounts)

accounts = [
    ["John", "johnsmith@mail.com", "john00@mail.com"], 
    ["John", "johnnybravo@mail.com"], 
    ["John", "johnsmith@mail.com", "john_newyork@mail.com"], 
    ["Mary", "mary@mail.com"]
]

accountsMerge(accounts)

set()
{0, 2}
{0, 1, 2}
{0, 1, 2}


[['John', 'johnsmith@mail.com', 'john00@mail.com', 'john_newyork@mail.com'],
 ['John', 'johnnybravo@mail.com'],
 ['Mary', 'mary@mail.com']]

### <a id='Ex2'>Ex.2 Word Ladder</a>

Given two words (beginWord and endWord), and a dictionary's word list, find the length of shortest transformation sequence from beginWord to endWord, such that:

Only one letter can be changed at a time.
Each transformed word must exist in the word list. Note that beginWord is not a transformed word.
For example,

Given:

beginWord = "hit"

endWord = "cog"

wordList = ["hot","dot","dog","lot","log","cog"]

As one shortest transformation is "hit" -> "hot" -> "dot" -> "dog" -> "cog",

return its length 5.

In [6]:
from collections import deque
def ladderLength(beginWord, endWord, wordList):
    wordSet = set(wordList)
    queue = deque([[beginWord, 1]])
    while queue:
        word, steps = queue.popleft()
        for pos in range(len(word)):
            for ch in 'abcdefghijklmnopqrstuvwxyz':
                newWord = word[:pos] + ch + word[pos+1:]
                if newWord == endWord:
                    return steps + 1
                elif newWord in wordSet:
                    queue.append([newWord, steps + 1])
                    wordSet.remove(newWord) # removing it from wordSet means that it has been visited
    return 0

beginWord = "hit"
endWord = "cog"
wordList = ["hot","dot","dog","lot","log","cog"]

ladderLength(beginWord, endWord, wordList)

5

### <a id='Ex3'>Ex.3 Word Ladder II</a>

Given two words (beginWord and endWord), and a dictionary's word list, find all shortest transformation sequence(s) from beginWord to endWord, such that:

Only one letter can be changed at a time
Each transformed word must exist in the word list. Note that beginWord is not a transformed word.
For example,

Given:

beginWord = "hit"

endWord = "cog"

wordList = ["hot","dot","dog","lot","log","cog"]

Returns:

In [7]:
  [
    ["hit","hot","dot","dog","cog"],
    ["hit","hot","lot","log","cog"]
  ]

[['hit', 'hot', 'dot', 'dog', 'cog'], ['hit', 'hot', 'lot', 'log', 'cog']]

# Solution:
此题的感觉在于如何存储路径和如何找到一条最短路径的情况下确保同一层执行完后才结束

难点一：用dict去存储，格式为{parent:[children]}

难点二：用另类的queue，每次使用一个level同时开始形成next_level，即使找到最短路径也只有当前level执行完后才结束

In [14]:
from collections import defaultdict
import string
def findLadders(start, end, wordList):
    wordSet = set(wordList)
#     child2parents = defaultdict(set) # child : [parents]
    child2parents = {}
    level = {start}
    
    while level and end not in child2parents:
        next_level = defaultdict(set)
        for word in level:
            for pos in range(len(word)):
                for ch in string.ascii_lowercase:
                    newWord = word[:pos] + ch + word[pos + 1:]
                    if newWord in wordSet and newWord not in child2parents: # newWord not in child2parents这句的作用相当于visited
                        next_level[newWord].add(word)
#                         wordSet.remove(newWord)
# 注意上面newWord not in child2parents和wordSet.remove(newWord)虽然都是充当了visited的角色，但是作用差别很大。
# 对于wordSet.remove(newWord)，当我们只要找到一个cog的时候，下一个就找不到了，比如当dog->cog的时候，那么log->cog就找不到了，因为cog被remove了
# 但是对于newWord not in child2parents，因为next_level[newWord].add(word)没有对child2parents修改，因此同一level之间不互相影响
        level = next_level
        child2parents.update(next_level)
    
    print(child2parents)
    res = [[end]]
#     while res[0][0] != start:
#         for r in res:
#             for p in child2parents[r[0]]:
#                 r = [p] + r
    while res[0][0] != start:
        res = [[p] + r for r in res for p in child2parents[r[0]]]
    return res

beginWord = "hit"
endWord = "cog"
wordList = ["hot","dot","dog","lot","log","cog"]
findLadders(beginWord, endWord, wordList)

{'hot': {'hit'}, 'dot': {'hot'}, 'lot': {'hot'}, 'dog': {'dot'}, 'log': {'lot'}, 'cog': {'dog', 'log'}}


[['hit', 'hot', 'dot', 'dog', 'cog'], ['hit', 'hot', 'lot', 'log', 'cog']]

In [7]:
import math
math.log(2, 2)
-1/3 * math.log(4/27, 2)
print(-(2/5*math.log(2/5, 2) + 3/5*math.log(3/5, 2)))

0.9709505944546686


In [8]:
0.92*3/8 + 0.97*5/8

0.9512499999999999

In [9]:
print(-(3/4*math.log(3/4, 2) + 1/4*math.log(1/4, 2)))

0.8112781244591328
