In [None]:
import cv2
import numpy as np
import math
import pytesseract

# Load the timetable photo. cv2.imread does not raise on a missing or
# unreadable file -- it returns None -- so fail here with a clear message
# instead of letting a later OpenCV call die with an opaque assertion.
IMAGE_PATH = 'timetable.jpg'
img = cv2.imread(IMAGE_PATH)
if img is None:
    raise FileNotFoundError(f"could not read image: {IMAGE_PATH!r}")

# 5x5 dilation of the colour image; in this cell it is only written to disk
# for inspection.
dilated = cv2.dilate(img, kernel=np.ones((5, 5), np.uint8), iterations=1)

# Grayscale copy and its Canny edge map (both hysteresis thresholds are 200).
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
edges = cv2.Canny(gray, 200, 200, apertureSize=3)

# Persist the intermediate images next to the notebook for visual debugging.
cv2.imwrite("dilated.jpg", dilated)
cv2.imwrite("gray.jpg", gray)
cv2.imwrite("edges.jpg", edges)

In [None]:
class Line:
    """A 2-D line segment whose endpoints are stored ordered by x.

    After construction ``(x0, y0)`` is the endpoint with the smaller x
    coordinate and ``(x1, y1)`` the other one. When both x coordinates are
    equal the endpoints are stored in swapped order relative to the
    constructor arguments.
    """

    # Orientation codes returned by configuration().
    HORIZONTAL = 0
    VERTICAL = 1

    def __init__(self, x0, y0, x1, y1):
        """Store the two endpoints, swapping them unless ``x0 < x1``."""
        if x0 < x1:
            self.x0, self.y0 = x0, y0
            self.x1, self.y1 = x1, y1
        else:
            self.x0, self.y0 = x1, y1
            self.x1, self.y1 = x0, y0

    def configuration(self):
        """Classify the segment as ``Line.HORIZONTAL`` or ``Line.VERTICAL``.

        A segment is vertical when its y extent is strictly larger than its
        x extent; otherwise (including ties) it is horizontal.
        """
        dx = abs(self.x0 - self.x1)
        dy = abs(self.y0 - self.y1)
        # The previous version returned HORIZONTAL when dx < dy, i.e. it
        # labelled steep segments horizontal and flat ones vertical.
        return Line.VERTICAL if dx < dy else Line.HORIZONTAL

    def __str__(self):
        """Return the endpoints formatted as ``"(x0, y0) (x1, y1)"``."""
        return f"({self.x0!s}, {self.y0!s}) ({self.x1!s}, {self.y1!s})"

    def length(self):
        """Return the Euclidean distance between the two endpoints."""
        return math.hypot(self.x0 - self.x1, self.y0 - self.y1)

    def coordinates(self):
        """Return ``((x0, y0), (x1, y1))`` with the stored values as-is."""
        return ((self.x0, self.y0), (self.x1, self.y1))

    def coordinates_as_int(self):
        """Return the endpoints with every coordinate passed through ``int``.

        ``int`` truncates toward zero rather than rounding.
        """
        return ((int(self.x0), int(self.y0)), (int(self.x1), int(self.y1)))

In [None]:
# Run OpenCV's line segment detector (advanced refinement) on the edge map.
lsd = cv2.createLineSegmentDetector(cv2.LSD_REFINE_ADV, sigma_scale=0.3)
dlines = lsd.detect(edges)

# The first element of the result holds the segments, indexed below as
# [i, 0] -> (x0, y0, x1, y1). Guard against it being None (nothing detected)
# so the cell yields an empty list instead of raising a TypeError.
segments = dlines[0] if dlines[0] is not None else np.empty((0, 1, 4), dtype=np.float32)
lines = [Line(x0, y0, x1, y1) for x0, y0, x1, y1 in segments[:, 0]]

# Debug overlay: draw each segment in green and label both endpoints with
# their rounded pixel coordinates.
imgCopy = img.copy()
for segment in segments[:, 0]:
    x0, y0, x1, y1 = (int(round(value)) for value in segment)
    cv2.putText(imgCopy, "(" + str(x0) + ", " + str(y0) + ")", (x0, y0), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1)
    cv2.putText(imgCopy, "(" + str(x1) + ", " + str(y1) + ")", (x1, y1), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1)
    cv2.line(imgCopy, (x0, y0), (x1, y1), (0, 255, 0), 2)

cv2.imwrite("lsd.jpg", imgCopy)

In [None]:
# Start from an all-black canvas with the same shape as the source image.
tableStructure = np.zeros(img.shape, dtype=np.uint8)
print(img.shape)

# Trace every detected segment onto the canvas as a 1-pixel white line,
# using the integer (truncated) endpoint coordinates.
for segment in lines:
    start, end = segment.coordinates_as_int()
    cv2.line(tableStructure, start, end, (255, 255, 255), 1)

In [None]:
# Thicken the drawn lines with one pass of a 5x5 dilation.
# NOTE: tableStructure is reassigned in place, so re-running this cell
# dilates the already-dilated canvas again.
kernel = np.ones(shape=(5, 5), dtype=np.uint8)
tableStructure = cv2.dilate(src=tableStructure, kernel=kernel, iterations=1)
cv2.imwrite("tableStructure.jpg", tableStructure)

In [None]:
# Collapse the drawn table skeleton to a single channel and extract its
# contours together with the full nesting hierarchy.
table_structure = cv2.cvtColor(tableStructure, cv2.COLOR_RGB2GRAY)
contours, hierarchy = cv2.findContours(
    table_structure,
    cv2.RETR_TREE,
    cv2.CHAIN_APPROX_SIMPLE,
)

# Bookkeeping names initialised here; nothing in this cell reads them.
count = 0
full_list = []
row = []
data = []
first_iter = 0
firsty = -1

# Preview: outline the bounding boxes of the first 20 contours on a blank
# single-channel canvas and log each box as "x, y, w, h".
rectangle_table = np.zeros(table_structure.shape, np.uint8)
for c in contours[:20]:
    x, y, w, h = cv2.boundingRect(c)
    print(x, y, w, h, sep=', ')
    top_left = (x, y)
    bottom_right = (x + w, y + h)
    cv2.rectangle(rectangle_table, top_left, bottom_right, 255, 1)

cv2.imwrite("rectangle_table.jpg", rectangle_table)

In [None]:
# Crop the bounding box of one contour out of the grayscale page and OCR it.
# NOTE(review): -13 looks like a hand-picked contour index for this
# particular image -- confirm before reusing on other inputs.
fcont = cv2.boundingRect(contours[-13])
x, y, w, h = fcont
# Printed as x, y, h, w -- presumably so the last two values line up with
# the (rows, cols) shape printed below.
print(x, y, h, w)

newImg = gray[y:y+h, x:x+w]
print(newImg.shape)
print(pytesseract.image_to_string(newImg))

# The cell used to end with Image.fromarray(newImg), but `Image` is not
# imported by any cell shown here, so it raised NameError on a fresh kernel.
# Save the crop to disk instead, as the other cells do with their images.
# (Inline display would need Pillow's Image imported in the imports cell.)
cv2.imwrite("cell.jpg", newImg)
