In [None]:
import re, clipboard, uuid
import numpy as np
from itertools import takewhile

import pandas as pd

from src.service.ParseService import ParseService


class ComplicatedParser(ParseService):
    def __init__(self):
        self.title = ['발파진동(cm/s)', '진동레벨dB(V)', '소음레벨dB(A)']
        self.blast_pattern = None

    def extract_columns(self, items_list):
        columns = list(takewhile(lambda x: not re.match(r'\d+월\d+일', x), items_list))
        if '발파패턴' in columns:
            self.blast_pattern = True
        else:
            self.blast_pattern = False
            
        return [i for i in items_list if i not in columns]

    def conversion_error_value(self, non_columns_list):
        conversion_error_list = []

        for item in non_columns_list:
            if item == "계측기오류":
                conversion_error_list.extend([np.nan, np.nan, np.nan])
            elif re.match(r'n/*?t', item, re.IGNORECASE) or item == "-":
                conversion_error_list.append(np.nan)
            else:
                conversion_error_list.append(item)

        return conversion_error_list

    def delete_other_value(self, conversion_error_list):
        filtered_list = []
        skip_count = 0

        for index in range(len(conversion_error_list)):
            try:
                if skip_count > 0:
                    skip_count -= 1
                    continue
                elif conversion_error_list[index] == '계' or conversion_error_list[index - 1] == '계':
                    continue
                elif re.match(r'\d+:\d+', conversion_error_list[index]) and re.match(r'\d+회', conversion_error_list[index - 1]):
                    print(conversion_error_list[index], end=" ")
                    filtered_list.append(conversion_error_list[index])
                    print(self.blast_pattern)
                    if self.blast_pattern:
                        skip_count += 4
                    else:
                        skip_count += 3
                else:
                    filtered_list.append(conversion_error_list[index])
            except TypeError as e:
                filtered_list.append(conversion_error_list[index])

        return filtered_list

    def classification_by_date(self, filtered_list):
        section = []
        current_date_section = []

        for i in filtered_list:
            try:
                if re.match(r'\d+월\d+일', i) and current_date_section:
                    section.append(current_date_section)
                    current_date_section = []
                current_date_section.append(i)
            except TypeError as t:
                current_date_section.append(i)

        if current_date_section:
            section.append(current_date_section)

        return section

    def extract_location(self, classification_list):
        location_list = []
        data_count = 0

        for items in classification_list:
            for item in items:
                try:
                    if re.match(r'\d+\.\d+', item) or item is np.nan:
                        data_count += 1
                    elif data_count >= 3:
                        location_list.append(item)
                        data_count = 0
                except TypeError as t:
                    data_count += 1

        return list(set(location_list))

    def get_dict(self, classification_list, location_list):
        result = {}
        value = []

        date_key, time_key, location_key = None, None, None

        for item in classification_list:
            for x, i in enumerate(item):
                if isinstance(i, float):
                    i = str(i)
                if re.match(r'\d+월\d+일', i):
                    date_key = i
                elif re.match(r'\d+회', i):
                    count_key = i
                elif re.match(r'\d+:\d+', i):
                    time_key = i
                elif i not in location_list:
                    value.append(i)
                elif i in location_list:
                    location_key = i
                    unique_key = uuid.uuid4()
                    result[location_key] = {} if location_key not in result else result[location_key]
                    result[location_key][unique_key] = {'일시': f'{date_key} {time_key}'}
                    print(value, len(value))
                    for j in range(len(self.title)):
                        try:
                            tmp = float(value[j])
                        except ValueError:
                            tmp = np.nan
                        except IndexError:
                            print(value)
                        finally:
                            result[location_key][unique_key][self.title[j]] = tmp
                    value = []

        return result


In [35]:
test_parser = ComplicatedParser()
table_list = list(filter(lambda x: x, clipboard.paste().replace("\r\n", " ").split(" ")))

print(table_list)

['일시', '발파횟수', '시간', '장약량(kg)', '발파위치', '발파진동및소음측정치(max값)', '지발당장약량', '총장약량', 'STA', '발파진동(cm/s)', '진동레벨dB(V)', '소음레벨dB(A)', '측정위치', '2월1일', '1회', '7:01', '1.6-2.0', '310.0', '7k+751.00~753.50(종점)', '0.123', '64.92', '66.36', '안산천주교회', '2회', '11:31', '1.6-2.0', '160.0', '5k+184.15~182.90(시점)', 'N/T', 'N/T', 'N/T', '풍전빌딩(현장)', '0.105', '64.92', '60.95', '주공1단지아파트110동', '0.127', '68.22', '74.8', '로얄프라자', '3회', '18:16', '1.6-2.0', '283.0', '7k+753.80~756.00(종점)', '0.161', '60.58', '63.16', '안산천주교회', '계', '753.0', '2월2일', '1회', '7:00', '1.6-2.0', '310.0', '7k+756.00~758.50(종점)', '0.122', '68.41', '69.03', '안산천주교회', '2회', '10:42', '1.6-2.0', '160.0', '5k+181.65~180.40(시점)', '0.048', '54.74', '55.37', '풍전빌딩(현장)', '0.071', '60.42', '61.22', '주공1단지아파트110동', 'N/T', 'N/T', 'N/T', '로얄프라자', '3회', '18:17', '1.6-2.0', '306.0', '7k+758.50~761.00(종점)', '0.046', '56.69', '59.07', '안산천주교회', '계', '776.0', '2월3일', '1회', '7:01', '1.6-2.0', '315.0', '7k+759.75~762.25(종점)', '0.059', '59.6', '61.86', '안산천주교회'

In [36]:
non_columns_list = test_parser.extract_columns(table_list)

print(non_columns_list)

['2월1일', '1회', '7:01', '1.6-2.0', '310.0', '7k+751.00~753.50(종점)', '0.123', '64.92', '66.36', '안산천주교회', '2회', '11:31', '1.6-2.0', '160.0', '5k+184.15~182.90(시점)', 'N/T', 'N/T', 'N/T', '풍전빌딩(현장)', '0.105', '64.92', '60.95', '주공1단지아파트110동', '0.127', '68.22', '74.8', '로얄프라자', '3회', '18:16', '1.6-2.0', '283.0', '7k+753.80~756.00(종점)', '0.161', '60.58', '63.16', '안산천주교회', '계', '753.0', '2월2일', '1회', '7:00', '1.6-2.0', '310.0', '7k+756.00~758.50(종점)', '0.122', '68.41', '69.03', '안산천주교회', '2회', '10:42', '1.6-2.0', '160.0', '5k+181.65~180.40(시점)', '0.048', '54.74', '55.37', '풍전빌딩(현장)', '0.071', '60.42', '61.22', '주공1단지아파트110동', 'N/T', 'N/T', 'N/T', '로얄프라자', '3회', '18:17', '1.6-2.0', '306.0', '7k+758.50~761.00(종점)', '0.046', '56.69', '59.07', '안산천주교회', '계', '776.0', '2월3일', '1회', '7:01', '1.6-2.0', '315.0', '7k+759.75~762.25(종점)', '0.059', '59.6', '61.86', '안산천주교회', '2회', '10:16', '1.6-2.0', '165.0', '5k+179.15~177.90(시점)', '0.083', '58.5', '64.24', '풍전빌딩(현장)', '0.092', '62.41', '60.64', '주공1단지

In [37]:
conversion_error_list = test_parser.conversion_error_value(non_columns_list)

print(conversion_error_list)

['2월1일', '1회', '7:01', '1.6-2.0', '310.0', '7k+751.00~753.50(종점)', '0.123', '64.92', '66.36', '안산천주교회', '2회', '11:31', '1.6-2.0', '160.0', '5k+184.15~182.90(시점)', nan, nan, nan, '풍전빌딩(현장)', '0.105', '64.92', '60.95', '주공1단지아파트110동', '0.127', '68.22', '74.8', '로얄프라자', '3회', '18:16', '1.6-2.0', '283.0', '7k+753.80~756.00(종점)', '0.161', '60.58', '63.16', '안산천주교회', '계', '753.0', '2월2일', '1회', '7:00', '1.6-2.0', '310.0', '7k+756.00~758.50(종점)', '0.122', '68.41', '69.03', '안산천주교회', '2회', '10:42', '1.6-2.0', '160.0', '5k+181.65~180.40(시점)', '0.048', '54.74', '55.37', '풍전빌딩(현장)', '0.071', '60.42', '61.22', '주공1단지아파트110동', nan, nan, nan, '로얄프라자', '3회', '18:17', '1.6-2.0', '306.0', '7k+758.50~761.00(종점)', '0.046', '56.69', '59.07', '안산천주교회', '계', '776.0', '2월3일', '1회', '7:01', '1.6-2.0', '315.0', '7k+759.75~762.25(종점)', '0.059', '59.6', '61.86', '안산천주교회', '2회', '10:16', '1.6-2.0', '165.0', '5k+179.15~177.90(시점)', '0.083', '58.5', '64.24', '풍전빌딩(현장)', '0.092', '62.41', '60.64', '주공1단지아파트110동', na

In [38]:
filtered_list = test_parser.delete_other_value(conversion_error_list)

print(filtered_list)

7:01 False
11:31 False
18:16 False
7:00 False
10:42 False
18:17 False
7:01 False
10:16 False
17:30 False
7:25 False
11:04 False
12:15 False
16:24 False
7:00 False
10:41 False
17:31 False
7:00 False
9:52 False
17:24 False
10:12 False
17:21 False
7:00 False
9:55 False
18:01 False
7:42 False
17:49 False
7:00 False
11:21 False
17:05 False
7:00 False
10:14 False
17:04 False
7:00 False
10:11 False
12:34 False
16:30 False
7:00 False
10:37 False
7:00 False
10:40 False
7:01 False
10:35 False
17:35 False
7:00 False
10:20 False
16:45 False
7:01 False
10:00 False
17:37 False
7:15 False
11:36 False
12:00 False
16:59 False
7:01 False
12:20 False
18:30 False
7:01 False
18:24 False
7:11 False
11:51 False
17:40 False
['2월1일', '1회', '7:01', '0.123', '64.92', '66.36', '안산천주교회', '2회', '11:31', nan, nan, nan, '풍전빌딩(현장)', '0.105', '64.92', '60.95', '주공1단지아파트110동', '0.127', '68.22', '74.8', '로얄프라자', '3회', '18:16', '0.161', '60.58', '63.16', '안산천주교회', '2월2일', '1회', '7:00', '0.122', '68.41', '69.03', '안산천주교회',

In [39]:
classification_list = test_parser.classification_by_date(filtered_list)

print(classification_list)

[['2월1일', '1회', '7:01', '0.123', '64.92', '66.36', '안산천주교회', '2회', '11:31', nan, nan, nan, '풍전빌딩(현장)', '0.105', '64.92', '60.95', '주공1단지아파트110동', '0.127', '68.22', '74.8', '로얄프라자', '3회', '18:16', '0.161', '60.58', '63.16', '안산천주교회'], ['2월2일', '1회', '7:00', '0.122', '68.41', '69.03', '안산천주교회', '2회', '10:42', '0.048', '54.74', '55.37', '풍전빌딩(현장)', '0.071', '60.42', '61.22', '주공1단지아파트110동', nan, nan, nan, '로얄프라자', '3회', '18:17', '0.046', '56.69', '59.07', '안산천주교회'], ['2월3일', '1회', '7:01', '0.059', '59.6', '61.86', '안산천주교회', '2회', '10:16', '0.083', '58.5', '64.24', '풍전빌딩(현장)', '0.092', '62.41', '60.64', '주공1단지아파트110동', nan, nan, nan, '로얄프라자', '3회', '17:30', '0.057', '58.64', '60.59', '안산천주교회'], ['2월4일', '1회', '7:25', '0.05', '57.76', '61.53', '안산천주교회', '2회', '11:04', '0.07', '59.13', '53.8', '풍전빌딩(현장)', '0.113', '61.03', '61.39', '주공1단지아파트110동', nan, nan, nan, '로얄프라자'], ['2월5일', '1회', '12:15', nan, nan, nan, '안산천주교회', '2회', '16:24', '0.081', '60.05', '54.81', '풍전빌딩(현장)', '0.085', '61.6', '

In [40]:
location_list = test_parser.extract_location(classification_list)

print(location_list)

['안산천주교회', '주공1단지110동(현장)', '로얄프라자', '국원물산', '주공1단지아파트110동', '풍전빌딩(자동화계측)', '풍전빌딩(현장)']


In [41]:
result_dict = test_parser.get_dict(classification_list, location_list)

print(result_dict)

['0.123', '64.92', '66.36'] 3
['nan', 'nan', 'nan'] 3
['0.105', '64.92', '60.95'] 3
['0.127', '68.22', '74.8'] 3
['0.161', '60.58', '63.16'] 3
['0.122', '68.41', '69.03'] 3
['0.048', '54.74', '55.37'] 3
['0.071', '60.42', '61.22'] 3
['nan', 'nan', 'nan'] 3
['0.046', '56.69', '59.07'] 3
['0.059', '59.6', '61.86'] 3
['0.083', '58.5', '64.24'] 3
['0.092', '62.41', '60.64'] 3
['nan', 'nan', 'nan'] 3
['0.057', '58.64', '60.59'] 3
['0.05', '57.76', '61.53'] 3
['0.07', '59.13', '53.8'] 3
['0.113', '61.03', '61.39'] 3
['nan', 'nan', 'nan'] 3
['nan', 'nan', 'nan'] 3
['0.081', '60.05', '54.81'] 3
['0.085', '61.6', '63.85'] 3
['0.185', '68.98', '79.14'] 3
['0.062', '59.16', '64.92'] 3
['nan', 'nan', 'nan'] 3
['nan', 'nan', 'nan'] 3
['0.125', '66.55', '72.45'] 3
['0.081', '70.71', '59.41'] 3
['0.129', '65.67', '65.5'] 3
['0.107', '62.67', '56.07'] 3
['nan', 'nan', 'nan'] 3
['0.088', '66.68', '71.82'] 3
['0.057', '58.12', '59.57'] 3
['nan', 'nan', 'nan'] 3
['0.103', '61.63', '64.13'] 3
['0.055', '5

In [42]:
import pandas as pd
test = []
for item in result_dict:
    test.append([item, pd.DataFrame(result_dict[item]).transpose().reset_index(drop=True)])

print(test)


[['안산천주교회',              일시 발파진동(cm/s) 진동레벨dB(V) 소음레벨dB(A)
0     2월1일 7:01      0.123     64.92     66.36
1    2월1일 18:16      0.161     60.58     63.16
2     2월2일 7:00      0.122     68.41     69.03
3    2월2일 18:17      0.046     56.69     59.07
4     2월3일 7:01      0.059      59.6     61.86
5    2월3일 17:30      0.057     58.64     60.59
6     2월4일 7:25       0.05     57.76     61.53
7    2월5일 12:15        NaN       NaN       NaN
8     2월6일 7:00      0.062     59.16     64.92
9    2월6일 17:31      0.081     70.71     59.41
10    2월7일 7:00      0.129     65.67      65.5
11   2월7일 17:24      0.057     58.12     59.57
12  2월13일 10:12        NaN       NaN       NaN
13   2월14일 7:00      0.101     58.97     64.51
14  2월14일 18:01      0.082     59.54     64.04
15  2월15일 17:49      0.137     63.86     62.79
16   2월16일 7:00      0.208     68.43     62.08
17  2월16일 17:05      0.124     64.39     67.44
18   2월17일 7:00      0.042     55.65     63.54
19  2월17일 17:04      0.187      64.8     62.98
2