1818
1919package com .dtstack .flink .sql .dirtyManager .manager ;
2020
21- import com .dtstack . flink . sql . classloader . ClassLoaderManager ;
21+ import com .alibaba . fastjson . JSONObject ;
2222import com .dtstack .flink .sql .dirtyManager .consumer .AbstractDirtyDataConsumer ;
23+ import com .dtstack .flink .sql .dirtyManager .consumer .DirtyConsumerFactory ;
2324import com .dtstack .flink .sql .dirtyManager .entity .DirtyDataEntity ;
2425import com .dtstack .flink .sql .factory .DTThreadFactory ;
25- import com .dtstack .flink .sql .util .PluginUtil ;
2626import org .slf4j .Logger ;
2727import org .slf4j .LoggerFactory ;
2828
29- import java .io .File ;
3029import java .io .Serializable ;
31- import java .lang .reflect .Constructor ;
32- import java .util .Map ;
30+ import java .util .Properties ;
3331import java .util .concurrent .LinkedBlockingQueue ;
3432import java .util .concurrent .ThreadPoolExecutor ;
3533import java .util .concurrent .TimeUnit ;
4240 */
4341public class DirtyDataManager implements Serializable {
4442
43+ public final static int MAX_POOL_SIZE_LIMIT = 5 ;
4544 private static final long serialVersionUID = 7190970299538893497L ;
46-
4745 private static final Logger LOG = LoggerFactory .getLogger (DirtyDataManager .class );
46+ private static final String DIRTY_BLOCK_STR = "blockingInterval" ;
47+ private static final String DIRTY_LIMIT_RATE_STR = "errorLimitRate" ;
48+ private final static int MAX_TASK_QUEUE_SIZE = 100 ;
49+ private final static String DEFAULT_ERROR_LIMIT_RATE = "0.8" ;
50+ private final static String DEFAULT_BLOCKING_INTERVAL = "60" ;
// NOTE(review): public, static and non-final, so one reference is shared by every manager instance - confirm this is intended.
51+ public static AbstractDirtyDataConsumer consumer ;
4852
49- private static final String CLASS_PRE_STR = "com.dtstack.flink.sql.dirty" ;
50-
51- private static final String CLASS_POST_STR = "DirtyDataConsumer" ;
52-
53- private static final String DIRTY_CONSUMER_PATH = "dirtyData" ;
54-
55- /**
56- * Blocking time when writing to the queue
57- */
58- private long blockingInterval ;
59-
60- /**
61- * Queue that caches dirty-data records
62- */
63- public final LinkedBlockingQueue <DirtyDataEntity > queue = new LinkedBlockingQueue <>();
64-
53+ private static ThreadPoolExecutor dirtyDataConsumer ;
6554 /**
6655 * Number of dirty-data records collected by the manager
6756 */
6857 private final AtomicLong count = new AtomicLong (0 );
69-
7058 /**
7159 * Number of dirty-data records that failed to be written to the queue
7260 */
7361 private final AtomicLong errorCount = new AtomicLong (0 );
74-
62+ /**
63+ * Blocking time when writing to the queue
64+ */
65+ private long blockingInterval ;
66+ /**
67+ * Dirty-data failure ratio at which the task fails
68+ */
7569 private double errorLimitRate ;
7670
77- public static AbstractDirtyDataConsumer consumer ;
78-
79- private static ThreadPoolExecutor dirtyDataConsumer ;
80-
81- public final static int MAX_POOL_SIZE_LIMIT = 5 ;
82-
83- private final static int MAX_TASK_QUEUE_SIZE = 100 ;
84-
85- private final static String DEFAULT_TYPE = "console" ;
86-
87- private final static String DEFAULT_ERROR_LIMIT_RATE = "0.8" ;
88-
89- private final static String DEFAULT_BLOCKING_INTERVAL = "60" ;
90-
9171 /**
9272 * Creates a manager instance from the given properties and instantiates the consumer at the same time.
 * <p>
 * Reads "blockingInterval" (default "60") and "errorLimitRate" (default "0.8") from the properties,
 * obtains the consumer, calls its init with the same properties, hands it a queue and submits it
 * to a pool whose core and maximum size are both MAX_POOL_SIZE_LIMIT. The pool's task queue is
 * bounded by MAX_TASK_QUEUE_SIZE and uses CallerRunsPolicy as its rejection handler.
 * <p>
 * The added (+) version takes java.util.Properties, gets the consumer from DirtyConsumerFactory
 * using the "type", "pluginPath" and "pluginLoadMode" properties, and rethrows any exception as a
 * RuntimeException. The removed (-) version took a Map, used createConsumer and declared
 * "throws Exception".
 * <p>
 * NOTE(review): the static fields consumer and dirtyDataConsumer are reassigned on every call and a
 * previously created executor is not shut down in this method - confirm it is called only once.
 * NOTE(review): "type" is read without a default in the added version, while the removed
 * createConsumer fell back to "console" - confirm DirtyConsumerFactory copes with a null type.
 *
 * @param properties dirty-data plugin configuration
 * @return the newly created manager
9373 */
94- public static DirtyDataManager newInstance (Map <String , String > properties ) throws Exception {
95- DirtyDataManager manager = new DirtyDataManager ();
96- manager .blockingInterval = Long .parseLong (properties .getOrDefault ("blockingInterval" , DEFAULT_BLOCKING_INTERVAL ));
97- manager .errorLimitRate = Double .parseDouble (properties .getOrDefault ("errorLimitRate" , DEFAULT_ERROR_LIMIT_RATE ));
98- consumer = createConsumer (properties );
99- consumer .init (properties );
100- consumer .setQueue (manager .queue );
101- dirtyDataConsumer = new ThreadPoolExecutor (MAX_POOL_SIZE_LIMIT , MAX_POOL_SIZE_LIMIT , 0 , TimeUnit .MILLISECONDS ,
102- new LinkedBlockingQueue <>(MAX_TASK_QUEUE_SIZE ), new DTThreadFactory ("dirtyDataConsumer" ), new ThreadPoolExecutor .CallerRunsPolicy ());
103- dirtyDataConsumer .execute (consumer );
104-
105- return manager ;
74+ public static DirtyDataManager newInstance (Properties properties ) {
75+ try {
76+ DirtyDataManager manager = new DirtyDataManager ();
77+ manager .blockingInterval = Long .parseLong (String .valueOf (properties .getOrDefault (DIRTY_BLOCK_STR , DEFAULT_BLOCKING_INTERVAL )));
78+ manager .errorLimitRate = Double .parseDouble (String .valueOf (properties .getOrDefault (DIRTY_LIMIT_RATE_STR , DEFAULT_ERROR_LIMIT_RATE )));
79+ consumer = DirtyConsumerFactory .getDirtyConsumer (
80+ properties .getProperty ("type" )
81+ , properties .getProperty ("pluginPath" )
82+ , properties .getProperty ("pluginLoadMode" )
83+ );
84+ consumer .init (properties );
85+ consumer .setQueue (new LinkedBlockingQueue <>());
86+ dirtyDataConsumer = new ThreadPoolExecutor (MAX_POOL_SIZE_LIMIT , MAX_POOL_SIZE_LIMIT , 0 , TimeUnit .MILLISECONDS ,
87+ new LinkedBlockingQueue <>(MAX_TASK_QUEUE_SIZE ), new DTThreadFactory ("dirtyDataConsumer" ), new ThreadPoolExecutor .CallerRunsPolicy ());
88+ dirtyDataConsumer .execute (consumer );
89+ return manager ;
90+ } catch (Exception e ) {
91+ throw new RuntimeException ("create dirtyManager error!" , e );
92+ }
10693 }
10794
10895 /**
109- * Loads the consumer through dynamic class loading
96+ * Builds the default configuration of the dirty-data plugin
97+ *
98+ * @return the default configuration for console, as a JSON string with "type" set to
 * "console" and "printLimit" set to "1000"
11099 */
111- private static AbstractDirtyDataConsumer createConsumer (Map <String , String > properties ) throws Exception {
112- String type = properties .getOrDefault ("type" , DEFAULT_TYPE );
113- String consumerType = DIRTY_CONSUMER_PATH + File .separator + type ;
114- String consumerJar = PluginUtil .getJarFileDirPath (consumerType , properties .getOrDefault ("pluginPath" , null ), "shipfile" );
115- String className = CLASS_PRE_STR + "." + type .toLowerCase () + "." + upperCaseFirstChar (type + CLASS_POST_STR );
116-
117- return ClassLoaderManager .newInstance (consumerJar , cl -> {
118- Class <?> clazz = cl .loadClass (className );
119- Constructor <?> constructor = clazz .getConstructor ();
120- return (AbstractDirtyDataConsumer ) constructor .newInstance ();
121- });
100+ public static String buildDefaultDirty () {
101+ JSONObject jsonObject = new JSONObject ();
102+ jsonObject .put ("type" , "console" );
103+ jsonObject .put ("printLimit" , "1000" );
104+ return jsonObject .toJSONString ();
122105 }
123106
124107 /**
@@ -136,16 +119,16 @@ public void close() {
136119 /**
137120 * Collects dirty data into the queue cache and records both the number of failed insertions and
 * the total number stored in the queue. If the number of failed insertions exceeds a certain
 * ratio, the manager task fails.
 * <p>
 * The record is wrapped in a DirtyDataEntity together with the current time in milliseconds.
 * The added (+) version hands it to the shared static consumer; the removed (-) version offered
 * it to the manager's own queue and also carried a "field" argument.
 * <p>
 * NOTE(review): count is incremented only after a successful hand-off, so the limit is
 * ceil(successes * errorLimitRate); with count == 0 the first failure already exceeds it -
 * confirm this is intended.
 * NOTE(review): the caught exception is named "ignored" and is never logged, so the root cause
 * of a failed insertion is lost.
 *
 * @param dataInfo passed to the DirtyDataEntity constructor as its first argument
 * @param cause passed to the DirtyDataEntity constructor after the timestamp
 * @throws RuntimeException when errorCount exceeds ceil(count * errorLimitRate)
138121 */
139- public void collectDirtyData (String dataInfo , String cause , String field ) {
140- DirtyDataEntity dirtyDataEntity = new DirtyDataEntity (dataInfo , System .currentTimeMillis (), cause , field );
122+ public void collectDirtyData (String dataInfo , String cause ) {
123+ DirtyDataEntity dirtyDataEntity = new DirtyDataEntity (dataInfo , System .currentTimeMillis (), cause );
141124 try {
142- queue . offer (dirtyDataEntity , blockingInterval , TimeUnit . MILLISECONDS );
125+ consumer . collectDirtyData (dirtyDataEntity , blockingInterval );
143126 count .incrementAndGet ();
144127 } catch (Exception ignored ) {
145128 LOG .warn ("dirty Data insert error ... Failed number: " + errorCount .incrementAndGet ());
146129 LOG .warn ("error dirty data:" + dirtyDataEntity .toString ());
147130 if (errorCount .get () > Math .ceil (count .longValue () * errorLimitRate )) {
148- throw new RuntimeException ("The number of failed number reaches the limit, manager fails" );
131+ throw new RuntimeException (String . format ( "The number of failed number 【%s】 reaches the limit, manager fails" , errorCount . get ()) );
149132 }
150133 }
151134 }
@@ -156,11 +139,4 @@ public void collectDirtyData(String dataInfo, String cause, String field) {
156139 public boolean checkConsumer () {
// Returns whatever the shared static consumer reports from isRunning().
// NOTE(review): consumer is only assigned in newInstance - presumably that is always called first; verify.
157140 return consumer .isRunning ();
158141 }
159-
160- /**
161- * Upper-cases the first character of the given string
162- */
163- private static String upperCaseFirstChar (String str ) {
164- return str .substring (0 , 1 ).toUpperCase () + str .substring (1 );
165- }
166142}
0 commit comments