From 7942cbb912e4deee544422e7d017168868a17c95 Mon Sep 17 00:00:00 2001
From: DeeJay0921 <1018805743@qq.com>
Date: Fri, 10 Jan 2020 13:57:06 +0800
Subject: [PATCH] =?UTF-8?q?=E5=A2=9E=E5=8A=A0Mybatis=E8=AE=BF=E9=97=AE?=
=?UTF-8?q?=E6=95=B0=E6=8D=AE=E6=96=B9=E5=BC=8F?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
引入了 MyBatis 作为新的数据访问方式
同时也增加了相关的 MyBatis 配置和 SQL 映射 XML
---
pom.xml | 6 ++
.../java/com/github/DeeJay0921/Crawler.java | 8 +-
.../com/github/DeeJay0921/CrawlerDao.java | 8 +-
.../com/github/DeeJay0921/JdbcCrawlerDao.java | 14 +++-
.../github/DeeJay0921/MybatisCrawlerDao.java | 74 +++++++++++++++++++
src/main/java/com/github/DeeJay0921/News.java | 48 ++++++++++++
src/main/resources/db/mybatis/myMapper.xml | 38 ++++++++++
.../resources/db/mybatis/mybatis-config.xml | 21 ++++++
8 files changed, 209 insertions(+), 8 deletions(-)
create mode 100644 src/main/java/com/github/DeeJay0921/MybatisCrawlerDao.java
create mode 100644 src/main/java/com/github/DeeJay0921/News.java
create mode 100644 src/main/resources/db/mybatis/myMapper.xml
create mode 100644 src/main/resources/db/mybatis/mybatis-config.xml
diff --git a/pom.xml b/pom.xml
index 87b53be..cb5947c 100644
--- a/pom.xml
+++ b/pom.xml
@@ -52,6 +52,12 @@
1.4.199
compile
+
+
+ org.mybatis
+ mybatis
+ 3.5.3
+
diff --git a/src/main/java/com/github/DeeJay0921/Crawler.java b/src/main/java/com/github/DeeJay0921/Crawler.java
index 0d6c361..a19943f 100644
--- a/src/main/java/com/github/DeeJay0921/Crawler.java
+++ b/src/main/java/com/github/DeeJay0921/Crawler.java
@@ -25,7 +25,7 @@
public class Crawler {
- private CrawlerDao dao = new JdbcCrawlerDao();
+ private CrawlerDao dao = new MybatisCrawlerDao();
public static void main(String[] args) throws SQLException {
new Crawler().run();
@@ -48,7 +48,8 @@ public void run() throws SQLException {
// 对于新闻页做额外处理
storeIntoDataBaseIfIsNews(link, document);
// 将访问过的链接加入已处理的数据库
- dao.updateDataBase(link, "insert into LINKS_ALREADY_PROCESSED values ( ? )");
+ dao.insertLinkIntoProcessed(link);
+// dao.updateDataBase(link, "insert into LINKS_ALREADY_PROCESSED values ( ? )");
}
}
}
@@ -59,7 +60,8 @@ private void insertNewLinksToDatabase(Document document) throws SQLException {
for (Element alink : aLinks) {
String href = alink.attr("href");
if (isInterestingLink(href)) {
- dao.updateDataBase(href, "insert into LINKS_TO_BE_PROCESSED values ( ? )");
+ dao.insertLinkIntoToBeProcessed(href);
+// dao.updateDataBase(href, "insert into LINKS_TO_BE_PROCESSED values ( ? )");
}
}
}
diff --git a/src/main/java/com/github/DeeJay0921/CrawlerDao.java b/src/main/java/com/github/DeeJay0921/CrawlerDao.java
index a0477aa..de89561 100644
--- a/src/main/java/com/github/DeeJay0921/CrawlerDao.java
+++ b/src/main/java/com/github/DeeJay0921/CrawlerDao.java
@@ -6,13 +6,15 @@
* 爬虫数据访问方式的标准接口
*/
public interface CrawlerDao {
- String getNextLink(String sql) throws SQLException;
-
String getNextLinkThenDelete() throws SQLException;
- void updateDataBase(String link, String sql) throws SQLException;
+// void updateDataBase(String link, String sql) throws SQLException;
void insertNewsIntoDatabase(String link, String articleTitle, String articleContent) throws SQLException;
boolean isLinkProcessed(String link) throws SQLException;
+
+ void insertLinkIntoProcessed(String link) throws SQLException;
+
+ void insertLinkIntoToBeProcessed(String href) throws SQLException;
}
diff --git a/src/main/java/com/github/DeeJay0921/JdbcCrawlerDao.java b/src/main/java/com/github/DeeJay0921/JdbcCrawlerDao.java
index 4094548..cb0a14e 100644
--- a/src/main/java/com/github/DeeJay0921/JdbcCrawlerDao.java
+++ b/src/main/java/com/github/DeeJay0921/JdbcCrawlerDao.java
@@ -21,7 +21,7 @@ public class JdbcCrawlerDao implements CrawlerDao {
}
}
- public String getNextLink(String sql) throws SQLException {
+ private String getNextLink(String sql) throws SQLException {
String link = null;
PreparedStatement preparedStatement = null;
ResultSet resultSet = null;
@@ -51,7 +51,7 @@ public String getNextLinkThenDelete() throws SQLException {
return link;
}
- public void updateDataBase(String link, String sql) throws SQLException {
+ private void updateDataBase(String link, String sql) throws SQLException {
PreparedStatement preparedStatement = null;
try {
preparedStatement = connection.prepareStatement(sql);
@@ -99,4 +99,14 @@ public boolean isLinkProcessed(String link) throws SQLException {
}
return false;
}
+
+ @Override
+ public void insertLinkIntoProcessed(String link) throws SQLException {
+ this.updateDataBase(link, "insert into LINKS_ALREADY_PROCESSED values ( ? )");
+ }
+
+ @Override
+ public void insertLinkIntoToBeProcessed(String href) throws SQLException {
+ this.updateDataBase(href, "insert into LINKS_TO_BE_PROCESSED values ( ? )");
+ }
}
diff --git a/src/main/java/com/github/DeeJay0921/MybatisCrawlerDao.java b/src/main/java/com/github/DeeJay0921/MybatisCrawlerDao.java
new file mode 100644
index 0000000..bf253ce
--- /dev/null
+++ b/src/main/java/com/github/DeeJay0921/MybatisCrawlerDao.java
@@ -0,0 +1,74 @@
+package com.github.DeeJay0921;
+
+import org.apache.ibatis.io.Resources;
+import org.apache.ibatis.session.SqlSession;
+import org.apache.ibatis.session.SqlSessionFactory;
+import org.apache.ibatis.session.SqlSessionFactoryBuilder;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.sql.SQLException;
+import java.util.HashMap;
+
+/** {@link CrawlerDao} backed by MyBatis; statement ids are the mapper namespace followed by the statement's id. */
+public class MybatisCrawlerDao implements CrawlerDao {
+    private final SqlSessionFactory sqlSessionFactory;
+
+    public MybatisCrawlerDao() {
+        try (InputStream config = Resources.getResourceAsStream("db/mybatis/mybatis-config.xml")) {
+            sqlSessionFactory = new SqlSessionFactoryBuilder().build(config);
+        } catch (IOException e) {
+            throw new RuntimeException(e);
+        }
+    }
+
+    /** Returns the link produced by selectNextLink after deleting it, or null when the select yields nothing. */
+    @Override
+    public String getNextLinkThenDelete() throws SQLException {
+        // autoCommit must be true here; otherwise the delete is never committed to the database.
+        try (SqlSession session = sqlSessionFactory.openSession(true)) {
+            String link = session.selectOne("com.github.DeeJay0921.mybatis.selectNextLink");
+            if (link != null) {
+                session.delete("com.github.DeeJay0921.mybatis.deleteLink", link);
+            }
+            return link;
+        }
+    }
+
+    /** Runs the insertNews statement with a {@link News} built from the title, content and link. */
+    @Override
+    public void insertNewsIntoDatabase(String link, String articleTitle, String articleContent) throws SQLException {
+        try (SqlSession session = sqlSessionFactory.openSession(true)) {
+            session.insert("com.github.DeeJay0921.mybatis.insertNews", new News(articleTitle, articleContent, link));
+        }
+    }
+
+    /** Returns true when the countLink statement reports a non-zero count for the given link. */
+    @Override
+    public boolean isLinkProcessed(String link) throws SQLException {
+        try (SqlSession session = sqlSessionFactory.openSession()) {
+            int count = session.selectOne("com.github.DeeJay0921.mybatis.countLink", link);
+            return count != 0;
+        }
+    }
+
+    @Override
+    public void insertLinkIntoProcessed(String link) {
+        this.insertIntoDifferentTable("LINKS_ALREADY_PROCESSED", link);
+    }
+
+    @Override
+    public void insertLinkIntoToBeProcessed(String href) {
+        this.insertIntoDifferentTable("LINKS_TO_BE_PROCESSED", href);
+    }
+
+    /** Runs the insertLink statement with "tableName" and "link" parameters in an auto-committing session. */
+    private void insertIntoDifferentTable(String tableName, String link) {
+        HashMap<String, String> param = new HashMap<>();
+        param.put("tableName", tableName);
+        param.put("link", link);
+        try (SqlSession session = sqlSessionFactory.openSession(true)) {
+            session.insert("com.github.DeeJay0921.mybatis.insertLink", param);
+        }
+    }
+}
diff --git a/src/main/java/com/github/DeeJay0921/News.java b/src/main/java/com/github/DeeJay0921/News.java
new file mode 100644
index 0000000..fa01cc4
--- /dev/null
+++ b/src/main/java/com/github/DeeJay0921/News.java
@@ -0,0 +1,48 @@
+package com.github.DeeJay0921;
+
+import java.math.BigInteger;
+
+/**
+ * Mutable bean carrying the id, title, content and url of one news entry.
+ */
+public class News {
+    private BigInteger id;
+    private String title;
+    private String content;
+    private String url;
+
+    /** Creates an entry with the given title, content and url; the id stays null until {@link #setId} is called. */
+    public News(String title, String content, String url) {
+        this.title = title;
+        this.content = content;
+        this.url = url;
+    }
+
+    public BigInteger getId() {
+        return this.id;
+    }
+    public void setId(BigInteger id) {
+        this.id = id;
+    }
+
+    public String getTitle() {
+        return this.title;
+    }
+    public void setTitle(String title) {
+        this.title = title;
+    }
+
+    public String getContent() {
+        return this.content;
+    }
+    public void setContent(String content) {
+        this.content = content;
+    }
+
+    public String getUrl() {
+        return this.url;
+    }
+    public void setUrl(String url) {
+        this.url = url;
+    }
+}
diff --git a/src/main/resources/db/mybatis/myMapper.xml b/src/main/resources/db/mybatis/myMapper.xml
new file mode 100644
index 0000000..48ad2c8
--- /dev/null
+++ b/src/main/resources/db/mybatis/myMapper.xml
@@ -0,0 +1,38 @@
+
+
+
+
+
+
+
+ delete from LINKS_TO_BE_PROCESSED where link = #{link}
+
+
+
+ insert into NEWS (TITLE, CONTENT, URL)
+ values (#{title},#{content},#{url})
+
+
+
+
+
+ insert into
+
+
+ LINKS_TO_BE_PROCESSED
+
+
+ LINKS_ALREADY_PROCESSED
+
+
+ (LINK)
+ values (#{link})
+
+
\ No newline at end of file
diff --git a/src/main/resources/db/mybatis/mybatis-config.xml b/src/main/resources/db/mybatis/mybatis-config.xml
new file mode 100644
index 0000000..1b74ae2
--- /dev/null
+++ b/src/main/resources/db/mybatis/mybatis-config.xml
@@ -0,0 +1,21 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file