From 7942cbb912e4deee544422e7d017168868a17c95 Mon Sep 17 00:00:00 2001 From: DeeJay0921 <1018805743@qq.com> Date: Fri, 10 Jan 2020 13:57:06 +0800 Subject: [PATCH] =?UTF-8?q?=E5=A2=9E=E5=8A=A0Mybatis=E8=AE=BF=E9=97=AE?= =?UTF-8?q?=E6=95=B0=E6=8D=AE=E6=96=B9=E5=BC=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 引入了Mybatis作为新的数据访问方式 同时也增加了相关的Mybatis配置和sql映射xml --- pom.xml | 6 ++ .../java/com/github/DeeJay0921/Crawler.java | 8 +- .../com/github/DeeJay0921/CrawlerDao.java | 8 +- .../com/github/DeeJay0921/JdbcCrawlerDao.java | 14 +++- .../github/DeeJay0921/MybatisCrawlerDao.java | 74 +++++++++++++++++++ src/main/java/com/github/DeeJay0921/News.java | 48 ++++++++++++ src/main/resources/db/mybatis/myMapper.xml | 38 ++++++++++ .../resources/db/mybatis/mybatis-config.xml | 21 ++++++ 8 files changed, 209 insertions(+), 8 deletions(-) create mode 100644 src/main/java/com/github/DeeJay0921/MybatisCrawlerDao.java create mode 100644 src/main/java/com/github/DeeJay0921/News.java create mode 100644 src/main/resources/db/mybatis/myMapper.xml create mode 100644 src/main/resources/db/mybatis/mybatis-config.xml diff --git a/pom.xml b/pom.xml index 87b53be..cb5947c 100644 --- a/pom.xml +++ b/pom.xml @@ -52,6 +52,12 @@ 1.4.199 compile + + + org.mybatis + mybatis + 3.5.3 + diff --git a/src/main/java/com/github/DeeJay0921/Crawler.java b/src/main/java/com/github/DeeJay0921/Crawler.java index 0d6c361..a19943f 100644 --- a/src/main/java/com/github/DeeJay0921/Crawler.java +++ b/src/main/java/com/github/DeeJay0921/Crawler.java @@ -25,7 +25,7 @@ public class Crawler { - private CrawlerDao dao = new JdbcCrawlerDao(); + private CrawlerDao dao = new MybatisCrawlerDao(); public static void main(String[] args) throws SQLException { new Crawler().run(); @@ -48,7 +48,8 @@ public void run() throws SQLException { // 对于新闻页做额外处理 storeIntoDataBaseIfIsNews(link, document); // 将访问过的链接加入已处理的数据库 - dao.updateDataBase(link, "insert into LINKS_ALREADY_PROCESSED values ( ? )"); + dao.insertLinkIntoProcessed(link); +// dao.updateDataBase(link, "insert into LINKS_ALREADY_PROCESSED values ( ? )"); } } } @@ -59,7 +60,8 @@ private void insertNewLinksToDatabase(Document document) throws SQLException { for (Element alink : aLinks) { String href = alink.attr("href"); if (isInterestingLink(href)) { - dao.updateDataBase(href, "insert into LINKS_TO_BE_PROCESSED values ( ? )"); + dao.insertLinkIntoToBeProcessed(href); +// dao.updateDataBase(href, "insert into LINKS_TO_BE_PROCESSED values ( ? )"); } } } diff --git a/src/main/java/com/github/DeeJay0921/CrawlerDao.java b/src/main/java/com/github/DeeJay0921/CrawlerDao.java index a0477aa..de89561 100644 --- a/src/main/java/com/github/DeeJay0921/CrawlerDao.java +++ b/src/main/java/com/github/DeeJay0921/CrawlerDao.java @@ -6,13 +6,15 @@ * 爬虫数据访问方式的标准接口 */ public interface CrawlerDao { - String getNextLink(String sql) throws SQLException; - String getNextLinkThenDelete() throws SQLException; - void updateDataBase(String link, String sql) throws SQLException; +// void updateDataBase(String link, String sql) throws SQLException; void insertNewsIntoDatabase(String link, String articleTitle, String articleContent) throws SQLException; boolean isLinkProcessed(String link) throws SQLException; + + void insertLinkIntoProcessed(String link) throws SQLException; + + void insertLinkIntoToBeProcessed(String href) throws SQLException; } diff --git a/src/main/java/com/github/DeeJay0921/JdbcCrawlerDao.java b/src/main/java/com/github/DeeJay0921/JdbcCrawlerDao.java index 4094548..cb0a14e 100644 --- a/src/main/java/com/github/DeeJay0921/JdbcCrawlerDao.java +++ b/src/main/java/com/github/DeeJay0921/JdbcCrawlerDao.java @@ -21,7 +21,7 @@ public class JdbcCrawlerDao implements CrawlerDao { } } - public String getNextLink(String sql) throws SQLException { + private String getNextLink(String sql) throws SQLException { String link = null; PreparedStatement preparedStatement = null; ResultSet resultSet = null; @@ -51,7 +51,7 @@ public String getNextLinkThenDelete() throws SQLException { return link; } - public void updateDataBase(String link, String sql) throws SQLException { + private void updateDataBase(String link, String sql) throws SQLException { PreparedStatement preparedStatement = null; try { preparedStatement = connection.prepareStatement(sql); @@ -99,4 +99,14 @@ public boolean isLinkProcessed(String link) throws SQLException { } return false; } + + @Override + public void insertLinkIntoProcessed(String link) throws SQLException { + this.updateDataBase(link, "insert into LINKS_ALREADY_PROCESSED values ( ? )"); + } + + @Override + public void insertLinkIntoToBeProcessed(String href) throws SQLException { + this.updateDataBase(href, "insert into LINKS_TO_BE_PROCESSED values ( ? )"); + } } diff --git a/src/main/java/com/github/DeeJay0921/MybatisCrawlerDao.java b/src/main/java/com/github/DeeJay0921/MybatisCrawlerDao.java new file mode 100644 index 0000000..bf253ce --- /dev/null +++ b/src/main/java/com/github/DeeJay0921/MybatisCrawlerDao.java @@ -0,0 +1,74 @@ +package com.github.DeeJay0921; + +import org.apache.ibatis.io.Resources; +import org.apache.ibatis.session.SqlSession; +import org.apache.ibatis.session.SqlSessionFactory; +import org.apache.ibatis.session.SqlSessionFactoryBuilder; + +import java.io.IOException; +import java.io.InputStream; +import java.sql.SQLException; +import java.util.HashMap; + +public class MybatisCrawlerDao implements CrawlerDao { + private SqlSessionFactory sqlSessionFactory; + + public MybatisCrawlerDao() { + try { + String resource = "db/mybatis/mybatis-config.xml"; + InputStream inputStream = Resources.getResourceAsStream(resource); + sqlSessionFactory = new SqlSessionFactoryBuilder().build(inputStream); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + @Override + public String getNextLinkThenDelete() throws SQLException { + String link; + // 这里的openSession 的参数autoCommit一定要为true,否则每次的删除就没有被提交到数据库 + try (SqlSession session = sqlSessionFactory.openSession(true)) { + link = session.selectOne( + "com.github.DeeJay0921.mybatis.selectNextLink"); // 这边输入Mapper.xml里面的命名空间加Select语句的id + if (link != null) { + session.delete("com.github.DeeJay0921.mybatis.deleteLink", link); + } + } + return link; + } + + @Override + public void insertNewsIntoDatabase(String link, String articleTitle, String articleContent) throws SQLException { + try (SqlSession session = sqlSessionFactory.openSession(true)) { + session.insert("com.github.DeeJay0921.mybatis.insertNews", new News(articleTitle, articleContent, link)); + } + } + + @Override + public boolean isLinkProcessed(String link) throws SQLException { + int count; + try (SqlSession session = sqlSessionFactory.openSession()) { + count = session.selectOne("com.github.DeeJay0921.mybatis.countLink", link); + } + return 0 != count; + } + + @Override + public void insertLinkIntoProcessed(String link) { + this.insertIntoDifferentTable("LINKS_ALREADY_PROCESSED", link); + } + + @Override + public void insertLinkIntoToBeProcessed(String href) { + this.insertIntoDifferentTable("LINKS_TO_BE_PROCESSED", href); + } + + private void insertIntoDifferentTable(String tableName, String link) { + HashMap param = new HashMap<>(); + param.put("tableName", tableName); + param.put("link", link); + try (SqlSession session = sqlSessionFactory.openSession(true)) { + session.insert("com.github.DeeJay0921.mybatis.insertLink", param); + } + } +} diff --git a/src/main/java/com/github/DeeJay0921/News.java b/src/main/java/com/github/DeeJay0921/News.java new file mode 100644 index 0000000..fa01cc4 --- /dev/null +++ b/src/main/java/com/github/DeeJay0921/News.java @@ -0,0 +1,48 @@ +package com.github.DeeJay0921; + +import java.math.BigInteger; + +public class News { + private BigInteger id; + private String title; + private String content; + private String url; + + public News(String title, String content, String url) { + this.title = title; + this.content = content; + this.url = url; + } + + public BigInteger getId() { + return id; + } + + public void setId(BigInteger id) { + this.id = id; + } + + public String getTitle() { + return title; + } + + public void setTitle(String title) { + this.title = title; + } + + public String getContent() { + return content; + } + + public void setContent(String content) { + this.content = content; + } + + public String getUrl() { + return url; + } + + public void setUrl(String url) { + this.url = url; + } +} diff --git a/src/main/resources/db/mybatis/myMapper.xml b/src/main/resources/db/mybatis/myMapper.xml new file mode 100644 index 0000000..48ad2c8 --- /dev/null +++ b/src/main/resources/db/mybatis/myMapper.xml @@ -0,0 +1,38 @@ + + + + + + + + delete from LINKS_TO_BE_PROCESSED where link = #{link} + + + + insert into NEWS (TITLE, CONTENT, URL) + values (#{title},#{content},#{url}) + + + + + + insert into + + + LINKS_TO_BE_PROCESSED + + + LINKS_ALREADY_PROCESSED + + + (LINK) + values (#{link}) + + \ No newline at end of file diff --git a/src/main/resources/db/mybatis/mybatis-config.xml b/src/main/resources/db/mybatis/mybatis-config.xml new file mode 100644 index 0000000..1b74ae2 --- /dev/null +++ b/src/main/resources/db/mybatis/mybatis-config.xml @@ -0,0 +1,21 @@ + + + + + + + + + + + + + + + + + + + \ No newline at end of file