Skip to content

Commit

Permalink
增加Mybatis访问数据方式
Browse files Browse the repository at this point in the history
引入了Mybatis作为新的数据访问方式

同时也增加了相关的Mybatis配置和sql映射xml
  • Loading branch information
DeeJay0921 committed Jan 10, 2020
1 parent e72d0dd commit 7942cbb
Show file tree
Hide file tree
Showing 8 changed files with 209 additions and 8 deletions.
6 changes: 6 additions & 0 deletions pom.xml
Expand Up @@ -52,6 +52,12 @@
<version>1.4.199</version>
<scope>compile</scope>
</dependency>
<!-- https://mvnrepository.com/artifact/org.mybatis/mybatis -->
<dependency>
<groupId>org.mybatis</groupId>
<artifactId>mybatis</artifactId>
<version>3.5.3</version>
</dependency>

</dependencies>

Expand Down
8 changes: 5 additions & 3 deletions src/main/java/com/github/DeeJay0921/Crawler.java
Expand Up @@ -25,7 +25,7 @@

public class Crawler {

private CrawlerDao dao = new JdbcCrawlerDao();
private CrawlerDao dao = new MybatisCrawlerDao();

public static void main(String[] args) throws SQLException {
new Crawler().run();
Expand All @@ -48,7 +48,8 @@ public void run() throws SQLException {
// 对于新闻页做额外处理
storeIntoDataBaseIfIsNews(link, document);
// 将访问过的链接加入已处理的数据库
dao.updateDataBase(link, "insert into LINKS_ALREADY_PROCESSED values ( ? )");
dao.insertLinkIntoProcessed(link);
// dao.updateDataBase(link, "insert into LINKS_ALREADY_PROCESSED values ( ? )");
}
}
}
Expand All @@ -59,7 +60,8 @@ private void insertNewLinksToDatabase(Document document) throws SQLException {
for (Element alink : aLinks) {
String href = alink.attr("href");
if (isInterestingLink(href)) {
dao.updateDataBase(href, "insert into LINKS_TO_BE_PROCESSED values ( ? )");
dao.insertLinkIntoToBeProcessed(href);
// dao.updateDataBase(href, "insert into LINKS_TO_BE_PROCESSED values ( ? )");
}
}
}
Expand Down
8 changes: 5 additions & 3 deletions src/main/java/com/github/DeeJay0921/CrawlerDao.java
Expand Up @@ -6,13 +6,15 @@
* 爬虫数据访问方式的标准接口
*/
public interface CrawlerDao {
String getNextLink(String sql) throws SQLException;

String getNextLinkThenDelete() throws SQLException;

void updateDataBase(String link, String sql) throws SQLException;
// void updateDataBase(String link, String sql) throws SQLException;

void insertNewsIntoDatabase(String link, String articleTitle, String articleContent) throws SQLException;

boolean isLinkProcessed(String link) throws SQLException;

void insertLinkIntoProcessed(String link) throws SQLException;

void insertLinkIntoToBeProcessed(String href) throws SQLException;
}
14 changes: 12 additions & 2 deletions src/main/java/com/github/DeeJay0921/JdbcCrawlerDao.java
Expand Up @@ -21,7 +21,7 @@ public class JdbcCrawlerDao implements CrawlerDao {
}
}

public String getNextLink(String sql) throws SQLException {
private String getNextLink(String sql) throws SQLException {
String link = null;
PreparedStatement preparedStatement = null;
ResultSet resultSet = null;
Expand Down Expand Up @@ -51,7 +51,7 @@ public String getNextLinkThenDelete() throws SQLException {
return link;
}

public void updateDataBase(String link, String sql) throws SQLException {
private void updateDataBase(String link, String sql) throws SQLException {
PreparedStatement preparedStatement = null;
try {
preparedStatement = connection.prepareStatement(sql);
Expand Down Expand Up @@ -99,4 +99,14 @@ public boolean isLinkProcessed(String link) throws SQLException {
}
return false;
}

/**
 * Records {@code link} in the {@code LINKS_ALREADY_PROCESSED} table.
 *
 * @param link the link to store
 * @throws SQLException if the underlying insert fails
 */
@Override
public void insertLinkIntoProcessed(String link) throws SQLException {
    final String insertSql = "insert into LINKS_ALREADY_PROCESSED values ( ? )";
    updateDataBase(link, insertSql);
}

/**
 * Records {@code href} in the {@code LINKS_TO_BE_PROCESSED} table.
 *
 * @param href the link to store
 * @throws SQLException if the underlying insert fails
 */
@Override
public void insertLinkIntoToBeProcessed(String href) throws SQLException {
    final String insertSql = "insert into LINKS_TO_BE_PROCESSED values ( ? )";
    updateDataBase(href, insertSql);
}
}
74 changes: 74 additions & 0 deletions src/main/java/com/github/DeeJay0921/MybatisCrawlerDao.java
@@ -0,0 +1,74 @@
package com.github.DeeJay0921;

import org.apache.ibatis.io.Resources;
import org.apache.ibatis.session.SqlSession;
import org.apache.ibatis.session.SqlSessionFactory;
import org.apache.ibatis.session.SqlSessionFactoryBuilder;

import java.io.IOException;
import java.io.InputStream;
import java.sql.SQLException;
import java.util.HashMap;

/**
 * {@link CrawlerDao} implementation that talks to the database through MyBatis.
 *
 * <p>A single {@link SqlSessionFactory} is built once from the classpath resource
 * {@code db/mybatis/mybatis-config.xml}; every operation then opens its own
 * short-lived {@link SqlSession} and closes it before returning.
 */
public class MybatisCrawlerDao implements CrawlerDao {
    /** Classpath location of the MyBatis configuration file. */
    private static final String CONFIG_RESOURCE = "db/mybatis/mybatis-config.xml";

    // A statement id is the mapper namespace followed by the id of the
    // statement declared in the mapper XML.
    private static final String NAMESPACE = "com.github.DeeJay0921.mybatis.";
    private static final String SELECT_NEXT_LINK = NAMESPACE + "selectNextLink";
    private static final String DELETE_LINK = NAMESPACE + "deleteLink";
    private static final String INSERT_NEWS = NAMESPACE + "insertNews";
    private static final String COUNT_LINK = NAMESPACE + "countLink";
    private static final String INSERT_LINK = NAMESPACE + "insertLink";

    private static final String TABLE_ALREADY_PROCESSED = "LINKS_ALREADY_PROCESSED";
    private static final String TABLE_TO_BE_PROCESSED = "LINKS_TO_BE_PROCESSED";

    private final SqlSessionFactory sqlSessionFactory;

    /**
     * Builds the session factory from {@code db/mybatis/mybatis-config.xml}.
     *
     * @throws RuntimeException if the configuration resource cannot be read
     */
    public MybatisCrawlerDao() {
        // try-with-resources guarantees the configuration stream is closed on every path.
        try (InputStream inputStream = Resources.getResourceAsStream(CONFIG_RESOURCE)) {
            sqlSessionFactory = new SqlSessionFactoryBuilder().build(inputStream);
        } catch (IOException e) {
            throw new RuntimeException("Failed to load MyBatis configuration: " + CONFIG_RESOURCE, e);
        }
    }

    /**
     * Fetches one link via the {@code selectNextLink} statement and, if one was
     * found, removes it via the {@code deleteLink} statement.
     *
     * @return the fetched link, or {@code null} when the select returned nothing
     */
    @Override
    public String getNextLinkThenDelete() throws SQLException {
        String link;
        // autoCommit must be true here; otherwise the delete would never be
        // committed to the database.
        try (SqlSession session = sqlSessionFactory.openSession(true)) {
            link = session.selectOne(SELECT_NEXT_LINK);
            if (link != null) {
                session.delete(DELETE_LINK, link);
            }
        }
        return link;
    }

    /**
     * Stores a news article through the {@code insertNews} statement.
     *
     * @param link           URL of the article
     * @param articleTitle   title of the article
     * @param articleContent body text of the article
     */
    @Override
    public void insertNewsIntoDatabase(String link, String articleTitle, String articleContent) throws SQLException {
        try (SqlSession session = sqlSessionFactory.openSession(true)) {
            session.insert(INSERT_NEWS, new News(articleTitle, articleContent, link));
        }
    }

    /**
     * Checks whether the {@code countLink} statement reports at least one row
     * for the given link.
     *
     * @param link the link to look up
     * @return {@code true} if the reported count is non-zero
     */
    @Override
    public boolean isLinkProcessed(String link) throws SQLException {
        Integer count;
        // Read-only query, so a session without autoCommit is sufficient.
        try (SqlSession session = sqlSessionFactory.openSession()) {
            count = session.selectOne(COUNT_LINK, link);
        }
        // Keep the boxed value so an unexpected null result cannot cause a
        // NullPointerException during unboxing.
        return count != null && count != 0;
    }

    /**
     * Inserts {@code link} into the {@code LINKS_ALREADY_PROCESSED} table.
     *
     * @param link the link to store
     */
    @Override
    public void insertLinkIntoProcessed(String link) {
        this.insertIntoDifferentTable(TABLE_ALREADY_PROCESSED, link);
    }

    /**
     * Inserts {@code href} into the {@code LINKS_TO_BE_PROCESSED} table.
     *
     * @param href the link to store
     */
    @Override
    public void insertLinkIntoToBeProcessed(String href) {
        this.insertIntoDifferentTable(TABLE_TO_BE_PROCESSED, href);
    }

    /**
     * Runs the {@code insertLink} statement with a parameter map holding the
     * target table name and the link.
     *
     * @param tableName value passed to the statement as {@code tableName}
     * @param link      value passed to the statement as {@code link}
     */
    private void insertIntoDifferentTable(String tableName, String link) {
        HashMap<String, String> param = new HashMap<>();
        param.put("tableName", tableName);
        param.put("link", link);
        try (SqlSession session = sqlSessionFactory.openSession(true)) {
            session.insert(INSERT_LINK, param);
        }
    }
}
48 changes: 48 additions & 0 deletions src/main/java/com/github/DeeJay0921/News.java
@@ -0,0 +1,48 @@
package com.github.DeeJay0921;

import java.math.BigInteger;

/**
 * Mutable bean describing one news article: an identifier plus the article's
 * title, content and URL.
 *
 * <p>The constructor does not set the identifier, so {@link #getId()} returns
 * {@code null} until {@link #setId(BigInteger)} has been called.
 */
public class News {
    /** Identifier of the article; not assigned by the constructor. */
    private BigInteger id;
    /** Title of the article. */
    private String title;
    /** Content of the article. */
    private String content;
    /** URL of the article. */
    private String url;

    /**
     * Creates an article with the given title, content and URL.
     *
     * @param title   the article title
     * @param content the article content
     * @param url     the article URL
     */
    public News(String title, String content, String url) {
        this.title = title;
        this.content = content;
        this.url = url;
    }

    /** @return the identifier, or {@code null} if none has been set */
    public BigInteger getId() {
        return this.id;
    }

    /** @param id the identifier to store */
    public void setId(BigInteger id) {
        this.id = id;
    }

    /** @return the article title */
    public String getTitle() {
        return this.title;
    }

    /** @param title the article title to store */
    public void setTitle(String title) {
        this.title = title;
    }

    /** @return the article content */
    public String getContent() {
        return this.content;
    }

    /** @param content the article content to store */
    public void setContent(String content) {
        this.content = content;
    }

    /** @return the article URL */
    public String getUrl() {
        return this.url;
    }

    /** @param url the article URL to store */
    public void setUrl(String url) {
        this.url = url;
    }
}
38 changes: 38 additions & 0 deletions src/main/resources/db/mybatis/myMapper.xml
@@ -0,0 +1,38 @@
<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE mapper
PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN"
"http://mybatis.org/dtd/mybatis-3-mapper.dtd">
<mapper namespace="com.github.DeeJay0921.mybatis">
<!--Read the next link to be crawled from the database; the result type is String-->
<select id="selectNextLink" resultType="String">
SELECT link FROM LINKS_TO_BE_PROCESSED LIMIT 1
</select>
<!--Delete the given link from the to-be-processed table-->
<delete id="deleteLink" parameterType="String">
delete from LINKS_TO_BE_PROCESSED where link = #{link}
</delete>
<!--Insert a News record. The class name must be fully qualified; the inserted values are taken from the fields of the News class-->
<insert id="insertNews"
parameterType="com.github.DeeJay0921.News">
insert into NEWS (TITLE, CONTENT, URL)
values (#{title},#{content},#{url})
</insert>
<!--Count matching rows in the already-processed table to check whether the link exists there-->
<select id="countLink" resultType="int" parameterType="String">
SELECT count(link) FROM LINKS_ALREADY_PROCESSED where LINK = #{link}
</select>
<!--Use dynamic SQL to decide which table the link is inserted into.
    Any tableName other than 'LINKS_TO_BE_PROCESSED' falls through to LINKS_ALREADY_PROCESSED.-->
<insert id="insertLink" parameterType="HashMap">
insert into
<choose>
<when test="tableName == 'LINKS_TO_BE_PROCESSED'">
LINKS_TO_BE_PROCESSED
</when>
<otherwise>
LINKS_ALREADY_PROCESSED
</otherwise>
</choose>
(LINK)
values (#{link})
</insert>
</mapper>
21 changes: 21 additions & 0 deletions src/main/resources/db/mybatis/mybatis-config.xml
@@ -0,0 +1,21 @@
<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE configuration
PUBLIC "-//mybatis.org//DTD Config 3.0//EN"
"http://mybatis.org/dtd/mybatis-3-config.dtd">
<!--MyBatis configuration: one "development" environment plus the SQL mapper registration-->
<configuration>
<environments default="development">
<environment id="development">
<transactionManager type="JDBC"/>
<!--Pooled data source using the H2 driver against the file database at ./news-->
<dataSource type="POOLED">
<property name="driver" value="org.h2.Driver"/>
<property name="url" value="jdbc:h2:file:./news"/>
<!--<property name="username" value="${username}"/>-->
<!--<property name="password" value="${password}"/>-->
</dataSource>
</environment>
</environments>
<mappers>
<!--SQL mapping file-->
<mapper resource="db/mybatis/myMapper.xml"/>
</mappers>
</configuration>

0 comments on commit 7942cbb

Please sign in to comment.