Skip to content

Commit

Permalink
Merge 7dca130 into c486630
Browse files Browse the repository at this point in the history
  • Loading branch information
lwj5 authored Jan 7, 2019
2 parents c486630 + 7dca130 commit c95a422
Showing 1 changed file with 99 additions and 8 deletions.
107 changes: 99 additions & 8 deletions src/main/java/ai/preferred/venom/fetcher/AsyncFetcher.java
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
import com.google.common.util.concurrent.ThreadFactoryBuilder;
import org.apache.http.HttpHost;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.RedirectStrategy;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.HttpUriRequest;
import org.apache.http.client.methods.RequestBuilder;
Expand All @@ -50,6 +51,7 @@
import org.slf4j.LoggerFactory;

import javax.annotation.Nullable;
import javax.net.ssl.SSLContext;
import javax.validation.constraints.NotNull;
import java.io.IOException;
import java.net.URI;
Expand Down Expand Up @@ -174,14 +176,21 @@ private AsyncFetcher(final Builder builder) {
final IOReactorConfig reactorConfig = IOReactorConfig.custom()
.setIoThreadCount(builder.numIoThreads)
.setSoKeepAlive(true)
.setTcpNoDelay(true)
.setConnectTimeout(builder.connectTimeout)
.setSoTimeout(builder.socketTimeout)
.build();

final HttpAsyncClientBuilder clientBuilder = HttpAsyncClientBuilder.create()
.setDefaultIOReactorConfig(reactorConfig)
.setThreadFactory(builder.threadFactory);
.setThreadFactory(builder.threadFactory)
.setMaxConnPerRoute(builder.maxRouteConnections)
.setMaxConnTotal(builder.maxConnections)
.setSSLContext(builder.sslContext)
.setRedirectStrategy(builder.redirectStrategy);

if (builder.disableCookies) {
clientBuilder.disableCookieManagement();
}

if (builder.compressed) {
clientBuilder.addInterceptorLast(new RequestAcceptEncoding());
Expand Down Expand Up @@ -422,6 +431,11 @@ public static class Builder {
*/
private final List<Callback> callbacks;

/**
* Determines whether cookie storage is disabled.
*/
private boolean disableCookies;

/**
* The file manager used to store raw responses.
*/
Expand All @@ -437,11 +451,26 @@ public static class Builder {
*/
private int numIoThreads;

/**
* The maximum number of connections allowed.
*/
private int maxConnections;

/**
* The maximum number of connections allowed per route.
*/
private int maxRouteConnections;

/**
* The proxy provider for proxies.
*/
private ProxyProvider proxyProvider;

/**
* The SSL context passed to the underlying HTTP client.
*/
private SSLContext sslContext;

/**
* A list of status codes that stop retrying.
*/
Expand All @@ -462,6 +491,11 @@ public static class Builder {
*/
private Validator validator;

/**
* The redirection strategy for a response.
*/
private RedirectStrategy redirectStrategy;

/**
* The validator router used.
*/
Expand Down Expand Up @@ -492,8 +526,11 @@ public static class Builder {
*/
private Builder() {
callbacks = new ArrayList<>();
disableCookies = false;
fileManager = null;
headers = Collections.emptyMap();
maxConnections = 0;
maxRouteConnections = 0;
numIoThreads = Runtime.getRuntime().availableProcessors();
proxyProvider = null;
stopCodes = Collections.emptySet();
Expand Down Expand Up @@ -526,6 +563,16 @@ public Builder register(final @NotNull Callback callback) {
return this;
}

/**
 * Turns off cookie management for the fetcher being built, so that the
 * underlying HTTP client is created with cookie handling disabled.
 *
 * @return this
 */
public Builder disableCookies() {
  disableCookies = true;
  return this;
}

/**
* Sets the FileManager to be used. Defaults to none.
* <p>
Expand Down Expand Up @@ -562,21 +609,55 @@ public Builder numIoThreads(final int numIoThreads) {
return this;
}

/**
 * Sets the maximum total number of connections the fetcher may keep
 * open at any one time. The builder's initial value is 0.
 * <p>
 * NOTE(review): a non-positive value is presumably treated by the
 * underlying HTTP client as "use the client's default limit" — confirm
 * against the HttpAsyncClientBuilder documentation.
 *
 * @param maxConnections the maximum number of simultaneous connections.
 * @return this
 */
public Builder setMaxConnections(final int maxConnections) {
  this.maxConnections = maxConnections;
  return this;
}

/**
 * Sets the maximum number of connections the fetcher may keep open at
 * any one time to a single route (host). The builder's initial value
 * is 0.
 * <p>
 * NOTE(review): a non-positive value is presumably treated by the
 * underlying HTTP client as "use the client's default limit" — confirm
 * against the HttpAsyncClientBuilder documentation.
 *
 * @param maxRouteConnections the maximum number of simultaneous
 *                            connections per route.
 * @return this
 */
public Builder setMaxRouteConnections(final int maxRouteConnections) {
  this.maxRouteConnections = maxRouteConnections;
  return this;
}

/**
 * Specifies the proxy provider the fetcher should use. If this method
 * is never called, no proxy provider is set.
 *
 * @param proxyProvider proxy provider to be used.
 * @return this
 */
public Builder proxyProvider(final @NotNull ProxyProvider proxyProvider) {
  this.proxyProvider = proxyProvider;
  return this;
}

/**
 * Sets the SSL context that is handed to the underlying HTTP client
 * when the fetcher is built.
 * <p>
 * NOTE(review): leaving this unset presumably makes the HTTP client
 * fall back to its own default SSL context — confirm against the
 * HttpAsyncClientBuilder documentation.
 *
 * @param sslContext SSLContext to be used.
 * @return this
 */
public Builder setSslContext(final SSLContext sslContext) {
  this.sslContext = sslContext;
  return this;
}

/**
* Sets a list of stop codes that will interrupt crawling.
*
* @param codes A list of stop codes
* @param codes A list of stop codes.
* @return this
*/
public Builder stopCodes(final int... codes) {
Expand Down Expand Up @@ -643,6 +724,17 @@ public Builder validator(final @NotNull Validator... validators) {
return this;
}

/**
 * Sets the redirect strategy that is handed to the underlying HTTP
 * client when the fetcher is built; it governs how redirect responses
 * received by the fetcher are handled.
 * <p>
 * NOTE(review): leaving this unset presumably makes the HTTP client
 * use its default redirect handling — confirm against the
 * HttpAsyncClientBuilder documentation.
 *
 * @param redirectStrategy redirection strategy to be used.
 * @return this
 */
public Builder setRedirectStrategy(final RedirectStrategy redirectStrategy) {
  this.redirectStrategy = redirectStrategy;
  return this;
}

/**
* Sets ValidatorRouter to be used. Defaults to none.
* Validator rules set in validator will always be used.
Expand Down Expand Up @@ -694,14 +786,13 @@ public Builder socketTimeout(final int socketTimeout) {
}

/**
* Set whether to request for compress pages and to decompress pages
* Disables requesting compressed pages and decompressing them
* after they are fetched. Compression is enabled by default.
*
* @param compressed should request for compress pages
* @return this
*/
public Builder compressed(final boolean compressed) {
this.compressed = compressed;
public Builder disableCompression() {
this.compressed = false;
return this;
}

Expand Down

0 comments on commit c95a422

Please sign in to comment.