Skip to content

Commit

Permalink
Merge pull request #654 from LogMANOriginal/cURL
Browse files Browse the repository at this point in the history
Use cURL instead of file_get_contents
  • Loading branch information
logmanoriginal committed Apr 6, 2018
2 parents de5f850 + 45c3dcb commit 0f93370
Show file tree
Hide file tree
Showing 7 changed files with 45 additions and 106 deletions.
8 changes: 3 additions & 5 deletions bridges/Arte7Bridge.php
Expand Up @@ -64,13 +64,11 @@ public function collectData(){
 . $lang
 . ($category != null ? '&category.code=' . $category : '');

-$context = array(
-'http' => array(
-'header' => 'Authorization: Bearer '. self::API_TOKEN
-)
+$header = array(
+'Authorization: Bearer ' . self::API_TOKEN
 );

-$input = getContents($url, false, stream_context_create($context)) or die('Could not request ARTE.');
+$input = getContents($url, $header) or die('Could not request ARTE.');
 $input_json = json_decode($input, true);

 foreach($input_json['videos'] as $element) {
Expand Down
36 changes: 11 additions & 25 deletions bridges/FacebookBridge.php
Expand Up @@ -96,17 +96,15 @@ function extractFromDelimiters($string, $start, $end){
 $captcha_action = $_SESSION['captcha_action'];
 $captcha_fields = $_SESSION['captcha_fields'];
 $captcha_fields['captcha_response'] = preg_replace("/[^a-zA-Z0-9]+/", "", $_POST['captcha_response']);
-$http_options = array(
-'http' => array(
-'method' => 'POST',
-'user_agent' => ini_get('user_agent'),
-'header' => array("Content-type:
-application/x-www-form-urlencoded\r\nReferer: $captcha_action\r\nCookie: noscript=1\r\n"),
-'content' => http_build_query($captcha_fields)
-),
+
+$header = array("Content-type:
+application/x-www-form-urlencoded\r\nReferer: $captcha_action\r\nCookie: noscript=1\r\n");
+$opts = array(
+CURLOPT_POST => 1,
+CURLOPT_POSTFIELDS => http_build_query($captcha_fields)
 );
-$context = stream_context_create($http_options);
-$html = getContents($captcha_action, false, $context);
+
+$html = getContents($captcha_action, $header, $opts);

 if($html === false) {
 returnServerError('Failed to submit captcha response back to Facebook');
Expand All @@ -120,30 +118,18 @@ function extractFromDelimiters($string, $start, $end){

 //Retrieve page contents
 if(is_null($html)) {
-$http_options = array(
-'http' => array(
-'method' => 'GET',
-'user_agent' => ini_get('user_agent'),
-'header' => 'Accept-Language: ' . getEnv('HTTP_ACCEPT_LANGUAGE') . "\r\n"
-)
-);
-
-$context = stream_context_create($http_options);
+$header = array('Accept-Language: ' . getEnv('HTTP_ACCEPT_LANGUAGE') . "\r\n");

 // First character cannot be a forward slash
 if(strpos($this->getInput('u'), "/") === 0) {
 returnClientError('Remove leading slash "/" from the username!');
 }

 if(!strpos($this->getInput('u'), "/")) {
-$html = getSimpleHTMLDOM(self::URI . urlencode($this->getInput('u')) . '?_fb_noscript=1',
-false,
-$context)
+$html = getSimpleHTMLDOM(self::URI . urlencode($this->getInput('u')) . '?_fb_noscript=1', $header)
 or returnServerError('No results for this query.');
 } else {
-$html = getSimpleHTMLDOM(self::URI . 'pages/' . $this->getInput('u') . '?_fb_noscript=1',
-false,
-$context)
+$html = getSimpleHTMLDOM(self::URI . 'pages/' . $this->getInput('u') . '?_fb_noscript=1', $header)
 or returnServerError('No results for this query.');
 }
 }
Expand Down
2 changes: 0 additions & 2 deletions bridges/KernelBugTrackerBridge.php
Expand Up @@ -45,9 +45,7 @@ public function collectData(){
 // We use the print preview page for simplicity
 $html = getSimpleHTMLDOMCached($this->getURI() . '&format=multiple',
 86400,
 false,
 null,
-0,
-null,
 true,
 true,
Expand Down
14 changes: 2 additions & 12 deletions bridges/VkBridge.php
Expand Up @@ -109,19 +109,9 @@ public function getContents()
 {
 ini_set('user-agent', 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:53.0) Gecko/20100101 Firefox/53.0');

-$opts = array(
-'http' => array(
-'method' => "GET",
-'user_agent' => ini_get('user_agent'),
-'accept_encoding' => 'gzip',
-'header' => "Accept-language: en\r\n
-Cookie: remixlang=3\r\n"
-)
-);
-
-$context = stream_context_create($opts);
+$header = array('Accept-language: en', 'Cookie: remixlang=3');

-return getContents($this->getURI(), false, $context);
+return getContents($this->getURI(), $header);
 }


Expand Down
6 changes: 2 additions & 4 deletions bridges/YoutubeBridge.php
Expand Up @@ -140,10 +140,8 @@ private function ytBridgeFixTitle($title) {

 private function ytGetSimpleHTMLDOM($url){
 return getSimpleHTMLDOM($url,
-$use_include_path = false,
-$context = null,
-$offset = 0,
-$maxLen = null,
+$header = array(),
+$opts = array(),
 $lowercase = true,
 $forceTagsClosed = true,
 $target_charset = DEFAULT_TARGET_CHARSET,
Expand Down
3 changes: 3 additions & 0 deletions index.php
Expand Up @@ -80,6 +80,9 @@
 if(!extension_loaded('simplexml'))
 die('"simplexml" extension not loaded. Please check "php.ini"');

+if(!extension_loaded('curl'))
+die('"curl" extension not loaded. Please check "php.ini"');
+
 // configuration checks
 if(ini_get('allow_url_fopen') !== "1")
 die('"allow_url_fopen" is not set to "1". Please check "php.ini');
Expand Down
82 changes: 24 additions & 58 deletions lib/contents.php
@@ -1,77 +1,45 @@
<?php
-function getContents($url,
-$use_include_path = false,
-$context = null,
-$offset = 0,
-$maxlen = null){
-$contextOptions = array(
-'http' => array(
-'user_agent' => ini_get('user_agent'),
-'accept_encoding' => 'gzip'
-)
-);
+function getContents($url, $header = array(), $opts = array()){
+$ch = curl_init($url);
+curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
+curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);

-if(defined('PROXY_URL') && !defined('NOPROXY')) {
-$contextOptions['http']['proxy'] = PROXY_URL;
-$contextOptions['http']['request_fulluri'] = true;
+if(is_array($header) && count($header) !== 0)
+curl_setopt($ch, CURLOPT_HTTPHEADER, $header);
+
+curl_setopt($ch, CURLOPT_USERAGENT, ini_get('user_agent'));
+curl_setopt($ch, CURLOPT_ENCODING, '');
+curl_setopt($ch, CURLOPT_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS);

-if(is_null($context)) {
-$context = stream_context_create($contextOptions);
-} else {
-$prevContext = $context;
-if(!stream_context_set_option($context, $contextOptions)) {
-$context = $prevContext;
-}
+if(is_array($opts)) {
+foreach($opts as $key => $value) {
+curl_setopt($ch, $key, $value);
 }
 }

-if(is_null($maxlen)) {
-$content = file_get_contents($url, $use_include_path, $context, $offset);
-} else {
-$content = file_get_contents($url, $use_include_path, $context, $offset, $maxlen);
+if(defined('PROXY_URL') && !defined('NOPROXY')) {
+curl_setopt($ch, CURLOPT_PROXY, PROXY_URL);
 }

+$content = curl_exec($ch);
+curl_close($ch);
+
 if($content === false)
 debugMessage('Cant\'t download ' . $url);

-// handle compressed data
-foreach($http_response_header as $header) {
-if(stristr($header, 'content-encoding')) {
-switch(true) {
-case stristr($header, 'gzip'):
-$content = gzinflate(substr($content, 10, -8));
-break;
-case stristr($header, 'compress'):
-//TODO
-case stristr($header, 'deflate'):
-//TODO
-case stristr($header, 'brotli'):
-//TODO
-returnServerError($header . '=> Not implemented yet');
-break;
-case stristr($header, 'identity'):
-break;
-default:
-returnServerError($header . '=> Unknown compression');
-}
-}
-}
-
 return $content;
 }

 function getSimpleHTMLDOM($url,
-$use_include_path = false,
-$context = null,
-$offset = 0,
-$maxLen = null,
+$header = array(),
+$opts = array(),
 $lowercase = true,
 $forceTagsClosed = true,
 $target_charset = DEFAULT_TARGET_CHARSET,
 $stripRN = true,
 $defaultBRText = DEFAULT_BR_TEXT,
 $defaultSpanText = DEFAULT_SPAN_TEXT){
-$content = getContents($url, $use_include_path, $context, $offset, $maxLen);
+$content = getContents($url, $header, $opts);
 return str_get_html($content,
 $lowercase,
 $forceTagsClosed,
Expand All @@ -89,10 +57,8 @@ function getSimpleHTMLDOM($url,
 */
 function getSimpleHTMLDOMCached($url,
 $duration = 86400,
-$use_include_path = false,
-$context = null,
-$offset = 0,
-$maxLen = null,
+$header = array(),
+$opts = array(),
 $lowercase = true,
 $forceTagsClosed = true,
 $target_charset = DEFAULT_TARGET_CHARSET,
Expand All @@ -116,7 +82,7 @@ function getSimpleHTMLDOMCached($url,
&& (!defined('DEBUG') || DEBUG !== true)) { // Contents within duration
 $content = $cache->loadData();
 } else { // Content not within duration
-$content = getContents($url, $use_include_path, $context, $offset, $maxLen);
+$content = getContents($url, $header, $opts);
 if($content !== false) {
 $cache->saveData($content);
 }
Expand Down

0 comments on commit 0f93370

Please sign in to comment.