Skip to content

Commit

Permalink
Added uad to bot detection (#467)
Browse files Browse the repository at this point in the history
* user agent data

* removed member comment

* added to internals
  • Loading branch information
turtledreams committed Jan 4, 2024
1 parent 2997069 commit 8c4b464
Show file tree
Hide file tree
Showing 6 changed files with 105 additions and 75 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
## 23.12.4
- Enhanced userAgentData detection for bot filtering

## 23.12.3
- Added bot detection for workers
- Added the ability to clear stored device IDs in the workers
Expand Down
2 changes: 1 addition & 1 deletion cypress/integration/bridge_utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ function initMain(name, version) {
}

const SDK_NAME = "javascript_native_web";
const SDK_VERSION = "23.12.3";
const SDK_VERSION = "23.12.4";

// tests
describe("Bridged SDK Utilities Tests", () => {
Expand Down
2 changes: 2 additions & 0 deletions cypress/integration/user_agent.js
Original file line number Diff line number Diff line change
Expand Up @@ -55,11 +55,13 @@ describe("User Agent tests ", () => {
hp.haltAndClearStorage(() => {
initMain();
// setting ua value to strings that can pass the regex test
expect(Countly._internals.userAgentSearchBotDetection("")).to.equal(false);
expect(Countly._internals.userAgentSearchBotDetection("123")).to.equal(false);
expect(Countly._internals.userAgentSearchBotDetection("Googlebot")).to.equal(true);
expect(Countly._internals.userAgentSearchBotDetection("Google")).to.equal(false);
expect(Countly._internals.userAgentSearchBotDetection("HeadlessChrome")).to.equal(true);
expect(Countly._internals.userAgentSearchBotDetection("Chrome-Lighthouse")).to.equal(true);
expect(Countly._internals.userAgentSearchBotDetection("Lighthouse")).to.equal(true);
});
});
});
56 changes: 40 additions & 16 deletions lib/countly.js
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,7 @@
statusCode: "cly_hc_status_code",
errorMessage: "cly_hc_error_message"
});
var SDK_VERSION = "23.12.3";
var SDK_VERSION = "23.12.4";
var SDK_NAME = "javascript_native_web";

// Using this on document.referrer would return an array with 15 elements in it. The 12th element (array[11]) would be the path we are looking for. Others would be things like password and such (use https://regex101.com/ to check more)
Expand Down Expand Up @@ -519,23 +519,38 @@
return uaOverride;
}
var ua_raw = navigator.userAgent;
// check if userAgentData is supported and userAgent is not available, use it
// check if userAgentData is supported and userAgent is not available, then use it
if (!ua_raw) {
if (navigator.userAgentData) {
// turn brands array into string
ua_raw = navigator.userAgentData.brands.map(function (e) {
return e.brand + ":" + e.version;
}).join();
// add mobile info
ua_raw += navigator.userAgentData.mobile ? " mobi " : " ";
// add platform info
ua_raw += navigator.userAgentData.platform;
}
ua_raw = currentUserAgentDataString();
}
// RAW USER AGENT STRING
return ua_raw;
}

/**
* Forms a user agent like string from navigator.userAgentData by concatenating
* the brand:version pairs (comma separated), the mobile flag and the platform.
* Example output: "Chromium:120,HeadlessChrome:120 Linux"
* @memberof Countly._internals
* @param {string} uaOverride - a string value to return instead of the value derived from userAgentData
* @returns {string} user agent string derived from userAgentData, or an empty string if userAgentData is not available
*/
function currentUserAgentDataString(uaOverride) {
    if (uaOverride) {
        return uaOverride;
    }
    // userAgentData is not exposed by every browser (nor in insecure contexts), so it has to be probed
    var uaData = typeof navigator !== "undefined" && navigator.userAgentData;
    if (!uaData) {
        return "";
    }
    var ua = "";
    // turn brands array into string, tolerating implementations that do not provide it
    if (Array.isArray(uaData.brands)) {
        ua = uaData.brands.map(function(e) {
            return e.brand + ":" + e.version;
        }).join();
    }
    // add mobile info
    ua += uaData.mobile ? " mobi " : " ";
    // add platform info, but only if it is provided, so that the text "undefined" never ends up in the result
    if (uaData.platform) {
        ua += uaData.platform;
    }
    return ua;
}

/**
* Returns device type information according to user agent string
* @memberof Countly._internals
Expand All @@ -547,7 +562,7 @@
// TODO: refactor here
if (uaOverride) {
userAgent = uaOverride;
} else if (navigator.userAgentData.mobile) {
} else if (navigator.userAgentData && navigator.userAgentData.mobile) {
return "phone";
} else {
userAgent = currentUserAgentString();
Expand Down Expand Up @@ -581,9 +596,17 @@
*/
function userAgentSearchBotDetection(uaOverride) {
    // Tells whether the given (or the current) user agent belongs to a known bot/crawler/headless browser.
    // uaOverride: optional string to test instead of the values provided by the browser.
    // Returns true if any of the checked strings contains one of the known bot patterns.

    // search bot regexp ("Lighthouse" also covers "Chrome-Lighthouse")
    var searchBotRE = /(CountlySiteBot|nuhk|Googlebot|GoogleSecurityScanner|Yammybot|Openbot|Slurp|MSNBot|Ask Jeeves\/Teoma|ia_archiver|bingbot|Google Web Preview|Mediapartners-Google|AdsBot-Google|Baiduspider|Ezooms|YahooSeeker|AltaVista|AVSearch|Mercator|Scooter|InfoSeek|Ultraseek|Lycos|Wget|YandexBot|Yandex|YaDirectFetcher|SiteBot|Exabot|AhrefsBot|MJ12bot|TurnitinBot|magpie-crawler|Nutch Crawler|CMS Crawler|rogerbot|Domnutch|ssearch_bot|XoviBot|netseer|digincore|fr-crawler|wesee|AliasIO|contxbot|PingdomBot|BingPreview|HeadlessChrome|Lighthouse)/;

    // check override first, when it is provided nothing else is consulted
    if (uaOverride) {
        return searchBotRE.test(uaOverride);
    }

    // check both userAgent and userAgentData, as only one of them might contain the information we are looking for
    var ua_bot = searchBotRE.test(currentUserAgentString());
    var uaData_bot = searchBotRE.test(currentUserAgentDataString());
    return ua_bot || uaData_bot;
}

/**
Expand Down Expand Up @@ -5161,6 +5184,7 @@
processScrollView: processScrollView,
processScroll: processScroll,
currentUserAgentString: currentUserAgentString,
currentUserAgentDataString: currentUserAgentDataString,
userAgentDeviceDetection: userAgentDeviceDetection,
userAgentSearchBotDetection: userAgentSearchBotDetection,
getRequestQueue: getRequestQueue,
Expand Down
Loading

0 comments on commit 8c4b464

Please sign in to comment.