Skip to content

Commit

Permalink
Feat: update phishing domain threshold
Browse files Browse the repository at this point in the history
  • Loading branch information
SukkaW committed May 18, 2024
1 parent 6380d0b commit b5a6e05
Show file tree
Hide file tree
Showing 4 changed files with 95 additions and 63 deletions.
1 change: 0 additions & 1 deletion Build/build-reject-domainset.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ import createKeywordFilter from './lib/aho-corasick';
import { readFileByLine, readFileIntoProcessedArray } from './lib/fetch-text-by-line';
import { sortDomains } from './lib/stable-sort-domain';
import { task } from './trace';
import { getGorhillPublicSuffixPromise } from './lib/get-gorhill-publicsuffix';
import * as tldts from 'tldts';
import { SHARED_DESCRIPTION } from './lib/constants';
import { getPhishingDomains } from './lib/get-phishing-domains';
Expand Down
10 changes: 10 additions & 0 deletions Build/lib/get-phishing-domains.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
// eslint-disable-next-line import-x/no-unresolved -- bun
import { describe, expect, it } from 'bun:test';

import { calcDomainAbuseScore } from './get-phishing-domains';

// Smoke test for the phishing abuse-score heuristic exported by ./get-phishing-domains.
describe('calcDomainAbuseScore', () => {
  it('nmdj.pl', () => {
    const score = calcDomainAbuseScore('.01462ccca801fed55370d79231c876e5.nmdj.pl');

    // Still printed so the exact value can be inspected while tuning the weights.
    console.log(score);

    // The entry is 41 characters long, so it always earns the baseline weight (1)
    // plus the "longer than 34 characters" bonus (2.5). Every other adjustment in
    // calcDomainAbuseScore is non-negative, which makes 3.5 a guaranteed floor
    // regardless of how the subdomain part is scored.
    if (score < 3.5) {
      throw new Error(`abuse score ${score} is below the expected floor of 3.5`);
    }
  });
});
144 changes: 82 additions & 62 deletions Build/lib/get-phishing-domains.ts
Original file line number Diff line number Diff line change
Expand Up @@ -103,21 +103,23 @@ const BLACK_TLD = new Set([
]);

export const getPhishingDomains = (parentSpan: Span) => parentSpan.traceChild('get phishing domains').traceAsyncFn(async (span) => {
const [domainSet, domainSet2, gorhill] = await Promise.all([
processDomainLists(span, 'https://curbengh.github.io/phishing-filter/phishing-filter-domains.txt', true, TTL.THREE_HOURS()),
isCI
? processDomainLists(span, 'https://phishing.army/download/phishing_army_blocklist.txt', true, TTL.THREE_HOURS())
: null,
getGorhillPublicSuffixPromise()
]);
if (domainSet2) {
const gorhill = await getGorhillPublicSuffixPromise();

const domainSet = await span.traceChildAsync('download/parse/merge phishing domains', async (curSpan) => {
const [domainSet, domainSet2] = await Promise.all([
processDomainLists(curSpan, 'https://curbengh.github.io/phishing-filter/phishing-filter-domains.txt', true, TTL.THREE_HOURS()),
processDomainLists(curSpan, 'https://phishing.army/download/phishing_army_blocklist.txt', true, TTL.THREE_HOURS())
]);

SetAdd(domainSet, domainSet2);
}

span.traceChildSync('whitelisting phishing domains', (parentSpan) => {
const trieForRemovingWhiteListed = parentSpan.traceChildSync('create trie for whitelisting', () => createTrie(domainSet));
return domainSet;
});

return parentSpan.traceChild('delete whitelisted from domainset').traceSyncFn(() => {
span.traceChildSync('whitelisting phishing domains', (curSpan) => {
const trieForRemovingWhiteListed = curSpan.traceChildSync('create trie for whitelisting', () => createTrie(domainSet));

return curSpan.traceChild('delete whitelisted from domainset').traceSyncFn(() => {
for (let i = 0, len = WHITELIST_DOMAIN.length; i < len; i++) {
const white = WHITELIST_DOMAIN[i];
domainSet.delete(white);
Expand All @@ -134,68 +136,28 @@ export const getPhishingDomains = (parentSpan: Span) => parentSpan.traceChild('g
const domainArr = Array.from(domainSet);

for (let i = 0, len = domainArr.length; i < len; i++) {
const line = processLine(domainArr[i]);
if (!line) continue;

const apexDomain = gorhill.getDomain(line);
if (!apexDomain) continue;

domainCountMap[apexDomain] ||= 0;
const line = domainArr[i];

const isPhishingDomainMockingCoJp = line.includes('-co-jp');
if (isPhishingDomainMockingCoJp) {
domainCountMap[apexDomain] += 0.5;
}
const safeGorhillLine = line[0] === '.' ? line.slice(1) : line;

if (line.startsWith('.amaz')) {
domainCountMap[apexDomain] += 0.5;

if (line.startsWith('.amazon-')) {
domainCountMap[apexDomain] += 4.5;
}
if (isPhishingDomainMockingCoJp) {
domainCountMap[apexDomain] += 4;
}
} else if (line.startsWith('.customer')) {
domainCountMap[apexDomain] += 0.25;
const apexDomain = gorhill.getDomain(safeGorhillLine);
if (!apexDomain) {
console.log({ line });
continue;
}

const tld = gorhill.getPublicSuffix(line[0] === '.' ? line.slice(1) : line);
const tld = gorhill.getPublicSuffix(safeGorhillLine);
if (!tld || !BLACK_TLD.has(tld)) continue;

// Only when tld is black will this 1 weight be added
domainCountMap[apexDomain] += 1;

const lineLen = line.length;

if (lineLen > 19) {
// Add more weight if the domain is long enough
if (lineLen > 44) {
domainCountMap[apexDomain] += 3.5;
} else if (lineLen > 34) {
domainCountMap[apexDomain] += 2.5;
} else if (lineLen > 29) {
domainCountMap[apexDomain] += 1.5;
} else if (lineLen > 24) {
domainCountMap[apexDomain] += 0.75;
} else {
domainCountMap[apexDomain] += 0.25;
}

if (domainCountMap[apexDomain] < 5) {
const subdomain = tldts.getSubdomain(line, { detectIp: false });
if (subdomain?.includes('.')) {
domainCountMap[apexDomain] += 1.5;
}
}
}
domainCountMap[apexDomain] ||= 0;
domainCountMap[apexDomain] += calcDomainAbuseScore(line);
}
});

const results = span.traceChildSync('get final phishing results', () => {
const res: string[] = [];
for (const domain in domainCountMap) {
if (domainCountMap[domain] >= 5) {
if (domainCountMap[domain] >= 8) {
res.push(`.${domain}`);
}
}
Expand All @@ -204,3 +166,61 @@ export const getPhishingDomains = (parentSpan: Span) => parentSpan.traceChild('g

return [results, domainSet] as const;
});

/**
 * Computes a heuristic "abuse" weight for a single domain entry.
 *
 * The weight starts at 1 and only ever grows. Bonuses are granted for:
 *  - containing `-co-jp` (imitating a `.co.jp` host name),
 *  - starting with `.amaz` / `.amazon-`, or otherwise containing `.customer`,
 *  - the overall length of the entry,
 *  - the shape and length of the subdomain part reported by `tldts`.
 *
 * @param line - The domain entry to score. The Amazon prefix checks only match
 *   when the entry carries a leading dot (e.g. `.amazon-foo.example.com`).
 * @returns The accumulated weight (always >= 1).
 */
export function calcDomainAbuseScore(line: string) {
  // Baseline weight given to every entry.
  let score = 1;

  const mocksCoJp = line.includes('-co-jp');
  score += mocksCoJp ? 0.5 : 0;

  if (line.startsWith('.amaz')) {
    score += 0.5;
    score += line.startsWith('.amazon-') ? 4.5 : 0;
    // Amazon look-alike that also imitates .co.jp: large extra bonus.
    score += mocksCoJp ? 4 : 0;
  } else if (line.includes('.customer')) {
    score += 0.25;
  }

  // Longer entries are more suspicious; nothing is added at 19 characters or fewer.
  const totalLength = line.length;
  if (totalLength > 44) {
    score += 3.5;
  } else if (totalLength > 34) {
    score += 2.5;
  } else if (totalLength > 29) {
    score += 1.5;
  } else if (totalLength > 24) {
    score += 0.75;
  } else if (totalLength > 19) {
    score += 0.25;
  }

  const sub = tldts.getSubdomain(line, { detectIp: false });
  if (!sub) {
    // No subdomain (null or empty string): nothing more to add.
    return score;
  }

  // Look for a dot anywhere after the first character, so a leading dot carried
  // over from the entry itself is not mistaken for a nested subdomain level.
  if (sub.indexOf('.', 1) !== -1) {
    score += 1;
  }

  // Bonus for long subdomains. This is added last, after all other adjustments.
  const subLength = sub.length;
  if (subLength > 40) {
    score += 3;
  } else if (subLength > 30) {
    score += 1.5;
  } else if (subLength > 20) {
    score += 1;
  } else if (subLength > 10) {
    score += 0.1;
  }

  return score;
}
3 changes: 3 additions & 0 deletions Source/domainset/reject_sukka.conf
Original file line number Diff line number Diff line change
Expand Up @@ -302,6 +302,7 @@ inst.360safe.com
.pages.net.br
.myenotice.com
.eu5.net
.jdie.pl

# --- AD Block ---

Expand Down Expand Up @@ -733,6 +734,8 @@ comments.gazo.space
.footprintdns.com
.measure.office.com

.opinionjet.com

# >> Tracking
.mktg.tags.f5.com
.trk.caseads.com
Expand Down

0 comments on commit b5a6e05

Please sign in to comment.