Skip to content

Commit

Permalink
clothing, sale pattern, savemantra
Browse files — browse the repository at this point in the history
  • Loading branch information
normalhuman committed Aug 11, 2016
1 parent 0627aa8 commit b41ac2c
Showing 1 changed file with 3 additions and 3 deletions.
6 changes: 3 additions & 3 deletions findspam.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ def keyword_link(s, site): # thanking keyword and a link in the same short answer

def bad_link_text(s, site): # suspicious text of a hyperlink
s = regex.sub("</?strong>|</?em>", "", s) # remove font tags
reg = regex.compile(ur"(?isu)^(buy|cheap)\b|\b(sale|coins)$|\b(porno|replica|luxury|essays?|thesis|in \L<city>)\b|\b\L<city>.*(service|escort|call girl)|(best|make|full|hd|software|cell|data)[\w ]{1,20}(online|service|company|repair|recovery)|\bwriting service", city=FindSpam.city_list)
reg = regex.compile(ur"(?isu)^(buy|cheap)\b|\b(porno?|(whole)?sale|coins|replica|luxury|essays?|thesis|in \L<city>)\b|\b\L<city>.*(service|escort|call girl)|(best|make|full|hd|software|cell|data)[\w ]{1,20}(online|service|company|repair|recovery)|\bwriting service", city=FindSpam.city_list)
links = regex.compile(ur'(?<=nofollow">)[^<]*(?=</a>)', regex.UNICODE).findall(s)
for link_text in links:
match = reg.search(link_text)
Expand Down Expand Up @@ -360,7 +360,7 @@ class FindSpam:
"megatachoco", "shtylm\\.com", "drilluobetemple\\.webs",
"creative-proteomics", "revommerce.com", "opendatascience\\.com",
"123trainings", "(bestof|beta)cheat\\.com", "surejob\\.in",
"israelbigmarket", "chinatour\\.com", "celebsclothing\\.com",
"israelbigmarket", "chinatour\\.com", "savemantra\\.com",
"imeshlab\\.com", "sagacademy\\.com", "moderncar\\.com", "[/.]iwacy\\.com",
"topbartercard\\.com", "couponconnexion\\.com", "npmedicalhome\\.com",
"ironbe\\.com", "sedancoupeseriesspecs\\.com", "techvaid\\.com", "pirachaexports\\.com",
Expand Down Expand Up @@ -440,7 +440,7 @@ class FindSpam:
r"(recoverysoftware|removevirus|support(number|help)|techhelp|calltech|exclusive|onlineshop|video(course|classes|tutorial(?!s))|vipmodel|(?<!word)porn|wholesale|inboxmachine|(get|buy)cheap|escort|diploma|(govt|government)jobs|extramoney|earnathome|spell(caster|specialist)|profits|seo-?(tool|service|trick|market)|onsale|fat(burn|loss)|(\.|//|best)cheap|online-?(training|solution))[\w-]*?\.(co|net|org|in\W|info|ir|wordpress|blogspot|tumblr|webs\.)",
r"(replica(?!t)|rs\d?gold|rssong|runescapegold|maxgain|e-cash|mothers?day|phone-?number|fullmovie|tvstream|trainingin|dissertation|research-?(paper|statement)|digitalmarketing|infocampus|cracked\w{3}|bestmover|relocation|\w{4}mortgage|loans|revenue|testo[-bsx]|cleanse|cleansing|detox|supplement|lubricant|serum|wrinkle|topcare|freetrial)[\w-]*?\.(co|net|org|in\W|info|wordpress|blogspot|tumblr|webs\.)",
r"(drivingschool|crack-?serial|serial-?(key|crack)|freecrack|appsfor(pc|mac)|remedies|heathcare|sideeffect|meatspin|packers\S{0,3}movers|(buy|sell)\S{0,12}cvv|goatse|burnfat|gronkaffe|muskel|tes(tos)?terone|nitric(storm|oxide)|masculin|menhealth|intohealth|babaji|spellcaster|potentbody|slimbody|moist|lefair|derma(?![nt])|xtrm|factorx|(?<!app)nitro(?!us)|endorev|ketone)[\w-]*?\.(co|net|org|in\W|info|wordpress|blogspot|tumblr|webs\.)",
r"(moving|\w{10}spell|[\w-]{3}password|\w{5}deal|\w{5}facts|\w\dfacts|\Btoyshop|[\w-]{6}cheats|[\w-]{6}girls|cheatcode|cracks|credits|-wallet|refunds|truo?ng|viet|trang)\.(co|net|org|in\W|info)",
r"(moving|\w{10}spell|[\w-]{3}password|\w{5}deal|\w{5}facts|\w\dfacts|\Btoyshop|[\w-]{6}cheats|[\w-]{6}girls|clothing|cheatcode|cracks|credits|-wallet|refunds|truo?ng|viet|trang)\.(co|net|org|in\W|info)",
r"(health|earn|max|cash|wage|pay|pocket|cent|today)[\w-]{0,6}\d+\.com",
r"(//|www\.)healthy?\w{5,7}\.com",
r"https?://[\w-.]\.repair\W", r"https?://[\w-.]{10,}\.(top|help)\W", r'https?://[\w-.]*-[\w-.]*\.pro[/"<]',
Expand Down

0 comments on commit b41ac2c

Please sign in to comment.