Skip to content

Commit

Permalink
vashikaran to nwb, in domain names
Browse files (browse the repository at this point in the history)
  • Loading branch information
normalhuman committed Nov 25, 2015
1 parent 32e9a95 commit 9e50548
Showing 1 changed file with 5 additions and 4 deletions.
9 changes: 5 additions & 4 deletions findspam.py
@@ -71,7 +71,7 @@ def has_phone_number(s, site):
class FindSpam:
bad_keywords = ["baba ?ji", "fifa.*coins?", "fifabay", "Long Path Tool",
"fifaodell", "brianfo", "tosterone", "bajotz",
"vashi?k[ae]r[ae]n", "kolcak", "Zapyo", "we (offer|give out) (loans|funds|funding)",
"kolcak", "Zapyo", "we (offer|give out) (loans|funds|funding)",
"porn", "molvi", "judi bola", "ituBola.com", "lost lover'?s?",
"rejuvenated skin", "ProBrain", "restore[ -]?samsung[ -]?data",
"LifeForce", "swtor2credits", "me2.do",
@@ -103,7 +103,7 @@ class FindSpam:
"V[ -]?Stamina", "Gynectrol", "Adderin", "Whizz Systems?", "intellux", "viooz",
"smartican", "essay writing service", "T-complex", "retrodynamic formula",
"eltima"]
bad_keywords_nwb = [u"ಌ", "babyli(ss|cious)", "garcinia", "cambogia", "acai ?berr", # "nwb" == "no word boundary"
bad_keywords_nwb = [u"ಌ", "vashi?k[ae]r[ae]n", "babyli(ss|cious)", "garcinia", "cambogia", "acai ?berr", # "nwb" == "no word boundary"
"(eye|skin|aging) ?cream", "b ?a ?m ?((w ?o ?w)|(w ?a ?r))", "online ?it ?guru",
"abam26", "watch2live", "cogniq", "eye ?(serum|lift)", "(serum|lift) ?eye", "tophealth", "poker ?online",
"caralluma", "male\\Wperf", "anti[- ]?aging", "lumisse", "(ultra|berry|body)[ -]?ketone",
@@ -229,7 +229,8 @@ class FindSpam:
"shacamerica\\.net", "nillowpages\\.com", "letsnurture\\.com", "healthpeters\\.com",
"rozapk\\.com", "jihosoft\\.com", "mahnazmezon\\.com", "technical-care\\.com",
"skyformation\\.com", "shiftingsolutions\\.in", "bandsawjudge\\.com",
"liveestorebuilder\\.com", "exampracticequestions\\.com", "createspace\\.com"]
"liveestorebuilder\\.com", "exampracticequestions\\.com", "createspace\\.com",
"healthpeters\\.com"]
pattern_websites = [r"health\d{3,}", r"http\S*?\.repair\W", r"filefix(er)?\.com", "\.page\.tl\W",
r"\.(com|net)/xtra[\w-]", r"//xtra[\w-]*?\.(co|net|org|in\W|info)",
r"fifa\d+[\w-]*?\.com", r"[\w-](giveaway|jackets|supplys)\.com",
@@ -246,7 +247,7 @@ class FindSpam:
r"(livestreaming|watch[\w-]*?(live|online))\.(com|net|tv)",
r"//(cheat[\w-.]{3,}|xtreme[\w-]{5,})\.(co|net|org|in\W|info)",
r"([\w-]password|[\w]{5,}facts|\Btoyshop|[\w-]{6,}cheats|credits)\.(co|net|org|in\W|info)",
r"(profits|ketone|seotools|seotrick|crazybulk|onsale|fat(burn|loss)|(\.|//|best)cheap|online(training|solution))[\w-]*?\.(co|net|org|in\W|info)",
r"(spell(caster|specialist)|profits|ketone|seotools|seotrick|crazybulk|onsale|fat(burn|loss)|(\.|//|best)cheap|online(training|solution))[\w-]*?\.(co|net|org|in\W|info)",
r"(bestmover|loans|escort|testo[-bsx]|cleanse|supplement|lubricant|serum|wrinkle|topcare|freetrial)[\w-]*?\.(co|net|org|in\W|info)",
r"(buy|premium|training|thebest|[/.]try)[\w]{10,}\.(co|net|org|in\W|info)",
r"\w{10}buy\.(co|net|org|in\W|info)",
Expand Down

0 comments on commit 9e50548

Please sign in to comment.