Skip to content

Commit

Permalink
digitalacads, digitalmarketing, etc
Browse files (browse the repository at this point in the history)
  • Loading branch information
normalhuman committed Apr 2, 2016
1 parent 534cbd7 commit cf39276
Showing 1 changed file with 5 additions and 5 deletions.
10 changes: 5 additions & 5 deletions findspam.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -177,7 +177,7 @@ class FindSpam:
"skinology", "folliplex", "yafei ?cable", "MSP ?Hack ?Tool",
"uggs ?on ?sale", "PhenQ", "Hack ?Tool ?2015", "ATM hackers?",
"Vigoraflo", "Fonepaw", "Provasil", "Slimera", "Cerebria", "Xanogen",
"(networking|cisco|sas|hadoop|mapreduce|oracle|dba|php|sql|javascript|js|java|designing|salesforce|joomla)( certification)? (courses?|training).{0,25}</a>",
"(networking|cisco|sas|hadoop|mapreduce|oracle|dba|php|sql|javascript|js|java|designing|marketing|salesforce|joomla)( certification)? (courses?|training).{0,25}</a>",
"intellipaat", "Replennage", "Alpha XTRM", "Synagen", "Nufinity",
"V[ -]?Stamina", "Gynectrol", "Adderin", "Whizz Systems?", "intellux", "viooz",
"smartican", "essay writing service", "T-complex", "retrodynamic formula",
Expand All @@ -199,7 +199,7 @@ class FindSpam:
"(brain|breast|male|penile|penis)[- ]?(enhance|enlarge|improve|boost|plus|peak)",
"renuva(cell|derm)", " %uh ", " %ah ", "svelme", "tapsi ?sarkar", "viktminskning",
"unique(doc)?producers", "green ?tone ?pro", "troxyphen", "seremolyn", "revolyn",
"(SEO company|training|courses?).{0,4}(bangalore|chennai|delhi|hyderabad|kolkata|mumbai)",
"(SEO company|training|courses?).{0,4}(bangalore|chennai|delhi|hyderabad|kolkata|mumbai|madurai)",
u"C[O|0]M", "ecoflex", "no2factor", "sunergetic", "capilux", "sante ?avis",
"enduros", "dianabol", "ICQ#?\d{4}-?\d{5}", "3073598075", "lumieres"]
blacklisted_websites = ["online ?kelas", "careyourhealths", "wowtoes", "(naga|dewa)poker",
Expand Down Expand Up @@ -241,7 +241,7 @@ class FindSpam:
"regeasypro\\.com", "registryware\\.org", "smartfixer\\.(net|org)",
"dlllibrary\\.net", "wisefixer\\.(com|net|org)",
"password-?unlocker\\.com", "dropbox18gb\\.com",
"passwordtech\\.com", "goshareware\\.com",
"passwordtech\\.com", "goshareware\\.com", "digitalacads\\.in",
"nemopdf\\.com", "downloaddailymotion\\.com",
"free-download-youtube\\.com", "free-music-downloader\\.com",
"video-download-capture\\.com", "videograbber\\.net",
Expand Down Expand Up @@ -353,7 +353,7 @@ class FindSpam:
"cuidados-saude", "klereumcol\\.com", "gupshupchatroom\\.com"]
# Patterns: the top few lines are the most straightforward, matching any site with this string in domain name
pattern_websites = [r"(wholesale|inboxmachine|(get|buy)cheap|escort|diploma|governmentjobs|extramoney|earnathome|spell(caster|specialist)|profits|seo(tool|service|trick)|onsale|fat(burn|loss)|(\.|//|best)cheap|online(training|solution))[\w-]*?\.(co|net|org|in\W|info|wordpress|blogspot)",
r"(fullmovie|tvstream|trainingin|infocampus|cracked\w{3}|cracksoftware|bestmover|relocation|\w{4}mortgage|loans|revenue|testo[-bsx]|cleanse|cleansing|detox|supplement|lubricant|serum|wrinkle|topcare|freetrial)[\w-]*?\.(co|net|org|in\W|info|wordpress|blogspot)",
r"(fullmovie|tvstream|trainingin|digitalmarketing|infocampus|cracked\w{3}|cracksoftware|bestmover|relocation|\w{4}mortgage|loans|revenue|testo[-bsx]|cleanse|cleansing|detox|supplement|lubricant|serum|wrinkle|topcare|freetrial)[\w-]*?\.(co|net|org|in\W|info|wordpress|blogspot)",
r"(nitricoxide|menhealth|babaji|spellcaster|potentbody|moist|lefair|lubricant|derma(?![nt])|xtrm|factorx|(?<!app)nitro(?!us)|crazy(bulk|mass)|nauseam|endorev|ketone|//xtra)[\w-]*?\.(co|net|org|in\W|info|wordpress|blogspot)",
r"([\w-]password|\w{5}facts|\w\dfacts|\Btoyshop|[\w-]{6}cheats|cheatcode|credits)\.(co|net|org|in\W|info)",
r"health\d{3,}\.(com|net)", r"https?://[\w-.]*?\.repair\W", r"https?://[\w-.]{10,}\.help\W",
Expand Down Expand Up @@ -383,7 +383,7 @@ class FindSpam:
r"(pearl|phyto|[^s]cream|creme|geniu[sx]|optimal|xplode|ultra|natura|testo|scam|wellness|grow|rejuven|revive|burn|vapor|ecig|formula|biotic|probio|male|derma|medical|medicare|health|beauty|youth|young|aging|rx|skin|trim|slim|weight|fat|nutrition|shred|advance|perfect|top|super|ultra|alpha|beta|colon|brain(?!tree))[\w]{0,20}(plus|l[iy]ft|trial|nutrition|congress|jacked|dose|formula|brazil|france|norway|sweden|mexico|genix|critic|funct?ion|power|rewind|points|essence|essential|about|market|max|help|info|policy|program|center|centre|care|try|slim|idea|pro|tip|review|assess|report|critique|blog|site|mag|chat|guide|advi[sc]|fact|discussion|solution|consult|source|sups|vms|cream|grow|enhance)[\w-]{0,10}\.(co|net|org|in\W|info|wordpress|blogspot)",
r"\w{11}(ideas?|income|sale|reviews?|advices?|problog)\.(co|net|org|in\W|info)",
"-poker\\.com", "send[\w-]*?india\.(co|net|org|in\W|info)",
r"(corrupt|repair)[\w-]*?\.blogspot", r"[\w-](courses?|training)(in[\w-]{5,})?.in/",
r"(corrupt|repair)[\w-]*?\.blogspot", r"[\w-](courses?|training)[\w-]*?\.in/",
r"(file|photo|android|iphone)recovery[\w-]*?\.(co|net|org|in\W|info)",
r"(videos?|movies?|watch)online[\w-]*?\.", r"hd(video|movie)[\w-]*?\.",
r"backlink(?!(o\.|watch))[\w-]*?\.(co|net|org|in\W|info)",
Expand Down

0 comments on commit cf39276

Please sign in to comment.