Skip to content

Commit

Permalink
Added url catching feature that employs a simple regular expression to detect URLs and auto-extracts the page title.
Browse files Browse the repository at this point in the history

Added config options enable_url_catch, url_fetch_timeout and max_urls_to_catch to allow use and customization of URL detection/catching feature.
  • Loading branch information
Paradox924X committed Jun 28, 2012
1 parent e8c74c0 commit fcf97b8
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 3 deletions.
3 changes: 3 additions & 0 deletions bot.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,12 @@ command_prefix = !
show_timestamps = True
show_motd = True
enable_definitions = True
enable_url_catch = True
user_config = users.cfg
command_config = commands.cfg
sqlite_db = bot.db
url_fetch_timeout = 3
max_urls_to_catch = 3

; Connection
hostname = irc.dairc.net
Expand Down
21 changes: 18 additions & 3 deletions bot.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
#!/usr/bin/python
# coding: utf-8
# Author: Nikunj Mehta

import BeautifulSoup
from ConfigParser import RawConfigParser
import datetime
import re
Expand All @@ -10,6 +12,7 @@
import ssl
import string
import sys
import urllib2

####

Expand Down Expand Up @@ -40,10 +43,10 @@ def get_config_list(key, cfg=config):
####

def is_private_msg(line):
    """Return True if this raw IRC line is addressed directly to the bot's nick.

    Raw IRC lines look like ':prefix COMMAND target ...'; the message target
    is the third space-separated token.
    """
    # Split once and reuse; also guards short lines (e.g. 'PING :server')
    # that have no target token at all.
    parts = line.split(' ')
    return len(parts) > 2 and is_my_nick(parts[2])

def is_channel_msg(line):
    """Return True if this raw IRC line targets a channel.

    Raw IRC lines look like ':prefix COMMAND target ...'; a channel target
    is a third token beginning with '#'.
    """
    parts = line.split(' ')
    # startswith() is safe on an empty token, unlike parts[2][0] which
    # raises IndexError when the third field is empty (e.g. a double space).
    return len(parts) > 2 and parts[2].startswith('#')

def is_my_nick(nick):
    """Return True when *nick* equals the configured bot nickname."""
    # Guard clause: a missing nick can never match.
    if nick is None:
        return False
    return nick == get_config('nickname')
Expand Down Expand Up @@ -242,7 +245,7 @@ def signal_handler(signal, frame):

#### Start

VERSION = 'EnigmaIRCb v0.3beta'
VERSION = 'EnigmaIRCb v0.4beta'
GREETING = 'Welcome to ' + VERSION + '!'
CONFIG_FILE_NAME = 'bot.cfg'

Expand Down Expand Up @@ -284,6 +287,10 @@ def signal_handler(signal, frame):
bot_quit(1)
command_groups_list[command_groups_index] = command_groups

should_catch_urls = get_config_bool('enable_url_catch')
url_fetch_timeout = get_config_int('url_fetch_timeout')
max_urls_to_catch = get_config_int('max_urls_to_catch')

print_out('='*len(GREETING))
print_out(GREETING)
print_out('='*len(GREETING))
Expand Down Expand Up @@ -408,6 +415,14 @@ def signal_handler(signal, frame):
else:
send_privmsg(target, 'Invalid Usage. Please specify a word to query definition information on.')

# URL catching: scan channel messages for URL-like tokens and announce each
# page's title back to the channel (new feature gated by enable_url_catch).
if should_catch_urls and len(message_parts) > 3 and is_channel_msg(line):
target = extract_target(line)
# Heuristic regex for http(s):// and www-style URLs; the [1:] slice drops
# the leading ':' of the IRC trailing parameter. Only the first
# max_urls_to_catch matches are processed.
for url in re.findall("""(?:https?://w{2,3}\d{0,3}?|https?://|w{2,3}\d{0,3}?)\.[a-z-_]+\..{2,4}[^ ]+?""", ' '.join(message_parts[3:])[1:], re.IGNORECASE)[:max_urls_to_catch]:
try:
# Prepend 'http://' for scheme-less (www.) matches, fetch with the
# configured timeout, and report '<url> - <page title>'.
send_privmsg(target, "" + url + " - " + BeautifulSoup.BeautifulSoup(urllib2.urlopen(url if url[:4] == 'http' else 'http://' + url, None, url_fetch_timeout)).title.string)
except urllib2.URLError:
# Best-effort: unreachable or slow URLs are silently skipped.
# NOTE(review): a page without a <title> makes .title None, raising
# AttributeError which is NOT caught here — confirm this cannot
# crash the read loop.
pass

line = f.readline().rstrip()
else:
print_out('Connection closed')
Expand Down

0 comments on commit fcf97b8

Please sign in to comment.