Permalink
Browse files

Initial commit

  • Loading branch information...
Piotr Lizonczyk
Piotr Lizonczyk committed Jul 22, 2015
0 parents commit e73c42bb5d6981991dc99d3d0c0f279904a86992
@@ -0,0 +1,42 @@
.project
.pydevproject
.settings/
*.pyc
wad/test
*.py[cod]
obsolete/
rpms/
*.tgz
# C extensions
*.so
# Packages
*.egg
*.egg-info
dist
build
eggs
parts
var
sdist
develop-eggs
.installed.cfg
lib
lib64
# Installer logs
pip-log.txt
# Unit test / coverage reports
.coverage
.tox
nosetests.xml
# Translations
*.mo
# Mr Developer
.mr.developer.cfg
.project
.pydevproject
@@ -0,0 +1,10 @@
(Hopefully complete) list of people who contributed to this project:
* Sebastian Łopieński
* Piotr Lizończyk
* Vincent Brillaut
* Farzaneh Moghaddam
* Antonio Perez Perez
* Dame Jovanoski
Special thanks to Elbert Alias, the author of Wappalyzer.
674 LICENSE

Large diffs are not rendered by default.

Oops, something went wrong.
@@ -0,0 +1,4 @@
include LICENSE
include AUTHORS.md
include README.md
recursive-include wad/etc *
@@ -0,0 +1,44 @@
# WAD - Web application detector
WAD lets you analyze the given URL(s) and detect the technologies used by the web application behind each URL,
from the OS and web server level, to the programming platform and frameworks, as well as server- and client-side
applications, tools and libraries.
For example, the results of scanning a server might include:
* OS: Windows, Linux...
* Web server: Apache, Nginx, IIS...
* Programming platform: PHP, Python, Ruby, Java...
* Content management systems: Drupal, WordPress...
* Frameworks: AngularJS, Ruby on Rails, Django...
* various databases, analytics tools, JavaScript libraries, CDNs, comment systems, search engines and many others.
## How it works
WAD is built as a standalone application, using [Wappalyzer](https://github.com/AliasIO/Wappalyzer)'s
detection rules. It sends a GET request to the given URL and analyzes both the HTTP response headers and the body (HTML page),
looking for indications of the web technologies used.
Detection results may include information about the versions of the technologies used, for example the Linux distribution or the Apache version.
Results are categorized by type of technology (for example, whether it is a CMS or a database). There are now over 700
technologies that can be discovered using WAD.
## Usage
Use `wad_detect -h` to print help text.
JSON is used for convenient formatting of output data.
### Example usage scenario
Command: `wad_detect -u https://pypi.python.org/`
Output:
```
https://pypi.python.org/pypi:
- app: Varnish
type: cache-tools
ver: null
- app: Nginx
type: web-servers
ver: 1.6.2
- app: Google Analytics
type: analytics
ver: null
```
@@ -0,0 +1,40 @@
from setuptools import setup, find_packages
# Build the long description from the Markdown docs, converted to reST.
# This is strictly best-effort: when pypandoc (or the pandoc executable it
# drives) is unavailable, the package is still installable, just with an
# empty long description.
try:
    import pypandoc

    # Newer pypandoc releases expose convert_file() and dropped the legacy
    # convert(); fall back to convert() only for old releases that lack it.
    _convert = getattr(pypandoc, 'convert_file', None) or pypandoc.convert
    long_description = _convert('README.md', 'rst')
    long_description += "\n\n" + _convert('AUTHORS.md', 'rst')
except (IOError, OSError, ImportError):
    # OSError is listed explicitly because on Python 2 it is not an alias of
    # IOError, and pypandoc signals a missing pandoc executable with OSError.
    long_description = ''

setup(
    name='wad',
    version='0.1.0',
    description='A tool for detecting technologies used by web applications.',
    long_description=long_description,
    url='',  # TODO: fill with github repo url
    license='GPLv3',
    author='Sebastian Lopienski',
    author_email='sebastian.lopienski@cern.ch',
    packages=find_packages(),
    # ship the non-Python files selected by MANIFEST.in (e.g. wad/etc/*)
    include_package_data=True,
    classifiers=[
        'Development Status :: 5 - Production/Stable',
        'Intended Audience :: Information Technology',
        'Intended Audience :: Developers',
        'Natural Language :: English',
        'License :: OSI Approved :: GNU General Public License v3 (GPLv3)',
        'Operating System :: OS Independent',
        'Programming Language :: Python',
        'Programming Language :: Python :: 2',
        'Programming Language :: Python :: 2.6',
        'Programming Language :: Python :: 2.7',
        'Topic :: Security',
        'Topic :: Internet :: WWW/HTTP',
    ],
    entry_points={
        # installs the `wad_detect` command, dispatching to wad.wad_detect.main()
        'console_scripts': [
            'wad_detect = wad.wad_detect:main'
        ]
    },
)
No changes.
@@ -0,0 +1,133 @@
# Clues taken from Wappalyzer
#
# clues: https://github.com/AliasIO/Wappalyzer/blob/master/src/apps.json
# more info: https://github.com/AliasIO/Wappalyzer/blob/master/README.md
# detection: https://github.com/AliasIO/Wappalyzer/blob/master/src/wappalyzer.js
# JavaScript RegExp object: http://www.w3schools.com/jsref/jsref_obj_regexp.asp
import os
import logging
import re
import tools
# preferring simplejson but fallback to json - this should work both on python 2.4 and 2.6
# for more see http://stackoverflow.com/a/712799
try:
import simplejson as json
except ImportError:
import json
# Default clues database: the apps.json file in the 'etc' directory next to this module.
CLUES_FILE = os.path.join(os.path.dirname(__file__), 'etc/apps.json')

# Text types accepted where a clue may be given as a single string instead of a list.
try:
    _STRING_TYPES = (str, unicode)
except NameError:  # no separate `unicode` type on Python 3
    _STRING_TYPES = (str,)

# A clue value may carry trailing attributes (e.g. "confidence", "version"),
# each introduced by a literal backslash followed by a semicolon.
_ATTRIBUTE_SEPARATOR = "\\;"


class _Clues(object):
    """Loader and cache for the detection clues (apps and categories).

    The clues are read from a JSON file with top-level 'apps' and
    'categories' keys, normalised (string clues become one-element lists,
    attributes are stripped from 'implies'/'excludes', a 'catsStr' field is
    added) and their regular expressions are compiled.  The result is kept
    on the instance so the work is done only once.
    """

    def __init__(self):
        # both stay None until get_clues() loads them
        self.apps = None
        self.categories = None

    def get_clues(self, filename=CLUES_FILE):
        """Return the ``(apps, categories)`` pair, loading it on first use.

        Once both dictionaries are loaded and non-empty they are returned
        as-is and ``filename`` is ignored.
        """
        if self.apps and self.categories:
            return self.apps, self.categories

        self.load_clues(filename)
        self.compile_clues()
        return self.apps, self.categories

    @staticmethod
    def read_clues_from_file(filename):
        """Read one clues file and return its ``(apps, categories)`` dicts.

        Raises IOError if the file cannot be opened, ValueError if it does
        not contain valid JSON and KeyError if a top-level key is missing;
        the first two are logged before being re-raised.
        """
        logging.info("Reading clues file %s", filename)
        try:
            # binary mode: let the JSON decoder do the decoding itself instead
            # of depending on the locale's default text encoding
            json_file = open(filename, 'rb')
        except IOError as e:
            logging.error("Error while opening clues file, terminating: %s", tools.error_to_str(e))
            raise

        # `with` closes the file even when parsing fails
        with json_file:
            try:
                clues = json.load(json_file)
            except ValueError as e:
                logging.error("Error while reading JSON file, terminating: %s", tools.error_to_str(e))
                raise

        categories = clues['categories']
        apps = clues['apps']
        return apps, categories

    @staticmethod
    def merge_dictionaries(dict1, dict2, desc):
        """Copy every entry of ``dict2`` into ``dict1`` (in place).

        Entries of ``dict2`` win over existing ones; each overwritten key is
        reported with a warning that uses ``desc`` as the kind of entry.
        """
        for key in dict2:
            if key in dict1:
                logging.warning("%s '%s' in both clues files", desc, key)
            dict1[key] = dict2[key]

    def string_to_array(self, tag):
        """Wrap the ``tag`` field of every app in a list if it is a single string."""
        for clues in self.apps.values():
            value = clues.get(tag)
            if isinstance(value, _STRING_TYPES):
                clues[tag] = [value]

    def ignore_attributes(self, tag):
        """Strip attributes from every item of the ``tag`` field of every app."""
        # ignore all attributes (such as "confidence") specified after \;
        for clues in self.apps.values():
            if tag in clues:
                clues[tag] = [item.split(_ATTRIBUTE_SEPARATOR)[0] for item in clues[tag]]

    def add_categories_str(self):
        """Add a 'catsStr' field to every app: its category names joined by commas.

        An app with an empty 'cats' list gets an empty string.
        """
        for clues in self.apps.values():
            # category ids are looked up by their string form
            names = [str(self.categories[str(cat_id)]) for cat_id in clues['cats']]
            clues['catsStr'] = ",".join(names)

    def load_clues(self, filename):
        """Load and normalise the clues from ``filename``.

        If a sibling file named ``filename + ".other"`` exists, its apps and
        categories are merged on top of the main ones.
        """
        self.apps, self.categories = self.read_clues_from_file(filename)

        additional_clues = filename + ".other"
        if os.path.isfile(additional_clues):
            apps2, categories2 = self.read_clues_from_file(additional_clues)
            # merge dictionaries with apps and categories; warn about repeated ones
            self.merge_dictionaries(self.apps, apps2, "App")
            self.merge_dictionaries(self.categories, categories2, "Category")

        # some clues are strings while others are array of strings - make them all arrays
        for field in ['url', 'html', 'env', 'script', 'implies', 'excludes']:
            self.string_to_array(field)

        # ignoring confidence in implies and excludes
        self.ignore_attributes('implies')
        self.ignore_attributes('excludes')

        # add categories string field
        self.add_categories_str()

    @staticmethod
    def compile_clue(regexp_extended):
        """Compile one clue string into a dictionary.

        The part before the first attribute separator is compiled as a
        case-insensitive regular expression and stored under "re".  Every
        following "key:value" attribute is stored as ``key -> value``; an
        attribute without a colon is stored with the value None.
        """
        values = regexp_extended.split(_ATTRIBUTE_SEPARATOR)
        regex_dict = {"re": re.compile(values[0], flags=re.IGNORECASE)}
        for extra_field in values[1:]:
            key, colon, value = extra_field.partition(':')
            if colon:
                regex_dict[key] = value
            else:
                regex_dict[extra_field] = None
        return regex_dict

    def compile_clues(self):
        """Add compiled clues to every app under ``<field>_re`` keys.

        'script', 'html' and 'url' become lists of compiled clues; 'meta'
        and 'headers' become dictionaries mapping each entry name to its
        compiled clue.
        """
        # compiling regular expressions
        for clues in self.apps.values():
            regexps = {}
            for key in clues:
                if key in ('script', 'html', 'url'):
                    # a real list (not a lazy map object) so it can be scanned repeatedly
                    regexps[key + "_re"] = [self.compile_clue(clue) for clue in clues[key]]
                if key in ('meta', 'headers'):
                    compiled = {}
                    for entry in clues[key]:
                        compiled[entry] = self.compile_clue(clues[key][entry])
                    regexps[key + "_re"] = compiled
            # applied after the loop so the dict is not resized while being iterated
            clues.update(regexps)


Clues = _Clues()  # For use as singleton
Oops, something went wrong.

0 comments on commit e73c42b

Please sign in to comment.