In [113]:
import os
import subprocess
import sys
import importlib.util

# Maps the pip distribution name to the module name that distribution provides.
# The two differ for beautifulsoup4 (imported as "bs4") and python-dotenv (imported
# as "dotenv"), so find_spec must be asked about the module name, not the pip name.
# collections.Counter ships with the standard library and needs no installation.
REQUIRED_PACKAGES = {
	"requests": "requests",
	"beautifulsoup4": "bs4",
	"python-dotenv": "dotenv",
}

for package_name, module_name in REQUIRED_PACKAGES.items():
	if importlib.util.find_spec(module_name) is None:
		# Run pip through the kernel's own interpreter so the package lands in the
		# environment this notebook is actually executing in.
		install = subprocess.run([sys.executable, "-m", "pip", "install", package_name], check=False)
		if install.returncode != 0:
			print("pip install failed for " + package_name + " (exit code " + str(install.returncode) + ")")


In [49]:
#! Parameters

import os
import getpass
import dotenv
import importlib.util


def _running_in_colab():
    """Return True when the google.colab package can be located."""
    try:
        return importlib.util.find_spec("google.colab") is not None
    except (ImportError, ValueError):
        # find_spec imports the parent package of a dotted name first and raises
        # ModuleNotFoundError when there is no "google" package at all.
        return False


if _running_in_colab() and not os.path.exists('.env'):
    # On Colab there is no local checkout, so ask the user to upload the .env file.
    from google.colab import files
    uploaded = files.upload()
    if uploaded:
        file_name = list(uploaded.keys())[0]
        try:
            os.rename(file_name, '.env')
        except OSError as err:
            print("Could not rename uploaded file to .env: " + str(err))

# Loading .env is best-effort: a failure is reported and the values are then read
# from whatever is already in the process environment, or prompted for below.
try:
    dotenv.load_dotenv('.env')
except Exception as err:
    print("Could not load .env: " + str(err))

# Always assign the three settings so the checks below never hit an undefined
# name (or a stale value left in the kernel by an earlier run).
Host = os.getenv('SECRETS_CONFLUENCE')
Username = os.getenv('SECRETS_USERNAME')
Password = os.getenv('SECRETS_PASSWORD')

if Host is None or Host == "":
    Host = input("Enter Host")

if Username is None or Username == "":
    Username = input("Enter Username")

if Password is None or Password == "":
    # getpass keeps the password out of the rendered notebook output.
    Password = getpass.getpass("Enter Password")

display("Host: " + Host)

'Host: https://autoandgeneral-sandbox-377.atlassian.net/wiki/'

In [118]:
#! Functions
import base64
from bs4 import BeautifulSoup
import requests
from collections import Counter

# Every request below is sent with verify=False; silence the InsecureRequestWarning
# that urllib3 would otherwise emit for each of them.
_urllib3 = requests.packages.urllib3
_urllib3.disable_warnings(_urllib3.exceptions.InsecureRequestWarning)

def fnGetDefaultHeaders():
    """Build the header dictionary attached to every request in this notebook.

    Reads the module-level Username and Password, joins them as "user:password"
    and base64-encodes the result into an HTTP Basic authorization value.

    Returns:
        dict with the "content-type", "authorization" and "retry-after" entries.
    """
    token = base64.b64encode(":".join((Username, Password)).encode())
    headers = dict()
    headers["content-type"] = "application/json"
    headers["authorization"] = "Basic " + token.decode()
    headers["retry-after"] = "120"
    return headers

def ApiSpaces(startAt) :
	"""Fetch one page (up to 50) of global spaces with their permissions expanded.

	Args:
		startAt: Offset of the first space to return; sent as the "start" query parameter.

	Returns:
		The decoded JSON body of the response.

	Raises:
		requests.HTTPError: If the server answers with a 4xx/5xx status.
		requests.Timeout: If the server stays silent for longer than 120 seconds.
	"""
	# Host may or may not end with "/"; strip it so the path is never joined with "//".
	url = Host.rstrip("/") + "/rest/api/space"
	headers = fnGetDefaultHeaders()
	params = {
		"type": "global",
		"limit": "50",
		"expand": "permissions",
		"start": str(startAt)
	}
	# NOTE(review): verify=False skips TLS certificate validation while Basic credentials are sent.
	response = requests.get(url, headers = headers, params = params, verify=False, timeout=120)
	# Fail here with the HTTP status instead of letting callers hit a KeyError on an error payload.
	response.raise_for_status()
	return response.json()

def ApiSpaceContent(key, startAt) :
	"""Fetch one page (up to 50 items) of a space's content with publisher history expanded.

	Args:
		key: Space key, inserted into the request path.
		startAt: Offset of the first item to return; sent as the "start" query parameter.

	Returns:
		The decoded JSON body of the response.

	Raises:
		requests.HTTPError: If the server answers with a 4xx/5xx status.
		requests.Timeout: If the server stays silent for longer than 120 seconds.
	"""
	# Host may or may not end with "/"; strip it so the path is never joined with "//".
	url = Host.rstrip("/") + "/rest/api/space/" + key + "/content"
	headers = fnGetDefaultHeaders()
	params = {
		"depth": "all",
		"limit": "50",
		"expand": "history.contributors.publishers.users",
		"start": str(startAt)
	}
	# NOTE(review): verify=False skips TLS certificate validation while Basic credentials are sent.
	response = requests.get(url, headers = headers, params = params, verify=False, timeout=120)
	# Fail here with the HTTP status instead of letting callers hit a KeyError on an error payload.
	response.raise_for_status()
	return response.json()

def ApiSpaceWatchers(key, startAt) :
	"""Fetch one page (up to 50) of the watchers of a space.

	Args:
		key: Space key, inserted into the request path.
		startAt: Offset of the first watcher to return; sent as the "start" query parameter.

	Returns:
		The decoded JSON body of the response.

	Raises:
		requests.HTTPError: If the server answers with a 4xx/5xx status.
		requests.Timeout: If the server stays silent for longer than 120 seconds.
	"""
	# Host may or may not end with "/"; strip it so the path is never joined with "//".
	url = Host.rstrip("/") + "/rest/api/space/" + key + "/watch"
	headers = fnGetDefaultHeaders()
	params = {
		"limit": "50",
		"start": str(startAt)
	}
	# NOTE(review): verify=False skips TLS certificate validation while Basic credentials are sent.
	response = requests.get(url, headers = headers, params = params, verify=False, timeout=120)
	# Fail here with the HTTP status instead of letting callers hit a KeyError on an error payload.
	response.raise_for_status()
	return response.json()

def ApiContent(contentId, startAt) :
	"""Fetch a single content item with its history and last-updated details expanded.

	Args:
		contentId: Identifier of the content item; converted with str() so both
			string and integer ids are accepted.
		startAt: Offset sent as the "start" query parameter.

	Returns:
		The decoded JSON body of the response.

	Raises:
		requests.HTTPError: If the server answers with a 4xx/5xx status.
		requests.Timeout: If the server stays silent for longer than 120 seconds.
	"""
	# Host may or may not end with "/"; strip it so the path is never joined with "//".
	url = Host.rstrip("/") + "/rest/api/content/" + str(contentId)
	headers = fnGetDefaultHeaders()
	params = {
		"limit": "50",
		"start": str(startAt),
		"expand": "history,history.lastUpdated"
	}
	# NOTE(review): verify=False skips TLS certificate validation while Basic credentials are sent.
	response = requests.get(url, headers = headers, params = params, verify=False, timeout=120)
	# Fail here with the HTTP status instead of handing an error payload back to the caller.
	response.raise_for_status()
	return response.json()

def SpacePageCall(key) :
	"""Download the space summary page (with all admins shown) and parse it as HTML.

	Args:
		key: Space key, sent as the "key" query parameter.

	Returns:
		A BeautifulSoup document built from the response body with "html.parser".

	Raises:
		requests.HTTPError: If the server answers with a 4xx/5xx status.
		requests.Timeout: If the server stays silent for longer than 120 seconds.
	"""
	# Host may or may not end with "/"; strip it so the path is never joined with "//".
	url = Host.rstrip("/") + "/spaces/viewspacesummary.action"
	headers = fnGetDefaultHeaders()
	params = {
		"showAllAdmins": "true",
		"key": key
	}
	# NOTE(review): verify=False skips TLS certificate validation while Basic credentials are sent.
	response = requests.get(url, headers = headers, params = params, verify=False, timeout=120)
	# Fail here with the HTTP status instead of parsing an error page as if it were the summary.
	response.raise_for_status()
	return BeautifulSoup(response.content, "html.parser")

def most_frequent(List):
	try:
		c = Counter(List)
		most_common = [key for key, _ in c.most_common(5)]
		return most_common
	except:
		return ""

In [123]:
import re

# Walk the space listing in steps of 50 (the page size ApiSpaces requests), for at
# most 100 requests, and stop at the first empty page.
results = []

for num in range(100):
	response = ApiSpaces(50 * num)
	batch = response["results"]
	if len(batch) == 0:
		break
	results.extend(batch)

# For every space gathered above, print one tab-separated report row with:
# key, name, URL, admin group count/names, admin user count/names, total admins
# scraped from the space summary page, page count, watcher count/names and the
# five most frequent content publishers.
for space in results:
	spaceDetails = {}
	spaceDetails['key'] = space["key"]
	spaceDetails['name'] = space["name"]
	# rstrip only removes a trailing "/" when one is present; slicing off the last
	# character would corrupt a Host that has no trailing slash.
	spaceDetails['location'] = Host.rstrip("/") + space["_links"]["webui"]

	groups = []
	users = []

	for perm in space["permissions"]:
		if perm['operation']['operation'] != "administer":
			continue
		# A permission entry is recorded as a group grant when it has a group subject;
		# otherwise it is tried as a user grant.
		try:
			groups.append(perm["subjects"]["group"]["results"][0]["name"])
			continue
		except (KeyError, IndexError, TypeError):
			pass
		try:
			subject = perm["subjects"]["user"]["results"][0]
			if subject["accountType"] != "app":  # skip app accounts
				users.append(subject["displayName"])
		except (KeyError, IndexError, TypeError):
			# Entry has neither a usable group nor a usable user subject; skip it.
			pass

	spaceDetails['groups'] = groups
	spaceDetails['groups_count'] = len(groups)
	spaceDetails['users'] = users
	spaceDetails['users_count'] = len(users)

	# Scrape the summary page: linked admins plus the number in the "additionalAdmins" element.
	summaryPage = SpacePageCall(space["key"])
	spaceAdmins = summaryPage.find(id="spaceAdmins")
	if spaceAdmins is None:
		# The page has no admin section; report zero rather than crashing.
		people = []
		andMore = None
	else:
		people = spaceAdmins.find_all("a", class_="confluence-userlink")
		andMore = spaceAdmins.find(id="additionalAdmins")
	# The first standalone number in the "additionalAdmins" text is added to the linked admins.
	moreMatch = re.search(r'\b\d+\b', andMore.text) if andMore is not None else None
	if moreMatch is not None:
		spaceDetails['total_count'] = int(moreMatch.group()) + len(people)
	else:
		spaceDetails['total_count'] = len(people)

	# Page through the space content 50 items at a time (the page size used by
	# ApiSpaceContent), counting pages and collecting publisher names.
	pages = 0
	analytics = []
	for num in range(0, 10000):
		content = ApiSpaceContent(spaceDetails['key'], num * 50)
		pageResults = content["page"]["results"]
		if len(pageResults) == 0:
			break
		pages += len(pageResults)

		for contentPage in pageResults:
			try:
				pageContributors = contentPage["history"]["contributors"]["publishers"]["users"]
				for account in pageContributors:
					user = account["displayName"]
					# Do not record deleted, anonymous or generic admin accounts.
					if user not in ("Former user (Deleted)", "Anonymous", "admin"):
						analytics.append(user)
			except (KeyError, TypeError):
				# Page without expanded publisher history; nothing to record.
				pass

	spaceDetails['page_count'] = pages
	spaceDetails['most_frequent_content_by'] = '; '.join( most_frequent(analytics) )

	# Page through the watchers 50 at a time (the page size used by ApiSpaceWatchers).
	watchers = []
	for num in range(0, 100):
		content = ApiSpaceWatchers(spaceDetails['key'], num * 50)
		if (len(content["results"]) == 0):
			break
		for x in content["results"]:
			watchers.append(x["watcher"]["displayName"])
	spaceDetails['space_watchers'] = '; '.join(watchers)
	spaceDetails['space_watchers_count'] = len(watchers)

	print(
		spaceDetails['key'],
		spaceDetails['name'],
		spaceDetails['location'],
		spaceDetails['groups_count'],
		'; '.join(spaceDetails['groups']),
		spaceDetails['users_count'],
		'; '.join(spaceDetails['users']),
		spaceDetails['total_count'],
		spaceDetails['page_count'],
		spaceDetails['space_watchers_count'],
		spaceDetails['space_watchers'],
		spaceDetails['most_frequent_content_by'],
		sep="\t", end="\n"
	)

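## IF SERVER (presumably Confluence Server rather than Cloud - TODO confirm): commented-out variant, not executed, that lists the space admins scraped from the space summary page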
# for space in response["results"]:
# 	key = "FU" # space["key"]
# 	name = space["name"]
# 	location = Host[:-1] + space["_links"]["webui"]

# 	page = SpacePageCall(key)
# 	spaceAdmins = page.find(id="spaceAdmins")

# 	print (spaceAdmins)

# 	people = spaceAdmins.find_all("a", class_="confluence-userlink")
# 	admins = '; '.join(person.text for person in people)

# 	print(key, name, location, len(people), admins, sep="\t", end="\n")



AB	ABallester	https://autoandgeneral-sandbox-377.atlassian.net/wiki/spaces/AB	1	confluence-administrators	2	Former user (Deleted); Dylan Carey	53	18	0		Vicki Guillemet; Ruth Holmes; Jamee Powell; luke.collins; Carlie Stiller [X]
RAM	Access Control Matrix	https://autoandgeneral-sandbox-377.atlassian.net/wiki/spaces/RAM	1	confluence-spaceadmins	2	Former user (Deleted); Dylan Carey	135	281	0		Scott Bodley; salman.iqbal; Matthew Betts; mpender; Peter Du Preez
ADVA	Advanced Alerting	https://autoandgeneral-sandbox-377.atlassian.net/wiki/spaces/ADVA	1	confluence-spaceadmins	2	Former user (Deleted); Dylan Carey	135	13	0		John Porter; Grant Gurney; Larry Ducie
CoP	A&G Communities of Practice	https://autoandgeneral-sandbox-377.atlassian.net/wiki/spaces/CoP	1	confluence-spaceadmins	3	Paulo Ogliani; Former user (Deleted); Dylan Carey	135	170	1	Kitty Cunningham	Sonia Tobin; Charl Benckendorff; Asvin Preetha Sivadasan; Nathan Roberts; Kristine Rees
JIRA	A&G Jira	https://autoandgeneral-sandbox-377.at