Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
49 commits
Select commit Hold shift + click to select a range
07aae4d
Shellcheck and pylint
ben-grande Jul 11, 2025
8009f11
Add logging helper
ben-grande Jul 11, 2025
6710c76
Shell formatting
ben-grande Jul 11, 2025
67f83c9
Minor fixes
ben-grande Jul 11, 2025
6620c8b
Stricter shell options
ben-grande Jul 14, 2025
e2db953
Compare empty against -z
ben-grande Jul 14, 2025
9c6be07
Check for empty file with -s
ben-grande Jul 14, 2025
fc8764a
Add safe terminal utils
ben-grande Jul 14, 2025
041a6d7
Substitute seq for {1..n}
ben-grande Jul 14, 2025
8fbe34c
Clear redirects
ben-grande Jul 14, 2025
8a040dd
Add missing log function
ben-grande Jul 14, 2025
78e1344
Add end-of-options separator --
ben-grande Jul 14, 2025
6769f81
Log only after source
ben-grande Jul 14, 2025
5c41de4
Normal variable to array
ben-grande Jul 14, 2025
d8870a8
Minor cleanup
ben-grande Jul 14, 2025
495fa52
Convert some scripts env vars to script arguments
ben-grande Jul 14, 2025
8ebdab1
Remove unused code
ben-grande Jul 14, 2025
d612e8f
Fix instruction URL regex
ben-grande Jul 17, 2025
230c62d
Another wave of removing environment variables
ben-grande Jul 17, 2025
4efa383
Even more environment variable removal
ben-grande Jul 17, 2025
d7f5df4
Fix usage instructions
ben-grande Jul 18, 2025
4d9d3b4
Clear logout/login reason on failure
ben-grande Jul 18, 2025
b2371be
Get wiki API from credentials
ben-grande Jul 18, 2025
cab99fa
Move credentials instruction to README
ben-grande Jul 21, 2025
51cdb9c
Remove unused items
ben-grande Jul 21, 2025
5f11512
Pass argument array to retry wrapper
ben-grande Jul 21, 2025
9a1864e
Reference where a variable is set
ben-grande Jul 21, 2025
9fb899e
Allow edit message to be a positional parameter
ben-grande Jul 21, 2025
ecd912f
Add more option parsing
ben-grande Jul 21, 2025
c6a485c
Minor cleanup
ben-grande Jul 23, 2025
e238319
Rename mw-login-test
ben-grande Jul 23, 2025
e766071
Prefer non-environment variable
ben-grande Jul 23, 2025
cffc004
Be verbose on login failure
ben-grande Jul 23, 2025
8741265
Download file with the same name as the page
ben-grande Jul 23, 2025
33e0506
Add missing wiki URL
ben-grande Jul 23, 2025
e502819
Add missing wiki URL
ben-grande Jul 23, 2025
255f1f6
Remove option parser test file
ben-grande Jul 23, 2025
809bebb
Add missing log levels
ben-grande Jul 23, 2025
908d43d
Minor corrections
ben-grande Jul 23, 2025
591d268
Fix log level warn
ben-grande Jul 23, 2025
846aeb9
Fix retry wrapper derivative argument parser
ben-grande Jul 23, 2025
111bbd1
Do not expand password
ben-grande Jul 23, 2025
28a8ff7
Log curl calls
ben-grande Jul 23, 2025
267d7e3
Add URL to curl failure logging
ben-grande Jul 23, 2025
7d7080a
Fix unbound variable
ben-grande Jul 23, 2025
8dc85db
Bind names to URLs
ben-grande Jul 23, 2025
aee31f0
Logging curl runs can leak secrets
ben-grande Jul 23, 2025
d78a24e
Print files with logging
ben-grande Jul 23, 2025
2a040f8
Add inline comments to TODO
ben-grande Jul 28, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
credentials
mirror-multi-wiki
20 changes: 18 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,24 @@
# Bash shell scripts for using the MediaWiki API #

Description here.
Create a credentials file in `/usr/share/mediawiki-shell/credentials` or
`~/.mediawikishell_credentials` with the following contents:

```sh
case "${WIKI_URL-}" in
*".whonix."*)
WIKI_API_USER_NAME='username'
WIKI_API_USER_PASS='password'
;;
*".kicksecure."*)
WIKI_API_USER_NAME='username'
WIKI_API_USER_PASS='password'
;;
esac
WIKI_API="$WIKI_URL/api.php"
WIKI_INDEX="$WIKI_URL/index.php"
```

TODO
If using a different wiki, add an entry to the `case` block.

## How to install `mediawiki-shell` using apt-get ##

Expand Down
257 changes: 147 additions & 110 deletions usr/bin/mw-all-pages
Original file line number Diff line number Diff line change
@@ -1,137 +1,174 @@
#!/bin/bash
#!/usr/bin/env bash

stecho "$0: START"

set -x
set -e
set -euEo pipefail

# shellcheck source=../share/mediawiki-shell/common
source /usr/share/mediawiki-shell/common

## These variables should be set by the calling script as environment variables.
## example:
#[[ -v SOURCE_WIKI_URL ]] || SOURCE_WIKI_URL='https://www.whonix.org/w'
check_vars_exist SOURCE_WIKI_URL QUERY_TYPE

[[ -v SOURCE_TARGET_API ]] || SOURCE_TARGET_API="${SOURCE_WIKI_URL}/api.php"
[[ -v 1 ]] || missing_variable "parameter 1 needs to be set to the allpages_file name. example: $0 /tmp/allpages.txt"

log info "START"

## Print the command-line synopsis to stderr and exit with status 1.
##
## Globals read:
##   default_namespace_default_list - shown as the default of
##     --namespace-default-list; printed as empty if it is not set yet, so the
##     function stays usable under 'set -u'.
## Outputs: usage text on stderr.
## Returns: never returns; always exits 1.
usage() {
  ## All three positional arguments are mandatory (the caller invokes usage
  ## when the third one is missing), so every example shows a complete
  ## invocation and the accepted QUERY values are listed explicitly.
  printf '%s\n' \
    "Usage: ${0##*/} [OPTIONS] WIKI QUERY OUTPUT" \
    "Arguments:" \
    "WIKI Base URL of the wiki, for example https://www.kicksecure.com/w" \
    "QUERY One of: allpages, unreviewedpages, querypage" \
    "OUTPUT File that receives the collected page titles" \
    "Options:" \
    "--article-sanity-test=X Guarantee that string is present on the fetched pages" \
    "--namespace-default-list=N Page collection per namespace (default: ${default_namespace_default_list-})" \
    "--namespace-extra-list=N Extra page collection per namespace" \
    "Example:" \
    "${0##*/} https://www.kicksecure.com/w allpages /tmp/allpages.txt" \
    "${0##*/} https://www.kicksecure.com/w unreviewedpages /tmp/unreviewedpages.txt" \
    "${0##*/} https://www.kicksecure.com/w querypage /tmp/unusedimages.txt" \
    >&2
  exit 1
}

# shellcheck source=/usr/libexec/helper-scripts/parse_opt.sh
source /usr/libexec/helper-scripts/parse_opt.sh

article_sanity_test=""
## https://www.mediawiki.org/wiki/Manual:Namespace
## https://www.kicksecure.com/w/api.php?action=query&meta=siteinfo&siprop=namespaces
## https://www.whonix.org/w/api.php?action=query&meta=siteinfo&siprop=namespaces
[[ -v wiki_namespace_list_default ]] || wiki_namespace_list_default="0 4 6 8 10 12 14"
default_namespace_default_list="0 4 6 8 10 12 14"
## NOTE to self by Patrick: also set by other script:
## wiki-backup-with-mediawiki-shell
## 500: site-specific namespace Moved
## 274: site-specific namespace Widgets
#[[ -v wiki_namespace_list_extra ]] || wiki_namespace_list_extra="500 274"
[[ -v wiki_namespace_list_extra ]] || wiki_namespace_list_extra=""
namespace_extra_list=""

## Consume leading option arguments one at a time. The loop ends at the first
## argument that does not start with '-', or when begin_optparse fails.
## begin_optparse and get_arg come from the sourced parse_opt.sh; presumably
## they populate opt, arg, opt_orig and shift_n -- TODO confirm against that
## helper.
while true; do
[[ "${1-}" =~ ^- ]] || break
begin_optparse "${1:-}" "${2:-}" || break
## No-op that only references the variables; presumably here so linters see
## them as used -- TODO confirm.
true "${opt-}" "${arg-}" "${opt_orig-}"
case "${opt}" in
article-sanity-test) get_arg; article_sanity_test="${arg}";;
namespace-default-list) get_arg; namespace_default_list="${arg}";;
namespace-extra-list) get_arg; namespace_extra_list="${arg}";;
## usage prints the synopsis and exits 1, so -h/--help does not return here.
h|help) usage;;
## NOTE(review): this break skips the shift below; confirm that a literal
## '--' is not left behind as the first positional argument.
--|"") break;;
*) die 2 "Invalid option: '${opt_orig}'"
esac
## Drop what was consumed; falls back to one argument when shift_n is unset
## or empty.
shift "${shift_n:-1}"
done

[[ -v wiki_article_must_include_sanity_test ]] || wiki_article_must_include_sanity_test=""
namespace_default_list="${namespace_default_list-"${default_namespace_default_list}"}"

api_extra_args=""
if [ "$QUERY_TYPE" = "allpages" ]; then
list=allpages
namespace_keyword=apnamespace
api_continue_keyword=apcontinue
api_limit_keyword_and_content="aplimit=500"
elif [ "$QUERY_TYPE" = "unreviewedpages" ]; then
list=unreviewedpages
namespace_keyword=urnamespace
api_continue_keyword=urstart
api_limit_keyword_and_content="urlimit=500"
elif [ "$QUERY_TYPE" = "querypage" ]; then
list=querypage
namespace_keyword=""
api_continue_keyword=""
api_limit_keyword_and_content="qplimit=500"
api_extra_args="&qppage=Unusedimages"
## Upstream does not support name spaces for this API call.
wiki_namespace_list_extra=""
wiki_namespace_list_default=0
## https://www.kicksecure.com/w/api.php?action=query&list=querypage&qppage=Unusedimages
else
stecho "$0: ERROR: QUERY_TYPE must be set to either QUERY_TYPE=allpages or QUERY_TYPE=unreviewedpages"
exit 1
if [[ -z "${3-}" ]]; then
usage
fi

allpages_file="$1"
wiki_url="$1"
query_type="$2"
allpages_file="$3"
range_arg query_type "${query_type}" allpages unreviewedpages querypage

# shellcheck source=../share/mediawiki-shell/wiki-config
WIKI_URL="$wiki_url" source /usr/share/mediawiki-shell/wiki-config

## Reject anything that is not a supported list module before deriving the
## per-module request parameters.
case "$query_type" in
  allpages | unreviewedpages | querypage) ;;
  *)
    log error "query_type must be set to either: allpages, unreviewedpages or querypage"
    exit 1
    ;;
esac

## The API list module carries the same name as the query type.
list="$query_type"
api_extra_args=""

## Parameter names differ per list module: namespace filter, continuation
## token and result limit.
case "$query_type" in
  allpages)
    namespace_keyword=apnamespace
    api_continue_keyword=apcontinue
    api_limit_keyword_and_content="aplimit=500"
    ;;
  unreviewedpages)
    namespace_keyword=urnamespace
    api_continue_keyword=urstart
    api_limit_keyword_and_content="urlimit=500"
    ;;
  querypage)
    ## https://www.kicksecure.com/w/api.php?action=query&list=querypage&qppage=Unusedimages
    namespace_keyword=""
    api_continue_keyword=""
    api_limit_keyword_and_content="qplimit=500"
    api_extra_args="&qppage=Unusedimages"
    ## Upstream does not support namespaces for this API call, so the
    ## namespace lists are reduced to the single entry 0.
    namespace_extra_list=""
    namespace_default_list=0
    ;;
esac

safe-rm -f -- "$allpages_file"

stecho "$0: INFO: allpages_file : $allpages_file"
stecho "$0: INFO: wiki_namespace_list_default: $wiki_namespace_list_default"
stecho "$0: INFO: wiki_namespace_list_extra : $wiki_namespace_list_extra"
stecho "$0: INFO: SOURCE_WIKI_URL : $SOURCE_WIKI_URL"
stecho "$0: INFO: SOURCE_TARGET_API: $SOURCE_TARGET_API"
stecho "$0: INFO: QUERY_TYPE: $QUERY_TYPE"
log info "allpages_file : $allpages_file"
log info "namespace_default_list: $namespace_default_list"
log info "namespace_extra_list : $namespace_extra_list"
log info "wiki_url : $wiki_url"
log info "WIKI_API: $WIKI_API"
log info "query_type: $query_type"

## Not required for public wiki.
#mw-logout
#mw-login

for wiki_namespace_item in $wiki_namespace_list_extra $wiki_namespace_list_default ; do
stecho "$0: INFO: wiki_namespace_item: $wiki_namespace_item"

api_continue_or_not=""

while true ; do
stecho "$0: INFO: api_full_link:"
api_full_link="${SOURCE_TARGET_API}?&format=json&action=query&list=${list}&${namespace_keyword}=${wiki_namespace_item}&${api_limit_keyword_and_content}&${api_continue_keyword}=${api_continue_or_not}${api_extra_args}"
stecho "$api_full_link"

query_result=$(\
$curl \
$curl_opts \
"$api_full_link"
)

if [ "$QUERY_TYPE" = "allpages" ]; then
stecho "$query_result" | jq -r ".query.allpages[] | .title | @sh" | tee -a "$allpages_file" >/dev/null
## If curl exits non-zero, consider no longer API continue.
## If curl exits zero, set variable api_continue_or_not.
if ! api_continue_or_not="$(stecho "$query_result" | jq -r ".continue | .apcontinue")" ; then
break
fi
## If api_continue_or_not is set to "null", we're done.
if [ "$api_continue_or_not" = "null" ]; then
break
fi
elif [ "$QUERY_TYPE" = "unreviewedpages" ]; then
stecho "$query_result" | jq -r '.query.unreviewedpages[] | .title | @sh' | tee -a "$allpages_file" >/dev/null
if ! api_continue_or_not="$(stecho "$query_result" | jq -r ".continue.urstart")" ; then
break
fi
if [ "$api_continue_or_not" = "null" ]; then
break
fi
elif [ "$QUERY_TYPE" = "querypage" ]; then
stecho "$query_result" | jq -r '.query.querypage.results[] | .title' | tee -a "$allpages_file" >/dev/null
## Upstream does not support name spaces for this API call.
break
else
error "Not implemented!"
break
fi
done
#mw-login-test "$wiki_url"

## Fetch page titles for every namespace and append them to allpages_file.
## The two list variables are expanded unquoted on purpose: they hold
## space-separated namespace numbers and rely on word splitting.
for wiki_namespace_item in $namespace_extra_list $namespace_default_list; do
log info "wiki_namespace_item: $wiki_namespace_item"

## Continuation token for the current namespace; empty on the first request.
api_continue_or_not=""

## One iteration per API request; each case arm below decides when to stop.
while true; do
## For querypage both namespace_keyword and api_continue_keyword are empty,
## so the URL then carries two parameters with an empty name.
## NOTE(review): the continuation token is inserted without URL-encoding;
## confirm it can never contain reserved characters.
api_full_link="${WIKI_API}?&format=json&action=query&list=${list}&${namespace_keyword}=${wiki_namespace_item}&${api_limit_keyword_and_content}&${api_continue_keyword}=${api_continue_or_not}${api_extra_args}"
log info "api_full_link: $api_full_link"

## $curl and curl_opts are not set in this script; presumably they come from
## the sourced common file -- TODO confirm.
query_result=$($curl "${curl_opts[@]}" "$api_full_link")

case "$query_type" in
allpages)
## Append every title, shell-quoted by jq's @sh filter, to the output file.
stecho "$query_result" | jq -r ".query.allpages[] | .title | @sh" | tee -a -- "$allpages_file" >/dev/null
## If the stecho | jq pipeline exits non-zero, stop paginating.
## Otherwise api_continue_or_not holds what jq printed for .continue.apcontinue.
if ! api_continue_or_not="$(stecho "$query_result" | jq -r ".continue | .apcontinue")"; then
break
fi
## jq prints the literal string "null" when the response has no
## continuation token, which means this namespace is done.
if [ "$api_continue_or_not" = "null" ]; then
break
fi
;;
unreviewedpages)
## Same flow as allpages, with urstart as the continuation field.
stecho "$query_result" | jq -r '.query.unreviewedpages[] | .title | @sh' | tee -a -- "$allpages_file" >/dev/null
if ! api_continue_or_not="$(stecho "$query_result" | jq -r ".continue.urstart")"; then
break
fi
if [ "$api_continue_or_not" = "null" ]; then
break
fi
;;
querypage)
## NOTE(review): titles are written raw here, without @sh, unlike the two
## arms above; confirm consumers of the file expect that.
stecho "$query_result" | jq -r '.query.querypage.results[] | .title' | tee -a -- "$allpages_file" >/dev/null
## Upstream does not support namespaces for this API call, and no
## continuation is requested: a single request is made.
break
;;
*)
## Only reached for a query type without an arm above.
log error "Not implemented!"
break
;;
esac
done
done

test -f "$allpages_file"
if [ ! -s "$allpages_file" ]; then
log error "allpages_file file is empty!" >&2
exit 1
fi
result_test="$(stcat "$allpages_file")"
if [ "$result_test" = "" ]; then
stecho "$0 ERROR: result_test is empty!" >&2
exit 1

if [ -z "$article_sanity_test" ]; then
log info "article_sanity_test not configured, ok."
exit
fi

if [ "$wiki_article_must_include_sanity_test" = "" ]; then
stecho "$0 INFO: wiki_article_must_include_sanity_test not configured, ok."
if grep -i -- "$article_sanity_test" "$allpages_file" >/dev/null 2>&1; then
log info "result_test does contain article_sanity_test '$article_sanity_test', ok. "
else
## TODO: stecho too much. ARG_MAX potentially hit. grep based on file instead.
if stecho "$result_test" | grep -i -- "$wiki_article_must_include_sanity_test" >/dev/null 2>/dev/null; then
stecho "$0 INFO: result_test does contain wiki_article_must_include_sanity_test '$wiki_article_must_include_sanity_test', ok. "
else
stecho "$0 ERROR: See script source code. result_test does not contain '$wiki_article_must_include_sanity_test'! allpages_file: '$allpages_file'" >&2
stecho "" >&2
stecho "$0 result_test: '$result_test'" >&2
stecho "" >&2
exit 1
fi
log error "See script source code. result_test does not contain '$article_sanity_test'! allpages_file: '$allpages_file'"
log error "result_test: '$result_test'"
exit 1
fi
Loading