Skip to content

Commit

Permalink
fix many s/// tainting bugs
Browse files Browse the repository at this point in the history
This is a re-implementation of the tainting code in pp_subst and
pp_substcont. Although this fixes many bugs, because its a de-novo rewrite
of the tainting parts of the code in those two functions, it's quite
possible that it breaks some existing tainting behaviour. It doesn't break
any existing tests, although it turns out that this area was severely
under-tested anyway.

The main bugs that this commit fixes are as follows, where:

T = a tainted value
L = pattern tainted by locale (e.g. use locale; s/\w//)
Happens both with and without 'use re taint' unless specified.
Happens with all modifiers (/g, /r etc) unless explicitly mentioned.

    $1 unexpectedly untainted:
	s/T//
	T =~ s///            under use re 'taint'

     original string unexpectedly untainted:
	s/L//,  s/L//g

    return value unexpectedly untainted:
	T =~ s///g           under no re 'taint'
	s/L//g,  s/L//r

    return value unexpectedly tainted:
	s/T//
	s//T/r               under no re 'taint'
	T =~ s///            under use re 'taint'
	s//T/                under use re 'taint'

Also, with /ge, the original string becomes tainted as soon as possible
(usually in the second entry to the /e code block) rather than only at the
end, in code like

    $orig =~ s/T/...code.../ge

The rationale behind the taintedness of the return value of s/// (in the
non /r case), is that a boolean value shouldn't be tainted. This
corresponds to the general perl tainting policy that boolean ops don't
return tainted values. On the other hand,  when it returns an integer
(number of matches), that should be tainted.

A couple of note about the old tainting code this replaces: firstly, several
occurrences of the following were NOOPs, since rxtainted was U8 and the bit
being ored was > 256:

    rxtainted |= RX_MATCH_TAINTED(rx)

secondly, removing a whole bunch of the following didn't make any
existing tests fail:

    TAINT_IF(rxtainted & 1);
  • Loading branch information
iabyn committed Feb 16, 2011
1 parent c769ddc commit 20be658
Show file tree
Hide file tree
Showing 4 changed files with 284 additions and 52 deletions.
9 changes: 9 additions & 0 deletions perl.h
Expand Up @@ -541,6 +541,15 @@ register struct op *Perl_op asm(stringify(OP_IN_REGISTER));
#define TAINT_ENV() if (PL_tainting) { taint_env(); }
#define TAINT_PROPER(s) if (PL_tainting) { taint_proper(NULL, s); }

/* flags used internally only within pp_subst and pp_substcont */
#ifdef PERL_CORE
# define SUBST_TAINT_STR 1 /* string tainted */
# define SUBST_TAINT_PAT 2 /* pattern tainted */
# define SUBST_TAINT_REPL 4 /* replacement tainted */
# define SUBST_TAINT_RETAINT 8 /* use re'taint' in scope */
# define SUBST_TAINT_BOOLRET 16 /* return is boolean (don't taint) */
#endif

/* XXX All process group stuff is handled in pp_sys.c. Should these
defines move there? If so, I could simplify this a lot. --AD 9/96.
*/
Expand Down
51 changes: 43 additions & 8 deletions pp_ctl.c
Expand Up @@ -294,8 +294,8 @@ PP(pp_substcont)

SvGETMAGIC(TOPs); /* possibly clear taint on $1 etc: #67962 */

if (!(cx->sb_rxtainted & 2) && SvTAINTED(TOPs))
cx->sb_rxtainted |= 2;
if (SvTAINTED(TOPs))
cx->sb_rxtainted |= SUBST_TAINT_REPL;
sv_catsv_nomg(dstr, POPs);
/* XXX: adjust for positive offsets of \G for instance s/(.)\G//g with positive pos() */
s -= RX_GOFS(rx);
Expand All @@ -317,7 +317,8 @@ PP(pp_substcont)
else
sv_catpvn(dstr, s, cx->sb_strend - s);
}
cx->sb_rxtainted |= RX_MATCH_TAINTED(rx);
if (RX_MATCH_TAINTED(rx)) /* run time pattern taint, eg locale */
cx->sb_rxtainted |= SUBST_TAINT_PAT;

#ifdef PERL_OLD_COPY_ON_WRITE
if (SvIsCOW(targ)) {
Expand All @@ -334,20 +335,38 @@ PP(pp_substcont)
SvUTF8_on(targ);
SvPV_set(dstr, NULL);

TAINT_IF(cx->sb_rxtainted & 1);
if (pm->op_pmflags & PMf_NONDESTRUCT)
PUSHs(targ);
else
mPUSHi(saviters - 1);

(void)SvPOK_only_UTF8(targ);
TAINT_IF(cx->sb_rxtainted);
SvSETMAGIC(targ);
SvTAINT(targ);

/* update the taint state of various various variables in
* preparation for final exit */
if (PL_tainting) {
if ((cx->sb_rxtainted & SUBST_TAINT_PAT) ||
((cx->sb_rxtainted & (SUBST_TAINT_STR|SUBST_TAINT_RETAINT))
== (SUBST_TAINT_STR|SUBST_TAINT_RETAINT))
)
(RX_MATCH_TAINTED_on(rx)); /* taint $1 et al */

if (!(cx->sb_rxtainted & SUBST_TAINT_BOOLRET)
&& (cx->sb_rxtainted & (SUBST_TAINT_STR|SUBST_TAINT_PAT))
)
SvTAINTED_on(TOPs); /* taint return value */
/* needed for mg_set below */
PL_tainted = cBOOL(cx->sb_rxtainted &
(SUBST_TAINT_STR|SUBST_TAINT_PAT|SUBST_TAINT_REPL));
SvTAINT(TARG);
}
/* PL_tainted must be correctly set for this mg_set */
SvSETMAGIC(TARG);
TAINT_NOT;
LEAVE_SCOPE(cx->sb_oldsave);
POPSUBST(cx);
RETURNOP(pm->op_next);
/* NOTREACHED */
}
cx->sb_iters = saviters;
}
Expand Down Expand Up @@ -382,7 +401,23 @@ PP(pp_substcont)
}
if (old != rx)
(void)ReREFCNT_inc(rx);
cx->sb_rxtainted |= RX_MATCH_TAINTED(rx);
/* update the taint state of various various variables in preparation
* for calling the code block */
if (PL_tainting) {
if (RX_MATCH_TAINTED(rx)) /* run time pattern taint, eg locale */
cx->sb_rxtainted |= SUBST_TAINT_PAT;

if ((cx->sb_rxtainted & SUBST_TAINT_PAT) ||
((cx->sb_rxtainted & (SUBST_TAINT_STR|SUBST_TAINT_RETAINT))
== (SUBST_TAINT_STR|SUBST_TAINT_RETAINT))
)
(RX_MATCH_TAINTED_on(rx)); /* taint $1 et al */

if (cx->sb_iters > 1 && (cx->sb_rxtainted &
(SUBST_TAINT_STR|SUBST_TAINT_PAT|SUBST_TAINT_REPL)))
SvTAINTED_on(cx->sb_targ);
TAINT_NOT;
}
rxres_save(&cx->sb_rxres, rx);
PL_curpm = pm;
RETURNOP(pm->op_pmstashstartu.op_pmreplstart);
Expand Down
69 changes: 51 additions & 18 deletions pp_hot.c
Expand Up @@ -2071,7 +2071,7 @@ PP(pp_subst)
I32 maxiters;
register I32 i;
bool once;
U8 rxtainted;
U8 rxtainted = 0; /* holds various SUBST_TAINT_* flag bits */
char *orig;
U8 r_flags;
register REGEXP *rx = PM_GETRE(pm);
Expand Down Expand Up @@ -2127,11 +2127,19 @@ PP(pp_subst)
s = SvPV_mutable(TARG, len);
if (!SvPOKp(TARG) || SvTYPE(TARG) == SVt_PVGV)
force_on_match = 1;
rxtainted = ((RX_EXTFLAGS(rx) & RXf_TAINTED) ||
(PL_tainted && (pm->op_pmflags & PMf_RETAINT)));
if (PL_tainted)
rxtainted |= 2;
TAINT_NOT;

/* only replace once? */
once = !(rpm->op_pmflags & PMf_GLOBAL);

if (PL_tainting) {
rxtainted = (
(SvTAINTED(TARG) ? SUBST_TAINT_STR : 0)
| ((RX_EXTFLAGS(rx) & RXf_TAINTED) ? SUBST_TAINT_PAT : 0)
| ((pm->op_pmflags & PMf_RETAINT) ? SUBST_TAINT_RETAINT : 0)
| ((once && !(rpm->op_pmflags & PMf_NONDESTRUCT))
? SUBST_TAINT_BOOLRET : 0));
TAINT_NOT;
}

RX_MATCH_UTF8_set(rx, DO_UTF8(TARG));

Expand Down Expand Up @@ -2173,12 +2181,12 @@ PP(pp_subst)
*/
}

/* only replace once? */
once = !(rpm->op_pmflags & PMf_GLOBAL);
matched = CALLREGEXEC(rx, s, strend, orig, 0, TARG, NULL,
r_flags | REXEC_CHECKED);
/* known replacement string? */
if (dstr) {
if (SvTAINTED(dstr))
rxtainted |= SUBST_TAINT_REPL;

/* Upgrade the source if the replacement is utf8 but the source is not,
* but only if it matched; see
Expand Down Expand Up @@ -2250,7 +2258,8 @@ PP(pp_subst)
PL_curpm = pm;
SvSCREAM_off(TARG); /* disable possible screamer */
if (once) {
rxtainted |= RX_MATCH_TAINTED(rx);
if (RX_MATCH_TAINTED(rx)) /* run time pattern taint, eg locale */
rxtainted |= SUBST_TAINT_PAT;
m = orig + RX_OFFS(rx)[0].start;
d = orig + RX_OFFS(rx)[0].end;
s = orig;
Expand Down Expand Up @@ -2283,15 +2292,15 @@ PP(pp_subst)
else {
sv_chop(TARG, d);
}
TAINT_IF(rxtainted & 1);
SPAGAIN;
PUSHs(rpm->op_pmflags & PMf_NONDESTRUCT ? TARG : &PL_sv_yes);
}
else {
do {
if (iters++ > maxiters)
DIE(aTHX_ "Substitution loop");
rxtainted |= RX_MATCH_TAINTED(rx);
if (RX_MATCH_TAINTED(rx)) /* run time pattern taint, eg locale */
rxtainted |= SUBST_TAINT_PAT;
m = RX_OFFS(rx)[0].start + orig;
if ((i = m - s)) {
if (s != d)
Expand All @@ -2312,7 +2321,6 @@ PP(pp_subst)
SvCUR_set(TARG, d - SvPVX_const(TARG) + i);
Move(s, d, i+1, char); /* include the NUL */
}
TAINT_IF(rxtainted & 1);
SPAGAIN;
if (rpm->op_pmflags & PMf_NONDESTRUCT)
PUSHs(TARG);
Expand All @@ -2329,21 +2337,28 @@ PP(pp_subst)
#ifdef PERL_OLD_COPY_ON_WRITE
have_a_cow:
#endif
rxtainted |= RX_MATCH_TAINTED(rx);
if (RX_MATCH_TAINTED(rx)) /* run time pattern taint, eg locale */
rxtainted |= SUBST_TAINT_PAT;
dstr = newSVpvn_utf8(m, s-m, DO_UTF8(TARG));
SAVEFREESV(dstr);
PL_curpm = pm;
if (!c) {
register PERL_CONTEXT *cx;
SPAGAIN;
/* note that a whole bunch of local vars are saved here for
* use by pp_substcont: here's a list of them in case you're
* searching for places in this sub that uses a particular var:
* iters maxiters r_flags oldsave rxtainted orig dstr targ
* s m strend rx once */
PUSHSUBST(cx);
RETURNOP(cPMOP->op_pmreplrootu.op_pmreplroot);
}
r_flags |= REXEC_IGNOREPOS | REXEC_NOT_FIRST;
do {
if (iters++ > maxiters)
DIE(aTHX_ "Substitution loop");
rxtainted |= RX_MATCH_TAINTED(rx);
if (RX_MATCH_TAINTED(rx))
rxtainted |= SUBST_TAINT_PAT;
if (RX_MATCH_COPIED(rx) && RX_SUBBEG(rx) != orig) {
m = s;
s = orig;
Expand Down Expand Up @@ -2387,7 +2402,6 @@ PP(pp_subst)
doutf8 |= DO_UTF8(dstr);
SvPV_set(dstr, NULL);

TAINT_IF(rxtainted & 1);
SPAGAIN;
if (rpm->op_pmflags & PMf_NONDESTRUCT)
PUSHs(TARG);
Expand All @@ -2397,9 +2411,28 @@ PP(pp_subst)
(void)SvPOK_only_UTF8(TARG);
if (doutf8)
SvUTF8_on(TARG);
TAINT_IF(rxtainted);
SvSETMAGIC(TARG);
SvTAINT(TARG);

if (PL_tainting) {
if ((rxtainted & SUBST_TAINT_PAT) ||
((rxtainted & (SUBST_TAINT_STR|SUBST_TAINT_RETAINT)) ==
(SUBST_TAINT_STR|SUBST_TAINT_RETAINT))
)
(RX_MATCH_TAINTED_on(rx)); /* taint $1 et al */

if (!(rxtainted & SUBST_TAINT_BOOLRET)
&& (rxtainted & (SUBST_TAINT_STR|SUBST_TAINT_PAT))
)
SvTAINTED_on(TOPs); /* taint return value */
else
SvTAINTED_off(TOPs); /* may have got tainted earlier */

/* needed for mg_set below */
PL_tainted =
cBOOL(rxtainted & (SUBST_TAINT_STR|SUBST_TAINT_PAT|SUBST_TAINT_REPL));
SvTAINT(TARG);
}
SvSETMAGIC(TARG); /* PL_tainted must be correctly set for this mg_set */
TAINT_NOT;
LEAVE_SCOPE(oldsave);
RETURN;
}
Expand Down

0 comments on commit 20be658

Please sign in to comment.