In [1]:
import saspy
import pandas as pd
from IPython.display import HTML

### Helpful Resources and References
<p style="font-size:14px">
<br>https://www.sas.com/content/dam/SAS/support/en/sas-global-forum-proceedings/2019/3189-2019.pdf
<br>https://www.sas.com/content/dam/SAS/support/en/sas-global-forum-proceedings/2019/3238-2019.pdf
<br>https://wrds-www.wharton.upenn.edu/documents/1441/SASPy_demo_basic_functions.html
<br>https://www.lexjansen.com/pharmasug/2009/sp/SP10.pdf
<br>https://www.lexjansen.com/phuse/2013/sp/SP05.pdf 
<br>https://www.pharmasug.org/proceedings/2015/SP/PharmaSUG-2015-SP06.pdf
</p>    

Note: Datasets are saved to SAS OnDemand disk space.

In [2]:
# Open the SAS session that every later cell submits code through.
sas_session = saspy.SASsession()

Using SAS Config named: oda
SAS Connection established. Subprocess id is 2038



In [3]:
# LIBNAME statement prepended to every submission so the DSTORE libref is assigned.
libpath = "libname dstore '/home/sp16670/datasets';"

# SAS post-processing that binomtest() appends after the caller's PROC FREQ code.
# It expects the ODS tables dstore.bin1 (Binomial) and dstore.frq (OneWayFreqs) and:
#   1. renames the BY variable in each table and records the original row order,
#   2. joins the two tables per group and keeps only the response level whose
#      frequency percent matches the reported binomial proportion,
#   3. writes dstore.binomfullout with a header row per group followed by the
#      label/value pairs and blank spacer rows.
# The proportion/percent match is done on values rounded to 1e-8: an exact
# floating-point comparison of nvalue1 with percent/100 can fail for proportions
# such as 1/3, which would silently drop the whole group from the output.
text = """
data dstore.bin1;
 set dstore.bin1;
 rename group = grp_bin;
 drop table;
 orig_order = _n_;
run;


data dstore.frq;
 set dstore.frq;
 rename group = grp_frq;
 drop table;
run;


proc sql;
create table dstore.binomfull as
select *, put(response, best.) || grp_bin as key from dstore.bin1 as t1 left join dstore.frq as t2 on 
 t1.grp_bin = t2.grp_frq;

create table dstore.binomfull2 as
select * from dstore.binomfull
where key in (
 select key from dstore.binomfull
 where label1 = "Proportion (P)" and (round(nvalue1, 1e-8) = round(percent/100, 1e-8))
)
order by orig_order;
quit;


data dstore.binomfullout;
 set dstore.binomfull2;
 by grp_bin;
 length desc $200;
 if first.grp_bin then do;
  desc = "Group = " || grp_bin || " Response = " || compress(trim(put(Response,best.)));
  output;
 end;
 group = grp_bin;
 Desc = label1;
 Val = cValue1;
 
 if last.grp_bin then do;
  output;
  group = "";
  desc = "";
  val = "";
  output;
 end;
 output;
 keep desc val;
run;
 
 """

def binomtest(sas):
    """Run a PROC FREQ submission plus the shared post-processing and return the summary.

    Parameters
    ----------
    sas : str
        SAS code that produces the ODS tables ``dstore.bin1`` and ``dstore.frq``;
        the module-level ``text`` post-processing is appended to it.

    Returns
    -------
    The ``dstore.binomfullout`` table converted with ``to_frame()``, with
    missing values replaced by empty strings.

    Raises
    ------
    RuntimeError
        If the SAS log of the submission contains ERROR lines. Without this
        check a failed run would silently return the table left over from a
        previous submission, because ``dstore.binomfullout`` persists on disk.
    """
    result = sas_session.submit(sas + text)

    # SAS reports failures only in the log, so surface them explicitly.
    error_lines = [
        line for line in result["LOG"].splitlines()
        if line.lstrip().startswith("ERROR")
    ]
    if error_lines:
        raise RuntimeError("SAS submission failed:\n" + "\n".join(error_lines))

    return (
        sas_session.sasdata(table="binomfullout", libref="dstore")
        .to_frame()
        .fillna("")
    )

In [4]:
# Build the example data: Group A has 4 responders (1) and 6 non-responders (0),
# Group B has 6 non-responders and no responders.
occurance_step = """    

data dstore.occurance;
     do i = 1 to 4; group='A'; response=1; output; end;
     do i = 1 to 6; group='A'; response=0; output; end;
     do i = 1 to 6; group='B'; response=0; output; end;
run;
"""
query = sas_session.submit(libpath + occurance_step)

In [5]:
# List the librefs known to the session; DSTORE should appear after the libname submission.
sas_session.assigned_librefs()

['WORK',
 'DSTORE',
 'SASDATA',
 'STPSAMP',
 'SASHELP',
 'MAPS',
 'MAPSSAS',
 'MAPSGFK',
 'SASUSER']

<br>
<br>
<p style="font-size:18px">Suppose there are a total of 16 subjects in the trial, 10 subjects in Group A and 6 subjects in Group B. PROC FREQ in SAS can be used to compute the number of subjects, their proportions and the exact confidence intervals.</p>
<br>
<br>

In [6]:
# Pull the example dataset back into pandas for display.
sas_session.sasdata(libref="dstore", table="occurance").to_frame()

Unnamed: 0,i,group,response
0,1.0,A,1.0
1,2.0,A,1.0
2,3.0,A,1.0
3,4.0,A,1.0
4,1.0,A,0.0
5,2.0,A,0.0
6,3.0,A,0.0
7,4.0,A,0.0
8,5.0,A,0.0
9,6.0,A,0.0


In [7]:
# Binomial proportion and exact CI per group on the raw 0/1 coding; the ODS
# tables are captured into dstore.bin1 / dstore.frq for binomtest to summarise.
freq_raw_step = """
 ods output binomial = dstore.bin1;
 ods output OneWayFreqs=dstore.frq;
 proc freq data=dstore.occurance;
  by group;
  tables response / binomial nocum norow;
  exact binomial;
 run;
 """
sascode = libpath + freq_raw_step
binomtest(sascode)

Unnamed: 0,desc,Val
0,Group = A Response = 0,
1,Proportion (P),0.6
2,ASE,0.1549
3,95% Lower Conf Limit,0.2964
4,95% Upper Conf Limit,0.9036
5,,
6,Exact Conf Limits,
7,95% Lower Conf Limit,0.2624
8,95% Upper Conf Limit,0.8784
9,,


<br>
<br>
<p style="font-size:18px">For ‘Group A’, there are 4 subjects with ‘Response’ as “Yes”. The actual proportion of subjects is 0.40, and
the CIs should be computed for that proportion. However, the SAS output from PROC FREQ shows that the 95% CIs
(0.2624, 0.8784) obtained are for the proportion of subjects with Response = 0 (p=0.6). Hence the result is incorrect.
<br>
<br>    
For ‘Group B’, since there are no subjects with ‘Response’ as “Yes”, the proportion is zero. There
too, the 95% CIs obtained (0.5407, 1.0000) are for the proportion of subjects with Response = 0 (p=1).
<br>
<br>    
In the above example, both levels are available (e.g. in Group A, 6 subjects have ‘0 (No)’ and 4
subjects have ‘1 (Yes)’). While computing the CIs, PROC FREQ uses the lowest level, i.e. ‘0’, so the CI is computed for the proportion of subjects with response ‘0 (No)’.
    
To overcome this, it is advised to reset the levels of the variable in such a way that the response of interest gets the lowest level. To make the response ‘1 (Yes)’ lower than the other, we can reset ‘0 (No)’ to ‘2 (No)’. The following code resets ‘0’ to ‘2’ and then computes the CIs using PROC FREQ.
</p>
<br>
<br>

In [8]:
# Recode response 0 -> 2 so that the level of interest (1) becomes the lowest
# level, then rerun the same PROC FREQ request on the recoded data.
freq_recoded_step = """
data dstore.occurance_2;
 set dstore.occurance;
 if response = 0 then response = 2;   
run;


ods output binomial = dstore.bin1;
ods output OneWayFreqs=dstore.frq;
proc freq data=dstore.occurance_2;
 by group;
 tables response / binomial nocum norow;
 exact binomial;
run;
"""
sascode = libpath + freq_recoded_step
binomtest(sascode)

Unnamed: 0,desc,Val
0,Group = A Response = 1,
1,Proportion (P),0.4
2,ASE,0.1549
3,95% Lower Conf Limit,0.0964
4,95% Upper Conf Limit,0.7036
5,,
6,Exact Conf Limits,
7,95% Lower Conf Limit,0.1216
8,95% Upper Conf Limit,0.7376
9,,


In [9]:
# Display the recoded dataset (response 0 has become 2).
sas_session.sasdata(libref="dstore", table="occurance_2").to_frame()

Unnamed: 0,i,group,response
0,1.0,A,1.0
1,2.0,A,1.0
2,3.0,A,1.0
3,4.0,A,1.0
4,1.0,A,2.0
5,2.0,A,2.0
6,3.0,A,2.0
7,4.0,A,2.0
8,5.0,A,2.0
9,6.0,A,2.0


<br>
<br>
<p style="font-size:18px">The above SAS output indicates that the CI values for ‘Group A’ are now computed correctly for the proportion of subjects with Response = 1 (p=0.4), and the values have changed to (0.1216, 0.7376).
<br>
<br>    
For Group B, there are no subjects with Response=1 (i.e. the resulting proportion is zero, but level ‘1 (Yes)’ is
missing in the dataset). In the absence of the lower level ‘1 (Yes)’, PROC FREQ considers the level ‘2 (No)’ as the lowest level and computes the confidence intervals for the proportion of subjects with Response = 2 (p=1).
<br>
<br>
So it can be observed that even after resetting the ‘0’ to ‘2’, 95% CIs (0.5407, 1.0000) obtained from PROC FREQ are not correct.
<br>
<br>
When a required level is missing, we need to add records to the dataset and then make use of the ‘WEIGHT’
statement in PROC FREQ to consider only the relevant records for the CI computations.
<br>
<br>
To add records with lowest level of the target variable, we can create a dataset which has the lowest level against all the treatments. For the above example, the following dataset can be used to add records to the existing ones.            
<br>
<br>    
If this dataset is merged with the original one, using Group and Response as BY variables, it should add a new record to the existing one with Response=1. If all the treatments contain at least one subject with Response=1, no records will be added.
<br>
<br>    
Also, a new variable needs to be added (here it is wgt) to the dataset in such a way that the newly added records get a value of ‘0’ and the already existing records get a value of ‘1’. This variable can then be used in the WEIGHT statement in PROC FREQ to compute the CIs correctly (for Response=1), by taking the proportion as zero. SAS code that can be used to add the new records and to create the weight variable is as follows.
</p>
<br>
<br>

In [10]:
# Create one Response=1 record per group and merge it with the recoded data.
# Records that exist only in the helper dataset get wgt=0; all others get wgt=1.
weight_step = """
data dstore.occurance_wt;
     group='A'; response=1; output;
     group='B'; response=1; output;
run; 

data dstore.occurance_3;
 merge dstore.occurance_2(in=a) dstore.occurance_wt(in=b);
 by group response;
 if b and not a then wgt=0;
 else wgt=1;
run;
"""
query = sas_session.submit(libpath + weight_step)

In [11]:
# Display the merged dataset including the new wgt column.
sas_session.sasdata(libref="dstore", table="occurance_3").to_frame()

Unnamed: 0,i,group,response,wgt
0,1.0,A,1.0,1.0
1,2.0,A,1.0,1.0
2,3.0,A,1.0,1.0
3,4.0,A,1.0,1.0
4,1.0,A,2.0,1.0
5,2.0,A,2.0,1.0
6,3.0,A,2.0,1.0
7,4.0,A,2.0,1.0
8,5.0,A,2.0,1.0
9,6.0,A,2.0,1.0


<br>
<br>
<p style="font-size:18px">
Then the WEIGHT statement with ‘zeroes’ option can be used to compute the correct proportions and CIs. PROC
FREQ code with WEIGHT statement is as follows. 
<br>
<br>    
It can be observed that the Binomial Proportion is now computed for Response=1 and the computed proportion is 0. Also, this provides the correct 95% CIs (0.0000, 0.4593) for the zero proportion.
</p>
<br>
<br>

In [12]:
# Same PROC FREQ request on the merged data, now weighted by wgt with the
# ZEROES option so that zero-weight records still contribute their level.
freq_weighted_step = """
ods output binomial = dstore.bin1;
ods output OneWayFreqs=dstore.frq;
proc freq data=dstore.occurance_3;
 by group;
 tables response / binomial nocum norow;
 exact binomial;
 weight wgt/zeroes;    
run;
"""
sascode = libpath + freq_weighted_step
binomtest(sascode)

Unnamed: 0,desc,Val
0,Group = A Response = 1,
1,Proportion (P),0.4
2,ASE,0.1549
3,95% Lower Conf Limit,0.0964
4,95% Upper Conf Limit,0.7036
5,,
6,Exact Conf Limits,
7,95% Lower Conf Limit,0.1216
8,95% Upper Conf Limit,0.7376
9,,


In [13]:
# Clean up the DSTORE library and show the SAS log of the cleanup step.
# NOTE: KILL deletes every member of the library, not only the datasets
# created by this notebook.
cleanup_result = sas_session.submit("""
 proc datasets library=dstore kill;
  run;
 quit;
""")
print(cleanup_result['LOG'])


107                                                        The SAS System                         Sunday, April 10, 2022 05:22:00 PM

850        ods listing close;ods html5 (id=saspy_internal) file=_tomods1 options(bitmap_mode='inline') device=svg style=HTMLBlue;
850      ! ods graphics on / outputfmt=png;
851        
852        
853         proc datasets library=dstore kill;
854          run;
855         quit;
856        
857        
858        
859        ods html5 (id=saspy_internal) close;ods listing;
860        

108                                                        The SAS System                         Sunday, April 10, 2022 05:22:00 PM

861        


In [14]:
# Disconnect from the SAS session now that the analysis and cleanup are done.
sas_session.disconnect()

'Succesfully disconnected. Be sure to have a valid network connection before submitting anything else.'