In [1]:
import saspy
# Open the SAS session used by every later cell. No configuration name is
# passed, so saspy chooses one itself (the recorded output reports "oda").
sas = saspy.SASsession()

Using SAS Config named: oda
SAS Connection established. Subprocess id is 21884



### basic

In [2]:
# Handle to the SASHELP.CARS table; preview its first rows.
sd = sas.sasdata(table="cars", libref="sashelp")
sd.head()

Unnamed: 0,Make,Model,Type,Origin,DriveTrain,MSRP,Invoice,EngineSize,Cylinders,Horsepower,MPG_City,MPG_Highway,Weight,Wheelbase,Length
0,Acura,MDX,SUV,Asia,All,36945.0,33337.0,3.5,6.0,265.0,17.0,23.0,4451.0,106.0,189.0
1,Acura,RSX Type S 2dr,Sedan,Asia,Front,23820.0,21761.0,2.0,4.0,200.0,24.0,31.0,2778.0,101.0,172.0
2,Acura,TSX 4dr,Sedan,Asia,Front,26990.0,24647.0,2.4,4.0,200.0,22.0,29.0,3230.0,105.0,183.0
3,Acura,TL 4dr,Sedan,Asia,Front,33195.0,30299.0,3.2,6.0,270.0,20.0,28.0,3575.0,108.0,186.0
4,Acura,3.5 RL 4dr,Sedan,Asia,Front,43755.0,39014.0,3.5,6.0,225.0,18.0,24.0,3880.0,115.0,197.0


### limited cols, missing, bool cols

In [None]:
# Build WORK.CARS from SASHELP.CARS for the missing-value / flag examples:
#   * numeric price and mpg columns are set to missing for three models,
#   * the character columns type and origin are blanked for one model,
#   * `expensive` is a 0/1 flag derived from msrp > 32762,
#   * only eight columns are kept, then rows are de-duplicated by model.
# NOTE(review): SAS orders numeric missing below every number, so rows with a
# missing msrp should end up with expensive = 0 rather than missing -- confirm
# that this is the intended demonstration.
# The string has no placeholders, so a plain (non-f) literal is used.
sas.submitLST("""
    data cars;
        set sashelp.cars;
        if strip(model) in ('TSX 4dr', 'TL 4dr', 'MDX') then do;
            msrp = .;
            invoice = .;
            mpg_city = .;
            mpg_highway = .;
        end;
        if strip(model) in ('3.5 RL 4dr') then do;
            type = '';
            origin = '';
        end;
        if msrp > 32762 then expensive = 1;
        else expensive = 0;
        keep model type origin msrp invoice mpg_city mpg_highway expensive;
    run;
              
    proc sort data=cars nodupkey;
        by model;
    run;

    proc print data=cars (obs=5);
    run;
""")

Obs,Model,Type,Origin,MSRP,Invoice,MPG_City,MPG_Highway,expensive
1,3.5 RL 4dr,,,"$43,755","$39,014",18,24,1
2,3.5 RL w/Navigation 4dr,Sedan,Asia,"$46,100","$41,100",18,24,1
3,300M 4dr,Sedan,USA,"$29,865","$27,797",18,27,0
4,300M Special Edition 4dr,Sedan,USA,"$33,295","$30,884",18,27,1
5,325Ci 2dr,Sedan,Europe,"$30,795","$28,245",20,29,0


### from and into predictions

In [4]:
# 3 category prediction
# Fit a generalized-logit model for origin and write the scored rows to
# WORK.CARS_EST, then print the observed (_FROM_) and predicted (_INTO_)
# categories for the first five rows.
# Fixes: the OUTPUT statement was misspelled as "ouput", which SAS rejects and
# therefore never creates cars_est on a fresh run; the PROC LOGISTIC step is
# now also closed with an explicit run;.
# NOTE(review): event='Asia' is documented for binary responses; with
# link=glogit a ref= option may have been intended -- confirm.
sas.submitLST("""
    proc logistic data=sashelp.cars order=freq noprint;
        model origin (event='Asia') = mpg_city mpg_highway weight / link=glogit;
        output out=cars_est predprobs=individual;
    run;

    proc print data=cars_est (obs=5);
        var origin mpg_city mpg_highway weight _from_ _into_;
    run;
""")

Obs,Origin,MPG_City,MPG_Highway,Weight,_FROM_,_INTO_
1,Asia,17,23,4451,Asia,USA
2,Asia,24,31,2778,Asia,Asia
3,Asia,22,29,3230,Asia,Asia
4,Asia,20,28,3575,Asia,USA
5,Asia,18,24,3880,Asia,USA


### explore

In [5]:
# Cross-tabulate type by origin on WORK.CARS. NOPRINT suppresses the listing,
# so the counts are written to WORK._TMP and pulled back as a DataFrame.
sas.submit("""
    proc freq data=cars order=freq noprint;
        tables type * origin / out=_tmp;
    run;
""")
df = sas.sasdata(table="_tmp", libref="work").to_df()
df

Unnamed: 0,Type,Origin,COUNT,PERCENT
0,,,1.0,
1,Sedan,Asia,92.0,21.698113
2,Sedan,USA,90.0,21.226415
3,Sedan,Europe,76.0,17.924528
4,SUV,Asia,25.0,5.896226
5,SUV,USA,25.0,5.896226
6,SUV,Europe,10.0,2.358491
7,Sports,Asia,17.0,4.009434
8,Sports,USA,9.0,2.122642
9,Sports,Europe,23.0,5.424528


In [6]:
# Summary statistics for WORK.CARS restricted to the two price columns via the
# KEEP= data set option, supplied when the handle is created.
df = sas.sasdata("cars", "work", dsopts={"keep": "msrp invoice"})
df.means()

Unnamed: 0,Variable,N,NMiss,Median,Mean,StdDev,Min,P25,P50,P75,Max
0,MSRP,422.0,3.0,27490.0,32762.447867,19564.331532,10280.0,20310.0,27490.0,39250.0,192465.0
1,Invoice,422.0,3.0,25192.5,30002.078199,17762.092926,9875.0,18713.0,25192.5,35777.0,173560.0


In [7]:
# Row count, non-missing msrp count and mean msrp per (type, origin) group of
# WORK.CARS.
# Fix: PROC SQL is terminated with QUIT; -- a RUN; statement has no effect in
# PROC SQL and leaves the procedure active until the next step boundary.
sas.submitLST("""
proc sql;
    select
        type, origin,
        count(*) as count,
        count(case when msrp is not null then 1 end) as count_msrp,
        avg(msrp) as avg_msrp
    from cars
    group by 1,2
    order by 1,2
    ;
quit;
""")

Type,Origin,count,count_msrp,avg_msrp
,,1,1,43755.0
Hybrid,Asia,3,3,19920.0
SUV,Asia,25,24,29261.67
SUV,Europe,10,10,48346.0
SUV,USA,25,25,34589.2
Sedan,Asia,92,90,22260.31
Sedan,Europe,76,76,43171.97
Sedan,USA,90,90,25638.83
Sports,Asia,17,17,32510.65
Sports,Europe,23,23,71998.7
