In [2]:
import saspy
# Open a SAS session using the saspy configuration named 'oda'.
sas = saspy.SASsession(cfgname='oda')
# Disabled alternative: a second session on the same configuration with results='HTML'.
# sas_html = saspy.SASsession(cfgname='oda', results='HTML')

SAS Connection established. Subprocess id is 3296



In [3]:
# Run a DATA step that copies sashelp.cars into a table named cars
# (the SAS logs in later cells refer to it as WORK.'cars'n).
sas.submit("data cars; set sashelp.cars; run;")
# Python-side handle to that table; the following cells operate through it.
cars = sas.sasdata('cars')

In [4]:
# Fetch the first rows of the table and display row 0 as a column -> value listing.
df = cars.head()
df.iloc[0]

Make             Acura
Model              MDX
Type               SUV
Origin            Asia
DriveTrain         All
MSRP           36945.0
Invoice        33337.0
EngineSize         3.5
Cylinders          6.0
Horsepower       265.0
MPG_City          17.0
MPG_Highway       23.0
Weight          4451.0
Wheelbase        106.0
Length           189.0
Name: 0, dtype: object

### Transformations: derived columns, scoring, and appending rows

In [5]:
# Two derived columns, each given as a SAS assignment expression:
#   pw_ratio = weight / horsepower
#   overhang = length - wheelbase
derived_columns = {
    'pw_ratio': 'weight / horsepower',
    'overhang': 'length - wheelbase',
}
cars.add_vars(derived_columns)

# Show the first row again to confirm both new columns are present.
res = cars.head()
res.iloc[0]


26                                                         The SAS System                       Saturday, April  6, 2024 11:46:00 AM

162        
163        data WORK.'cars'n ; set WORK.'cars'n ;
164        pw_ratio = weight / horsepower;
165        overhang = length - wheelbase;
166        ; run;
167        
168        
169        

27                                                         The SAS System                       Saturday, April  6, 2024 11:46:00 AM

170        


Make               Acura
Model                MDX
Type                 SUV
Origin              Asia
DriveTrain           All
MSRP             36945.0
Invoice          33337.0
EngineSize           3.5
Cylinders            6.0
Horsepower         265.0
MPG_City            17.0
MPG_Highway         23.0
Weight            4451.0
Wheelbase          106.0
Length             189.0
pw_ratio       16.796226
overhang            83.0
Name: 0, dtype: object

In [6]:
# SAS statement that derives a `score` column from msrp, horsepower and weight.
score_code = """
    score = msrp/10000 + horsepower/100 - weight/1000;
"""
cars.score(code=score_code)

# First row of the table, now including the `score` value.
cars.head().iloc[0]

Make               Acura
Model                MDX
Type                 SUV
Origin              Asia
DriveTrain           All
MSRP             36945.0
Invoice          33337.0
EngineSize           3.5
Cylinders            6.0
Horsepower         265.0
MPG_City            17.0
MPG_Highway         23.0
Weight            4451.0
Wheelbase          106.0
Length             189.0
pw_ratio       16.796226
overhang            83.0
score             1.8935
Name: 0, dtype: object

In [7]:
# Pull the table into pandas and keep the rows labelled 0 and 1
# (.loc slicing includes the end label, so this is two rows).
df = cars.to_df().loc[:1]
# Upload that frame to SAS as a table named 'tmp'.
tmp = sas.df2sd(df, 'tmp')
# Row count before appending.
print(f"{tmp.obs()=}")
# Append the same frame to tmp; the emitted SAS log shows this runs PROC APPEND.
tmp.append(df)
# Row count after appending (the recorded output goes from 2 to 4).
print(f"{tmp.obs()=}")

tmp.obs()=2

91                                                         The SAS System                       Saturday, April  6, 2024 11:46:00 AM

729        
730        proc append base=WORK.'tmp'n
731                    data=WORK.'_temp_df'n;
732        run;
733        
734        
735        

92                                                         The SAS System                       Saturday, April  6, 2024 11:46:00 AM

736        
tmp.obs()=4


### Impute missing values

In [8]:
# Per-variable summary (Type, N, Nmiss); used here to spot missing values before imputing.
cars.info()

Unnamed: 0,Type,Variable,N,Nmiss
0,char,Make,428.0,0.0
1,char,Model,428.0,0.0
2,char,Type,428.0,0.0
3,char,Origin,428.0,0.0
4,char,DriveTrain,428.0,0.0
5,numeric,MSRP,428.0,0.0
6,numeric,Invoice,428.0,0.0
7,numeric,EngineSize,428.0,0.0
8,numeric,Cylinders,428.0,2.0
9,numeric,Horsepower,428.0,0.0


In [9]:
# Impute `cylinders` using the 'MEAN' method; the cell displays the returned table handle.
# NOTE(review): presumably missing values are replaced by the column mean - confirm against the saspy docs.
cars.impute({'MEAN':['cylinders']})

Libref  = WORK
Table   = cars
Dsopts  = 
Results = Pandas

In [10]:
# Re-run the per-variable summary to check the Nmiss column after the impute call.
cars.info()

Unnamed: 0,Type,Variable,N,Nmiss
0,char,Make,428.0,0.0
1,char,Model,428.0,0.0
2,char,Type,428.0,0.0
3,char,Origin,428.0,0.0
4,char,DriveTrain,428.0,0.0
5,numeric,MSRP,428.0,0.0
6,numeric,Invoice,428.0,0.0
7,numeric,EngineSize,428.0,0.0
8,numeric,Cylinders,428.0,0.0
9,numeric,Horsepower,428.0,0.0


In [11]:
# Rename the `make` column to `brand`, then list the variables to confirm the new name.
cars.modify(renamevars={'make': 'brand'})
cars.info()


135                                                        The SAS System                       Saturday, April  6, 2024 11:46:00 AM

1061       
1062       proc datasets dd=WORK nolist; modify 'cars'n ;
1063       rename 'make'n = 'brand'n;
1064       ;run;
1064     !      quit;
1065       
1066       
1067       

136                                                        The SAS System                       Saturday, April  6, 2024 11:46:00 AM

1068       


Unnamed: 0,Type,Variable,N,Nmiss
0,char,brand,428.0,0.0
1,char,Model,428.0,0.0
2,char,Type,428.0,0.0
3,char,Origin,428.0,0.0
4,char,DriveTrain,428.0,0.0
5,numeric,MSRP,428.0,0.0
6,numeric,Invoice,428.0,0.0
7,numeric,EngineSize,428.0,0.0
8,numeric,Cylinders,428.0,0.0
9,numeric,Horsepower,428.0,0.0


In [12]:
# Partition with kfold=10; the output gains the columns _cvfold1 to _cvfold10.
cars.partition(kfold=10)

# Inspect the first row, including the new fold columns.
cars.head().iloc[0]

brand              Acura
Model                MDX
Type                 SUV
Origin              Asia
DriveTrain           All
MSRP             36945.0
Invoice          33337.0
EngineSize           3.5
Cylinders            6.0
Horsepower         265.0
MPG_City            17.0
MPG_Highway         23.0
Weight            4451.0
Wheelbase          106.0
Length             189.0
pw_ratio       16.796226
overhang            83.0
score             1.8935
_cvfold1             0.0
_cvfold2             1.0
_cvfold3             1.0
_cvfold4             1.0
_cvfold5             1.0
_cvfold6             1.0
_cvfold7             0.0
_cvfold8             0.0
_cvfold9             0.0
_cvfold10            0.0
Name: 0, dtype: object

In [13]:
# First rows of the table in tabular form (wide output is truncated with '...').
cars.head()

Unnamed: 0,brand,Model,Type,Origin,DriveTrain,MSRP,Invoice,EngineSize,Cylinders,Horsepower,...,_cvfold1,_cvfold2,_cvfold3,_cvfold4,_cvfold5,_cvfold6,_cvfold7,_cvfold8,_cvfold9,_cvfold10
0,Acura,MDX,SUV,Asia,All,36945.0,33337.0,3.5,6.0,265.0,...,0.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0
1,Acura,RSX Type S 2dr,Sedan,Asia,Front,23820.0,21761.0,2.0,4.0,200.0,...,1.0,1.0,1.0,1.0,0.0,0.0,1.0,1.0,1.0,1.0
2,Acura,TSX 4dr,Sedan,Asia,Front,26990.0,24647.0,2.4,4.0,200.0,...,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0
3,Acura,TL 4dr,Sedan,Asia,Front,33195.0,30299.0,3.2,6.0,270.0,...,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,0.0,1.0
4,Acura,3.5 RL 4dr,Sedan,Asia,Front,43755.0,39014.0,3.5,6.0,225.0,...,1.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,1.0


In [15]:
# Descriptive statistics per variable (N, NMiss, Median, Mean, StdDev, Min, quartiles, Max).
cars.means()

Unnamed: 0,Variable,Label,N,NMiss,Median,Mean,StdDev,Min,P25,P50,P75,Max
0,MSRP,,428.0,0.0,27635.0,32774.85514,19431.716674,10280.0,20329.5,27635.0,39215.0,192465.0
1,Invoice,,428.0,0.0,25294.5,30014.700935,17642.11775,9875.0,18851.0,25294.5,35732.5,173560.0
2,EngineSize,Engine Size (L),428.0,0.0,3.0,3.196729,1.108595,1.3,2.35,3.0,3.9,8.3
3,Cylinders,,428.0,0.0,6.0,5.807512,1.554789,3.0,4.0,6.0,6.0,12.0
4,Horsepower,,428.0,0.0,210.0,215.885514,71.836032,73.0,165.0,210.0,255.0,500.0
5,MPG_City,MPG (City),428.0,0.0,19.0,20.060748,5.238218,10.0,17.0,19.0,21.5,60.0
6,MPG_Highway,MPG (Highway),428.0,0.0,26.0,26.843458,5.741201,12.0,24.0,26.0,29.0,66.0
7,Weight,Weight (LBS),428.0,0.0,3474.5,3577.953271,758.983215,1850.0,3103.0,3474.5,3978.5,7190.0
8,Wheelbase,Wheelbase (IN),428.0,0.0,107.0,108.154206,8.311813,89.0,103.0,107.0,112.0,144.0
9,Length,Length (IN),428.0,0.0,187.0,186.36215,14.357991,143.0,178.0,187.0,194.0,238.0


In [18]:
# Sort the table by msrp; the head() output afterwards starts with the lowest MSRP values.
cars.sort("msrp")
cars.head()

Unnamed: 0,brand,Model,Type,Origin,DriveTrain,MSRP,Invoice,EngineSize,Cylinders,Horsepower,...,_cvfold1,_cvfold2,_cvfold3,_cvfold4,_cvfold5,_cvfold6,_cvfold7,_cvfold8,_cvfold9,_cvfold10
0,Kia,Rio 4dr manual,Sedan,Asia,Front,10280.0,9875.0,1.6,4.0,104.0,...,1.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,1.0,1.0
1,Hyundai,Accent 2dr hatch,Sedan,Asia,Front,10539.0,10107.0,1.6,4.0,103.0,...,1.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0
2,Toyota,Echo 2dr manual,Sedan,Asia,Front,10760.0,10144.0,1.5,4.0,108.0,...,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0
3,Saturn,Ion1 4dr,Sedan,USA,Front,10995.0,10319.0,2.2,4.0,140.0,...,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0
4,Kia,Rio 4dr auto,Sedan,Asia,Front,11155.0,10705.0,1.6,4.0,104.0,...,1.0,0.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0


In [19]:
# Filtered handle restricted to rows with msrp < 20000.
cars2 = cars.where("msrp < 20000")
# Compare row counts: the unfiltered handle vs. the filtered one (428 vs. 98 in the recorded output).
print(f"{cars.obs()=}")
print(f"{cars2.obs()=}")

cars.obs()=428
cars2.obs()=98


In [21]:
# Add msrp2 = msrp * 2 through the filtered handle.
# NOTE(review): the emitted SAS log shows
#   data WORK.'cars'n (where=(msrp < 20000)); set WORK.'cars'n (where=(msrp < 20000));
# i.e. the DATA step writes back to WORK.cars itself while reading only the filtered rows.
# This looks like it overwrites WORK.cars with the msrp < 20000 subset - confirm whether that is intended.
cars2.add_vars({'msrp2': 'msrp * 2'})
# Display the first row via the unfiltered `cars` handle; msrp2 appears there too.
cars.head().iloc[0,:]


271                                                        The SAS System                       Saturday, April  6, 2024 11:46:00 AM

2097       
2098       data WORK.'cars'n (where=(msrp < 20000) ); set WORK.'cars'n (where=(msrp < 20000) );
2099       msrp2 = msrp * 2;
2100       ; run;
2101       
2102       
2103       

272                                                        The SAS System                       Saturday, April  6, 2024 11:46:00 AM

2104       


brand                     Kia
Model          Rio 4dr manual
Type                    Sedan
Origin                   Asia
DriveTrain              Front
MSRP                  10280.0
Invoice                9875.0
EngineSize                1.6
Cylinders                 4.0
Horsepower              104.0
MPG_City                 26.0
MPG_Highway              33.0
Weight                 2403.0
Wheelbase                95.0
Length                  167.0
pw_ratio            23.105769
overhang                 72.0
score                  -0.335
_cvfold1                  1.0
_cvfold2                  0.0
_cvfold3                  1.0
_cvfold4                  0.0
_cvfold5                  0.0
_cvfold6                  1.0
_cvfold7                  1.0
_cvfold8                  0.0
_cvfold9                  1.0
_cvfold10                 1.0
msrp2                 20560.0
Name: 0, dtype: object