# Intro

## Reading Data

### .Dat

In [24]:
data toads;
    infile "ToadJump.dat";
    input toadname $ weight jump1 jump2 jump3;
run;
proc print data = toads;
    title 'sas data set toads';
run;

Obs,toadname,weight,jump1,jump2,jump3
1,Lucky,2.3,1.9,.,3.0
2,Spot,4.6,2.5,3.1,0.5
3,Tubs,7.1,.,.,3.8
4,Hop,4.5,3.2,1.9,2.6
5,Noisy,3.8,1.3,1.8,1.5
6,Winner,5.7,.,.,.


### .CSV

In [29]:
proc import datafile = 'Bands2.csv' out = music replace;
run;
proc print data = music;
    title 'customers at each gig';
run;

Obs,Band Name,Gig Date,Eight PM,Nine PM,Ten PM,Eleven PM
1,Lupine Lights,12/03/2012,45,63,70,.
2,Awesome Octaves,12/15/2012,17,28,44,12
3,"Stop, Drop, and Rock-N-Roll",01/05/2013,34,62,77,91
4,The Silveyville Jazz Quartet,01/18/2013,38,30,42,43
5,Catalina Converts,01/31/2013,56,.,65,34


## 2 Parts of SAS Program

"Data" &  "Proc"

In [17]:
Data Distance;
    Miles = 26.22;
    Kilometers = 1.61 * Miles;
Proc Print Data = Distance;
Run;

Obs,Miles,Kilometers
1,26.22,42.2142


## Creating Variables

In [34]:
data x;
    lol = 5;
proc print data = x;
run;

Obs,lol
1,5


In [45]:
data homegarden;
    infile 'Garden.dat';
    input name $ 1-7 Tomato Zucchini Peas Grapes;
    Total = Tomato + Zucchini + Peas + Grapes;
    PerTom = (Tomato/Total)*100;
run;
proc print data = homegarden;
    title 'home gardening survey';
run;

Obs,name,Tomato,Zucchini,Peas,Grapes,Total,PerTom
1,Gregor,10,2,40,0,52,19.2308
2,Molly,15,5,10,1000,1030,1.4563
3,Luther,50,10,15,50,125,40.0000
4,Susan,20,0,.,20,.,.


## Control Structures

### IF Statements

* Use data to call an existing data file and then 'SET' to create the new one
* If more than one action as a result of if condition, use 'DO' and 'END'

In [47]:
data homegarden2;
    set homegarden;
    if name = 'Gregor' then Status = 'classic';
    if Tomato > 15 or Zucchini = 10 then Status = 'lol';
    if Name IN ('Molly','Susan') then do;
        Gender = 'Female';
        Status2 = 2;
    end;
run;
proc print data = homegarden2;
run;

Obs,name,Tomato,Zucchini,Peas,Grapes,Total,PerTom,Status,Gender,Status2
1,Gregor,10,2,40,0,52,19.2308,classic,,.
2,Molly,15,5,10,1000,1030,1.4563,,Female,2
3,Luther,50,10,15,50,125,40.0000,lol,,.
4,Susan,20,0,.,20,.,.,lol,Female,2


### If Then/ Else

In [49]:
data homeimprovements;
    infile 'Home.dat';
    input Owner $ 1-7 Description $ 9-33 Cost;
    if cost = . then costgroup = 'missing';
        else if cost < 2000 then costgroup = 'low';
        else if cost < 10000 then costgroup = 'medium';
        else costgroup = 'medium';
run;
proc print data = homeimprovements;
    title 'home improvements cost groups';
run;

Obs,Owner,Description,Cost,costgroup
1,Bob,kitchen cabinet face-lift,1253.00,low
2,Shirley,bathroom addition,11350.70,medium
3,Silvia,paint exterior,.,missing
4,Al,backyard gazebo,3098.63,medium
5,Norm,paint interior,647.77,low
6,Kathy,second floor addition,75362.93,medium


## Subsetting Data

### Selecting with IF Statement

In [51]:
data comedy;
    infile 'Shakespeare.dat';
    input Title $ 1-26 Year Type $;
    if type = 'comedy';
run;
proc print data = comedy;
    title 'shakesperean comedies';
run;

Obs,Title,Year,Type
1,A Midsummer Night's Dream,1595,comedy
2,Comedy of Errors,1590,comedy
3,Taming of the Shrew,1593,comedy


### Deleting non selected rows

In [53]:
data comedy;
    infile 'Shakespeare.dat';
    input Title $ 1-26 Year Type $;
    if type = 'tragedy' or type = 'romance' or type = 'history' then delete;
run;
proc print data = comedy;
    title 'shakesperean comedies';
run;

Obs,Title,Year,Type
1,A Midsummer Night's Dream,1595,comedy
2,Comedy of Errors,1590,comedy
3,Taming of the Shrew,1593,comedy


## Retain

Use Retain to perform Window functions like cumulative sum (shown below)

In [59]:
data gamestats;
    infile 'Games.dat';
    input Month 1 Day 3-4 Team $ 6-25 Hits 27-28 Runs 30-31;
    retain MaxRuns;
    MaxRuns = MAX(MaxRuns,Runs);
    RunsToDate + Runs;
run;
proc print data = gamestats;
    title "Seasons Record to Date";
run;

Obs,Month,Day,Team,Hits,Runs,MaxRuns,RunsToDate
1,6,19,Columbia Peaches,8,3,3,3
2,6,20,Columbia Peaches,10,5,5,8
3,6,23,Plains Peanuts,3,4,5,12
4,6,24,Plains Peanuts,7,2,5,14
5,6,25,Plains Peanuts,12,8,8,22
6,6,30,Gilroy Garlics,4,4,8,26
7,7,1,Gilroy Garlics,9,4,8,30
8,7,4,Sacramento Tomatoes,15,9,9,39
9,7,4,Sacramento Tomatoes,10,10,10,49
10,7,5,Sacramento Tomatoes,2,3,10,52


## Arrays

The following program changes all the 9s to missing values. Alternative would be to write 10 if statements.

In [61]:
data songs;
    infile 'KBRK.dat';
    input city $ 1-15 Age domk wj hwow simbh kt aomm libm tr filp ttr;
    array song (10) domk wj hwow simbh kt aomm libm tr filp ttr;
    do i = 1 to 10;
        if song(i) = 9 then song(i) = .;
    end;
run;
proc print data = songs;
    title ' wbrk song survey';
run;

Obs,city,Age,domk,wj,hwow,simbh,kt,aomm,libm,tr,filp,ttr,i
1,Albany,54,3,.,4,4,.,.,33,2,.,3,11
2,Oakland,27,3,.,4,2,3,.,41,3,5,4,11


## Shortcuts for Variable Lists

### Numbered range lists

In [None]:
/* #variable list
input cat8 cat9 cat10 cat11 cat 12;
#abbreviated list
input cat8 - cat12 */

### Name range lists

In [None]:
/* data example;
    input y a c h r;
    b = c + r;
run; */

In [None]:
/* specify using PUT

#variable list
PUT y a c hr;

#abbreviated list
put y -- b */

### Name prefix lists

In [None]:
/* # variable list */
/* dogbills = sum(dogvet,dogfood,dog_care);  */
/* # abbreviated list */
/* dogbills = sum(of dog:); */

### Re-work prior example using name list

In [66]:
data songs;
    infile 'KBRK.dat';
    input city $ 1-15 Age domk wj hwow simbh kt aomm libm tr filp ttr;
    array new (10) song1 - song10; /*named range list*/
    array old (10) domk -- ttr; /*numbered range list*/
    do i = 1 to 10;
        if old(i) = 9 then new(i) = .;
            else new(i) = old(i);
    end;
    avgscore = mean(of song1 - song10);
proc print data = songs;
    title ' wbrk song survey';
run;

Obs,city,Age,domk,wj,hwow,simbh,kt,aomm,libm,tr,filp,ttr,song1,song2,song3,song4,song5,song6,song7,song8,song9,song10,i,avgscore
1,Albany,54,3,9,4,4,9,.,33,2,9,3,3,.,4,4,.,.,33,2,.,3,11,8.16667
2,Oakland,27,3,9,4,2,3,.,41,3,5,4,3,.,4,2,3,.,41,3,5,4,11,8.125
