# Data Manipulation

## WHERE statement
The WHERE statement tells a procedure which observations to process: only the rows that satisfy the WHERE condition are used.

In [4]:
/* Read the artist list with column input: name from columns 1-21,
   Genre from columns 23-40, and a one-character Origin code from column 42. */
data style;
    infile 'Artists.dat';
    input name $ 1-21 Genre $ 23-40 Origin $ 42;
run;

/* Print only the observations whose genre is Impressionism.
   The footnote is a legend for the one-letter Origin codes. */
proc print data = style;
    where genre = 'Impressionism';
    title 'Major Impressionist Painters';
    footnote 'F = France N = Netherlands U = US';
run;

/* FOOTNOTE is a global statement and stays in effect for the rest of the
   SAS session. Cancel it here so the origin legend is not printed under
   later, unrelated reports. */
footnote;

Obs,name,Genre,Origin
1,Mary Cassatt,Impressionism,U
3,Edgar Degas,Impressionism,F
5,Claude Monet,Impressionism,F
6,Pierre Auguste Renoir,Impressionism,F


## SORT

In [15]:
/* Load the marine-animal measurements. The trailing @@ holds the input
   line so that several name/Family/Length triples can be read from one
   record. */
DATA marine;
    INFILE "Lengths.dat";
    INPUT name $ Family $ Length @@;
RUN;

/* Order by family, longest first within each family. The sorted copy is
   written to seasort; NODUPKEY keeps only the first observation for each
   Family/Length combination. */
PROC SORT DATA=marine OUT=seasort NODUPKEY;
    BY family DESCENDING length;
RUN;

/* Report the sorted data. */
TITLE 'Whales and Sharks';
PROC PRINT DATA=seasort;
RUN;

Obs,name,Family,Length
1,humpback,,50.0
2,whale,shark,40.0
3,basking,shark,30.0
4,mako,shark,12.0
5,dwarf,shark,0.5
6,blue,whale,100.0
7,sperm,whale,60.0
8,gray,whale,50.0
9,killer,whale,30.0
10,beluga,whale,15.0

Obs,name,Family,Length
1,humpback,,50.0
2,whale,shark,40.0
3,basking,shark,30.0
4,mako,shark,12.0
5,dwarf,shark,0.5
6,blue,whale,100.0
7,sperm,whale,60.0
8,gray,whale,50.0
9,killer,whale,30.0
10,beluga,whale,15.0


The following program:
* reads the data
* computes the money earned (profit)
* sorts the data by classroom
* prints the data by class
    * sums the profit for each class and overall

In [7]:
/* Read the candy-sale records: name from columns 1-11, then Class, then
   (starting at column 15) the return date read with the MMDDYY10. informat,
   followed by the candy type and the quantity sold.
   Profit is the quantity multiplied by 1.25. */
DATA sales;
    INFILE "CandySales.dat";
    INPUT name $ 1-11 Class @15 DateReturned MMDDYY10. CandyType $ Quantity;
    Profit = Quantity * 1.25;
RUN;

/* A BY statement in PROC PRINT needs the data ordered by the BY variable,
   so sort by class first (in place). */
PROC SORT DATA=sales;
    BY class;
RUN;

/* One report section per class. SUM adds a Profit subtotal for each class
   and a grand total at the end. */
TITLE 'candy sales for field trip by class';
PROC PRINT DATA=sales;
    VAR name datereturned candytype profit;
    BY class;
    SUM profit;
RUN;

Obs,name,DateReturned,CandyType,Profit
1,Nathan,19073.0,CD,23.75
2,Matthew,19073.0,CD,17.5
3,Claire,19074.0,CD,13.75
4,Chris,19077.0,CD,7.5
Class,,,,62.5

Obs,name,DateReturned,CandyType,Profit
5,Adriana,19073.0,MP,8.75
6,Ian,19076.0,MP,22.5
7,Anthony,19077.0,MP,16.25
8,Erika,19077.0,MP,21.25
Class,,,,68.75
,,,,131.25


### Formatted Printing

In [9]:
/* Read the candy-sale records; the MMDDYY10. informat reads the return date
   starting at column 15. Profit is the quantity multiplied by 1.25. */
DATA sales;
    INFILE "CandySales.dat";
    INPUT name $ 1-11 Class @15 DateReturned MMDDYY10. CandyType $ Quantity;
    Profit = Quantity * 1.25;
RUN;

/* Print with display formats: DATE9. writes the return date as
   ddMMMyyyy (e.g. 21MAR2012) and DOLLAR6.2 writes profit with a dollar sign
   and two decimals. */
TITLE 'candy sales data using date formats';
PROC PRINT DATA=sales;
    VAR name datereturned candytype profit;
    FORMAT datereturned DATE9.;
    FORMAT profit DOLLAR6.2;
RUN;

Obs,name,DateReturned,CandyType,Profit
1,Adriana,21MAR2012,MP,$8.75
2,Nathan,21MAR2012,CD,$23.75
3,Matthew,21MAR2012,CD,$17.50
4,Claire,22MAR2012,CD,$13.75
5,Ian,24MAR2012,MP,$22.50
6,Chris,25MAR2012,CD,$7.50
7,Anthony,25MAR2012,MP,$16.25
8,Erika,25MAR2012,MP,$21.25


## Custom Formats

In [19]:
/* Define the custom display formats used by the report below:
     gender.   - numeric sex codes to labels
     agegroup. - age ranges to labels (-< excludes the upper bound)
     $col.     - one-letter color codes to color names */
PROC FORMAT;
    VALUE gender
        1 = 'Male'
        2 = 'Female';
    VALUE agegroup
        13 -< 20  = 'Teen'
        20 -< 65  = 'Adult'
        65 - HIGH = 'Senior';
    VALUE $col
        'W' = 'Moon White'
        'B' = 'Sky Blue'
        'Y' = 'Sunburst Yellow'
        'G' = 'Rain Cloud Grey';
RUN;

/* Read the survey responses with list input; color is a character code. */
DATA carsurvey;
    INFILE "Cars.dat";
    INPUT age sex income color $;
RUN;

/* Print the survey with the user-defined formats applied, plus the built-in
   DOLLAR8. format for income. */
TITLE 'survey results printed with user-defined formats';
PROC PRINT DATA=carsurvey;
    FORMAT sex gender.
           age agegroup.
           color $col.
           income DOLLAR8.;
RUN;

Obs,age,sex,income,color
1,Teen,Male,"$14,000",Sunburst Yellow
2,Adult,Male,"$65,000",Rain Cloud Grey
3,Senior,Female,"$35,000",Sky Blue
4,Adult,Male,"$44,000",Sunburst Yellow
5,Adult,Female,"$83,000",Moon White
