In [1]:
%load Advance_tech.sas

In [None]:
/*########### SAS ADVANCED TECHNIQUES ########*/

/* Exercise p302d01: LAG() function */

/* Preview: first 5 rows of the source table.
   INOBS=5 limits the rows read so the step matches its "first 5 rows"
   intent instead of printing the whole table; NUMBER adds a row-number
   column to the output. */
proc sql number inobs=5;
    title "Table: pg3.weather_china_daily2017";
    select *
        from pg3.weather_china_daily2017;
quit;

/* Build a table with the average temperature of the 1 to 4 previous rows.
   LAGn(x) returns the value x had n executions earlier. The queues are not
   reset between cities here, so the first rows of a city carry values from
   the previous city. */
data wheatherChina;
    set pg3.weather_china_daily2017;
    by City;
    Temp1 = lag1(TavgC);   /* previous row            */
    Temp2 = lag2(TavgC);   /* two rows back           */
    Temp3 = lag3(TavgC);   /* three rows back         */
    Temp4 = lag4(TavgC);   /* four rows back          */
run;

/* Improved version: row-to-row temperature change that never crosses a
   City boundary. */
data whatherChina;
    set pg3.weather_china_daily2017;
    by City;
    TempPrev = lag1(TavgC);
    /* The first row of each City has no previous row within that city. */
    if first.City then call missing(TempPrev);
    Tvariation = TavgC - TempPrev;
run;


/* Report: summary statistics and a series plot of the temperature variation,
   routed to an HTML file.
   NOTE(review): &pathout is not assigned anywhere in this section; it must be
   defined before this point. */
ods html path="&pathout" file='china_temps.html';

proc means data=work.whatherChina;
    var Tvariation;
    class City;
run;

ods graphics / width=10in height=5in imagemap=on tipmax=800;

proc sgplot data=work.whatherChina;
    series x=Date y=Tvariation / group=City tip=(Date Tvariation);
run;

ods html close;

/* Exercise p302a02: moving average */

/* Preview: INOBS=10 limits the query to the first 10 input rows. */
proc sql inobs=10;
    title "Table: pg3.stocks_ABC";
    select *
        from pg3.stocks_ABC;
quit;

/* Three-row moving average of Open.
   MEAN() ignores missing arguments, so the first two rows are averaged over
   fewer than three values. */
data stockABC;
    set pg3.stocks_ABC;
    Pre1    = lag1(Open);
    Pre2    = lag2(Open);
    Moving3 = mean(Open, Pre1, Pre2);
    format Moving3 8.2;
run;

/* Exercise p302a03: moving average reported only from the third row on */

data stockABC(drop=close);
    set pg3.stocks_ABC;
    /* LAG is called on every row, so its queues stay in step with the data. */
    Pre1 = lag1(Open);
    Pre2 = lag2(Open);
    if _N_ >= 3 then Moving3 = mean(Open, Pre1, Pre2);
    format Moving3 7.2;
run;

/* Same Moving3 values as the previous step, without the helper columns.
   LAGn only advances its queue when the call is actually executed, so the
   calls must not sit behind IF ... THEN (that made the values differ).
   They are evaluated on every row and rows 1-2 are blanked afterwards. */
data stockABC(drop=close);
    set pg3.stocks_ABC;
    Moving3 = mean(Open, lag1(Open), lag2(Open));
    if _N_ < 3 then call missing(Moving3);
    format Moving3 7.2;
run;


/* Exercise: FIND(), FINDW(), COUNT() and COUNTW() */

/* Preview: first 5 rows */
proc sql inobs=5;
    select *
        from pg3.tornado_2017narrative;
quit;

/* Text statistics on the Narrative column. */
data tornadoNarrative(drop=begintime);
    set pg3.tornado_2017narrative;

    /* Number of occurrences of the substring 'EF' in Narrative. */
    NumEF = count(Narrative, 'EF');

    /* Number of blank-delimited words. */
    CountWord = countw(Narrative, ' ');

    /* Character position of the first 'EF' (0 when absent). */
    EFStartposition = find(Narrative, 'EF');

    /* Whole-word search. Author's note: same result as FIND when EF is
       followed by "-", 0 when EF is followed directly by a digit. */
    EFStartPosition2 = findw(Narrative, 'EF');

    /* With the 'e' modifier FINDW returns the ordinal number of the matching
       word (digits 0-5, '-', blank, '.' and ',' act as delimiters). */
    NumberWordBeforeEF = findw(Narrative, 'EF', '012345- .,', 'e');

    /* Word that follows EF, only when EF was found. */
    if NumberWordBeforeEF > 0 then
        AfterEF = scan(narrative, NumberWordBeforeEF + 1, '012345- .,');
run;

proc means data=tornadoNarrative maxdec=2;
    var NumEF CountWord;
run;

/* Author's note: "tornado" is the most frequent word after EF. */
proc freq data=tornadoNarrative order=freq;
    tables AfterEF;
run;

/* Exercise p302p01: LAG() within BY groups */

/* Preview: first 5 rows */
proc sql inobs=5;
    title "pg3.np_2016traffic";
    select *
        from pg3.np_2016traffic;
quit;

/* Change in traffic count versus the previous row of the same park. */
data traffic2016;
    set pg3.np_2016traffic;
    by ParkCode;
    PreviousTraffic = lag1(TrafficCount);
    /* No previous row exists for the first row of each park. */
    if first.ParkCode then call missing(PreviousTraffic);
    TrafficVariation = TrafficCount - PreviousTraffic;
run;

/* Exercise p302p02: COUNT(), FINDW() and SCAN() */

/* Preview: first 5 rows */
proc sql inobs=5;
    title "pg3.np_grandcanyon";
    select *
        from pg3.np_grandcanyon;
quit;

/* Locate the word "South" in Comments and extract the word that follows it. */
data grancanyon;
    set pg3.np_grandcanyon;

    /* 'i' modifier: case-insensitive count. */
    NumSouth = count(Comments, "South", "i");

    /* 'e' modifier: return the word number of "South" (0 when not found);
       blank and '.' are the word delimiters. */
    NumWordsbeforeSouth = findw(Comments, "South", " .", "ei");

    /* Only look up the next word when "South" was found. Without the guard a
       result of 0 makes SCAN return word 1, i.e. the first word of Comments. */
    if NumWordsbeforeSouth > 0 then
        WordAfterSouth = scan(Comments, NumWordsbeforeSouth + 1, " .");
run;

proc freq data=grancanyon order=freq;
    tables WordAfterSouth;
run;

/* Exercise p302d04: regular expressions */

/* Preview: first 5 rows */
proc sql inobs=5;
    title "Table: pg3.phonenumbers_us";
    select *
        from pg3.phonenumbers_us;
quit;

/* PRXMATCH(pattern, string) returns the position where the pattern first
   matches, or 0 when there is no match. */
data Phone;
    set pg3.phonenumbers_us;

    /* NNN-NNN-NNNN anywhere in the value. */
    Position = prxmatch('/([2-9]\d\d)-([2-9]\d\d)-(\d{4})/', Phone);

    /* The whole value (after STRIP removes surrounding blanks) must be
       NNN-NNN-NNNN. */
    StartEnd = prxmatch('/^([2-9]\d\d)-([2-9]\d\d)-(\d{4})$/', strip(Phone));

    /* (NNN) NNN-NNNN with optional whitespace after the parenthesis. */
    StartEndParenthesis = prxmatch('/\([2-9]\d\d\)\s*[2-9]\d\d-(\d{4})/', Phone);

    /* Drop rows that match none of the three patterns. */
    if Position = 0 and StartEnd = 0 and StartEndParenthesis = 0 then delete;
run;



/*Using CALL PRXDEBUG to trace regular-expression matching in the SAS log*/


/* Loc holds the match position returned by PRXMATCH for each phone value. */
data work.ValidPhoneNumbers;
    set pg3.phonenumbers_us;
    /* Write the current iteration number to the log. */
    putlog 'Iteration: ' _N_=;
    call prxdebug(1); /* Sends debugging output to the SAS log. */
    /* The trailing "o" option asks for the pattern to be compiled only once. */
    Exp='/([2-9]\d\d)-([2-9]\d\d)-(\d{4})/o';
    Loc=prxmatch(Exp,Phone);
run;


/* Exercise p302a06: regular expressions */

/* Preview: first 5 rows.
   PROC SQL is terminated with QUIT; RUN has no effect inside PROC SQL and
   leaves the procedure active until the next step boundary. */
proc sql inobs=5;
    title "pg3.tornado_2017narrative";
    select *
        from pg3.tornado_2017narrative;
quit;

/* Keep the rows whose narrative mentions EF3, EF4, EF-3 or EF-4.
   The filter now lives in the DATA step (which previously had no RUN and
   copied every row), and the report prints the filtered table. */
data Narrative(keep=State BeginDate Narrative);
    set pg3.tornado_2017narrative;
    where prxmatch('/EF-?(3|4)/', Narrative) > 0;
run;

proc print data=Narrative;
    var State BeginDate Narrative;
run;

/* Equivalent alternatives for the WHERE condition: */

/*where prxmatch('/(EF3|EF-3|EF4|EF-4)/',Narrative)>0;
    where prxmatch('/(EF-?3|EF-?4)/',Narrative)>0;
    where prxmatch('/EF-?(3|4)/',Narrative)>0;
    where prxmatch('/EF-?[34]/',Narrative)>0;*/

/* Exercise p302d05: PRXCHANGE() */

/* Preview: first 5 rows */
proc sql inobs=5;
    title "pg3.weather_usstationshourly";
    select *
        from pg3.weather_usstationshourly;
quit;

/* Expand abbreviations in station names and swap the two coordinates. */
data weather(drop=Code);
    set pg3.weather_usstationshourly;

    /* " AP " -> " AIRPORT " (the i option makes the match case-insensitive). */
    Name_new = prxchange('s/ AP / AIRPORT /i', -1, Name);

    /* INTL. / INTL / "INT " -> INTERNATIONAL.
       \b stops the pattern from firing inside words that merely end in "INT"
       (e.g. POINT). The dot is escaped, and "L\." is listed before "L" so the
       trailing period is consumed as well. */
    Name_new = prxchange('s/\bINT(L\.|L| )/ INTERNATIONAL /i', -1, Name_new);

    /* "a@b" -> "b@a". Both numbers may carry a leading minus sign. */
    LongLatInv = prxchange('s/(-?\d+\.\d+)(@)(-?\d+\.\d+)/$3$2$1/', -1, LongLat);
run;

/* Exercise p302a07: regular expressions */

/* Preview: first 5 rows */
proc sql inobs=5;
    select *
        from pg3.tornado_2017narrative;
quit;

/* Normalise "EF-n" to "EFn" in the narrative text. */
data heather(drop=county begintime);
    set pg3.tornado_2017narrative;

    /* Explicit length so the rewritten text is not cut to a default length. */
    length Narrative_new $ 4242;

    /* Position of the first "EF-" (0 when absent). */
    EFposition = prxmatch('/EF-/', Narrative);

    /* -1 replaces every occurrence. */
    Narrative_new = prxchange('s/EF-/EF/', -1, narrative);
run;

/* Exercise p302p04: PRXMATCH() / PRXCHANGE() */

/* Preview: first 5 rows */
proc sql inobs=5;
    title "pg3.np_acres";
    select *
        from pg3.np_acres;
quit;

/* Select park names containing "N PRES", "N PRESERVE", "NPRES" or "NPRE" as
   complete tokens and standardise them to "NPRE".
   \b anchors both ends on word boundaries, so "N PRES" no longer matches in
   the middle of a longer word, and the whitespace that follows the token is
   kept instead of being swallowed by the replacement. */
data acres;
    set pg3.np_acres;
    Position = prxmatch('/\b(N PRESERVE|N PRES|NPRES|NPRE)\b/', ParkName);
    if Position > 0;
    ParkName_new = prxchange('s/\b(N PRESERVE|N PRES|NPRES)\b/NPRE/', -1, ParkName);
run;

/*Exercise p302p05 on PRXCHANGE: reordering the parts of a name*/

/*Preview: first 5 rows*/
proc sql inobs=5;
select *
from sashelp.baseball;
quit;


/*Build NameSurname by swapping capture group 1 and capture group 3 of Name.
  Group 1: first run of word characters, an optional non-digit, more word characters.
  Group 2: the single character that follows (not reused in the replacement).
  Group 3: whitespace followed by the remaining name part.
  NOTE(review): presumably Name is stored as "Last, First" - confirm against the data.*/
data baseball(keep= name nameSurname);
set sashelp.baseball;
NameSurname = prxchange('s/(\w+\D?\w*)(.)(\s\w+\D?\w*\D?)/$3 $1/',-1,Name);
run;                    /*Alternative pattern: 's/(\w+\s?\D?\w*)(, )(\w+\s?\w*\b)/$3 $1/'*/

/* Exercise p303a01: ARRAY (temperature conversion) */

/* Preview: first 5 rows of the source table.
   The query on weatherDublin that used to follow was removed: that table is
   only created by the DATA step after this query, so selecting from it here
   fails in a fresh session. The converted table is printed further down. */
proc sql inobs=5;
    title "pg3.weather_dublinmadrid_monthly2017";
    select *
        from pg3.weather_dublinmadrid_monthly2017;
quit;

/* Convert the twelve monthly temperatures in place with the formula
   (value - 32) * 5 / 9, using an array over Temp1-Temp12. */
data weatherDublin(drop=mounth);
    set pg3.weather_dublinmadrid_monthly2017(keep=City Temp1-Temp12);
    array Temperature[12] Temp1-Temp12;
    do Mounth = 1 to dim(Temperature);
        Temperature[Mounth] = (Temperature[Mounth] - 32) * 5 / 9;
    end;
    format Temp1-Temp12 6.2;
run;

title "Average Celsius temeprature for Dublin and Madrid";
proc print data=weatherDublin;
run;
title;   /* clear the title */

/* Exercise p303a02: ARRAY with an undefined number of elements */

/* Preview: first 5 rows */
proc sql inobs=5;
    title "pg3.weather_dublinmadrid_monthly2018";
    select *
        from pg3.weather_dublinmadrid_monthly2018;
quit;

/* [*] sizes the array from the variable list; "temp:" selects every variable
   whose name starts with "temp". */
data weather(drop=Mounth);
    set pg3.weather_dublinmadrid_monthly2018;
    array Temperature[*] temp:;
    do Mounth = lbound(Temperature) to hbound(Temperature);
        Temperature[Mounth] = (Temperature[Mounth] - 32) * 5 / 9;
    end;
    format temp: 6.2;
run;

title "Average 2018 Celsius temperature for Dublin and Madrid";
proc print data=weather noobs;
run;
title;   /* clear the title */

/* Exercise p303d01: ARRAY (more advanced) */

/* Preview: first 5 rows.
   PROC SQL is terminated with QUIT; RUN has no effect inside PROC SQL. */
proc sql inobs=5;
    title "pg3.weather_dublinmadrid_monthly2017";
    select *
        from pg3.weather_dublinmadrid_monthly2017;
quit;

/* Two arrays in one DATA step: Farenht maps existing columns, Celsius
   creates the new columns Tempc1-Tempc3. */
data tempq1(drop=Mounth);
    set pg3.weather_dublinmadrid_monthly2017(drop=PrecipQ1-PrecipQ4);
    array Farenht[3] Temp1-Temp3;
    array Celsius[3] Tempc1-Tempc3;
    do Mounth = 1 to dim(Celsius);
        Celsius[Mounth] = (Farenht[Mounth] - 32) * 5 / 9;
    end;
    format Tempc1-Tempc3 6.2;
run;


/* Third quarter: the array bounds 7:9 let the month number be used directly
   as the subscript. */
data tempq3(drop=Mounth);
    set pg3.weather_dublinmadrid_monthly2017(keep=City Temp7-Temp9);
    array Farenht[7:9] Temp7-Temp9;
    array Celsius[7:9] Tempc7-Tempc9;
    do Mounth = lbound(Farenht) to hbound(Farenht);
        Celsius[Mounth] = (Farenht[Mounth] - 32) * 5 / 9;
    end;
    format Tempc7-Tempc9 6.2;
run;


/* Share of each quarter in the yearly precipitation total
   (PctQn = PrecipQn / sum of the four quarters). */
data quarter(drop=i sumquarter);
    set pg3.weather_dublinmadrid_monthly2017(keep=City PrecipQ1-PrecipQ4);
    array Quarter[4] PrecipQ1-PrecipQ4;
    array Quarterpct[4] PctQ1-PctQ4;
    sumquarter = sum(of Quarter[*]);
    do i = 1 to dim(Quarter);
        Quarterpct[i] = Quarter[i] / sumquarter;
    end;
    format PctQ1-PctQ4 percent8.2;
run;

/* Exercise p303a03: rotating data with ARRAY */

/* Preview: first five rows.
   INOBS=5 makes the query match that intent, and QUIT (not RUN) ends
   PROC SQL. */
proc sql inobs=5;
    title "pg3.weather_dublinmadrid_monthly5yr";
    select *
        from pg3.weather_dublinmadrid_monthly5yr;
quit;

/* Rotate the four quarterly columns into rows without an array: one explicit
   OUTPUT per quarter, each value multiplied by 2.54. */
data Rotating_weather(drop=PrecipQ1-PrecipQ4);
    set pg3.weather_dublinmadrid_monthly5yr(keep=city year PrecipQ1-PrecipQ4);
    where city = "Dublin";

    Quarter = 1;
    Precip  = PrecipQ1 * 2.54;
    output;

    Quarter = 2;
    Precip  = PrecipQ2 * 2.54;
    output;

    Quarter = 3;
    Precip  = PrecipQ3 * 2.54;
    output;

    Quarter = 4;
    Precip  = PrecipQ4 * 2.54;
    output;

    format Precip 6.2;
run;

/* Rotate the quarterly columns into rows with an array (one output row per
   quarter). This replaces the table built by the previous step.
   - The DROP= list no longer names "i": no such variable exists in this step.
   - Unlike the manual version, Precip is copied as-is (no 2.54 factor). The
     quarterly averages computed from this table further down are compared
     directly with PrecipQ1-PrecipQ4, so the units must stay unchanged. */
data Rotating_weather(drop=PrecipQ1-PrecipQ4);
    set pg3.weather_dublinmadrid_monthly5yr(keep=city year PrecipQ1-PrecipQ4);
    where city = "Dublin";
    array P[4] PrecipQ1-PrecipQ4;
    do Quarter = 1 to 4;
        Precip = P[Quarter];
        output;
    end;
    format Precip 6.2;
run;


/* Bar chart of the mean precipitation per quarter.
   The table plotted here is the one produced by the array-based step, which
   stores Precip without the 2.54 conversion, so the title no longer claims
   centimetres.
   NOTE(review): "inches" is inferred from the 2.54 factor used in the manual
   rotation step - confirm the unit of PrecipQ1-PrecipQ4. */
title 'Average Quarterly Precipitation (inches) for Dublin';
proc sgplot data=work.Rotating_weather;
    vbar Quarter / response=Precip stat=mean datalabel;
                   /*datalabelattrs=(size=12pt);*/
    format Precip 6.2;
run;
title;

/* Exercise p303d02: arrays with initial values, and a macro variable built
   from an SQL query */

/* Preview: first 5 rows */
proc sql inobs=5;
    title "pg3.weather_dublinmadrid_monthly5yr";
    select *
        from pg3.weather_dublinmadrid_monthly5yr;
quit;

/* Classify each quarter against a reference average.
   Pavg is a regular array with initial values, so it creates the variables
   Pavg1-Pavg4; they are removed again through DROP=. */
data weather(drop=quarter Pavg1-Pavg4);
    set pg3.weather_dublinmadrid_monthly5yr(drop=Temp1-Temp12);
    where City = "Dublin";
    array P[4] PrecipQ1-PrecipQ4;
    array Pavg[4] (7.65, 6.26, 7.56, 9.12);
    array Status[4] $5 StatusQ1-StatusQ4;
    do quarter = 1 to 4;
        select;
            when (P[quarter] > Pavg[quarter]) Status[quarter] = "Above";
            when (P[quarter] < Pavg[quarter]) Status[quarter] = "Under";
            otherwise                         Status[quarter] = "equal";
        end;
    end;
run;

/* Same classification with a _TEMPORARY_ array: its elements are available
   during the step but are not written to the output table, so nothing has to
   be dropped for them. */
data weather(drop=quarter);
    set pg3.weather_dublinmadrid_monthly5yr(drop=Temp1-Temp12);
    where City = "Dublin";
    array P[4] PrecipQ1-PrecipQ4;
    array Pavg[4] _temporary_ (7.65, 6.26, 7.56, 9.12);
    array Status[4] $5 StatusQ1-StatusQ4;
    do quarter = 1 to 4;
        select;
            when (P[quarter] > Pavg[quarter]) Status[quarter] = "Above";
            when (P[quarter] < Pavg[quarter]) Status[quarter] = "Under";
            otherwise                         Status[quarter] = "equal";
        end;
    end;
run;

/* Build a macro variable from an SQL query */

/* Preview: first 5 rows of the rotated table */
proc sql inobs=5;
    title "work.DublinPrecipRotate";
    select *
        from Rotating_weather;
quit;

/* Store the four quarterly averages in &AvgRPrecip as a comma-separated list.
   The list is later used positionally (element n = quarter n), so the result
   is explicitly ordered by Quarter instead of relying on the order in which
   GROUP BY happens to return the groups. */
proc sql;
    select avg(Precip) format=6.1
        into :AvgRPrecip separated by ","
        from Rotating_weather
        group by Quarter
        order by Quarter;
quit;

/* Same classification, with the reference averages supplied by the macro
   variable &AvgRPrecip (a comma-separated list of four values) instead of
   hard-coded constants. */
data weather(drop=quarter);
    set pg3.weather_dublinmadrid_monthly5yr(drop=Temp1-Temp12);
    where City = "Dublin";
    array P[4] PrecipQ1-PrecipQ4;
    array Pavg[4] _temporary_ (&AvgRPrecip );
    array Status[4] $5 StatusQ1-StatusQ4;
    do quarter = 1 to 4;
        select;
            when (P[quarter] > Pavg[quarter]) Status[quarter] = "Above";
            when (P[quarter] < Pavg[quarter]) Status[quarter] = "Under";
            otherwise                         Status[quarter] = "equal";
        end;
    end;
run;

/* Exercise p303p01: ARRAY */

/* Preview: first 5 rows */
proc sql inobs=5;
    title "pg3.eu_occ";
    select *
        from pg3.eu_occ;
quit;

/* Share of each accommodation type in the row total. */
data Hotel(drop=i);
    set pg3.eu_occ(drop=Geo);
    array Day[3] Hotel ShortStay Camp;
    array Pct[3] Hotelpct ShorStaypct Camppct;
    /* Plain addition: the total is missing when any component is missing. */
    Total = Hotel + ShortStay + Camp;
    do i = 1 to dim(Day);
        Pct[i] = Day[i] / Total;
    end;
    format Hotel ShortStay Camp Total comma16.
           Hotelpct ShorStaypct Camppct percent8.2;
run;

/* Exercise p303p02: ARRAY with the _TEMPORARY_ option */

/* Preview: first 5 rows */
proc sql inobs=5;
    title "pg3.test_answers";
    select *
        from pg3.test_answers;
quit;

/* Score each row: one point per answer q1-q10 that equals the answer key. */
data Answer(drop=i);
    set pg3.test_answers;
    array answer[10] $1 q1-q10;
    /* Answer key, held in a temporary array so it is not written out. */
    array true[10] $1 _temporary_ ("A", "C", "C", "B", "E", "E", "D", "B", "B", "A");
    Score = 0;   /* reset for every row */
    do i = 1 to dim(answer);
        /* The comparison evaluates to 1 or 0, which the sum statement adds. */
        Score + (answer[i] = true[i]);
    end;
run;

/* Exercise p303d04: multidimensional ARRAY */

/* Preview: first 5 rows */
proc sql inobs=5;
    title "pg3.weather_dublin_daily5yr";
    select *
        from pg3.weather_dublin_daily5yr;
quit;

/* Compare the temperature on the 15th of January-March 2013/2014 with a
   reference value looked up by [year, month]. */
data DublinDaily;
    set pg3.weather_dublin_daily5yr(keep=date tempdailyavg);
    where day(date) = 15
      and month(date) <= 3
      and year(date) in (2013, 2014);

    /* Rows are years 2013-2014, columns are months 1-3; _TEMPORARY_ keeps the
       values out of the output table. */
    array MontlyAvg[2013:2014, 3] _temporary_
        (40.9, 40.7, 38.6,
         42.5, 42.6, 45.4);

    month = month(date);
    year  = year(date);
    TempMontlyAvg = MontlyAvg[year, month];
    Difference    = TempDailyAvg - TempMontlyAvg;
run;

/* Exercise p303a05: multidimensional ARRAY */

/* Preview: first 5 rows */
proc sql inobs=5;
    title "pg3.weather_dublin_daily5yr";
    select *
        from pg3.weather_dublin_daily5yr;
quit;

/* Precip of each selected row as a share of a total looked up by
   [year, month] (years 2015-2017, months 1-2). */
data weather;
    set pg3.weather_dublin_daily5yr;
    where month(date) <= 2
      and year(date) >= 2015
      and Precip > 0.3;

    array P[2015:2017, 2] _temporary_ (2.29, 1.04, 4.15, 2.34, 0.90, 2.44);

    Y = year(date);
    M = month(date);
    TotalMonth = P[Y, M];
    PCTPrecip  = Precip / TotalMonth;
    format PCTPrecip percent7.2;
run;

/* Exercise p303d05: filling a multidimensional array from a table */

/* Preview: first 5 rows */
proc sql inobs=5;
    select *
        from pg3.weather_dublin_monthly5yr;
quit;

/* On the first iteration, five rows of the monthly table are read into
   dailyT[year, month]; row n is stored under year 2012+n, i.e. the rows are
   assumed to be in year order 2013-2017. Every row for the 15th of a month is
   then compared with the stored value for its year and month. */
data weather(drop=i j Temp1-Temp12 Y M);
    array dailyT[2013:2017, 12] _temporary_;

    if _N_ = 1 then do;
        do i = 2013 to 2017;
            set pg3.weather_dublin_monthly5yr(keep=temp1-temp12);
            array monthly[12] temp1-temp12;
            do j = 1 to 12;
                dailyT[i, j] = monthly[j];
            end;
        end;
    end;

    set pg3.weather_dublin_daily5yr(keep=date TempDailyAvg);
    where day(Date) = 15;

    Y = year(date);
    M = month(date);
    TempMonthlyAvg = dailyT[Y, M];
    Tdifference    = TempDailyAvg - TempMonthlyAvg;
run;

/* Exercise: ARRAY with the _TEMPORARY_ option */

/* Preview: first five rows */
proc sql inobs=5;
    title "set pg3.storm_stats";
    select *
        from pg3.storm_stats;
quit;

/* Compare each storm's MaxWindMPH with a reference value looked up by
   [season, quarter of StartDate] for seasons 1980-1981. */
data storm(keep=Name StartDate Season MaxWindMPH MaxWindSpeed SpeedDifference);
    set pg3.storm_stats;
    where season in (1980, 1981);

    array MWTable[1980:1981, 4] _temporary_
        (132, 121, 190, 138,
         127, 109, 138, 127);

    Qtr = qtr(StartDate);
    MaxWindSpeed    = MWTable[Season, Qtr];
    SpeedDifference = MaxWindMPH - MaxWindSpeed;
run;

/* Exercise p303p05: _TEMPORARY_ array filled from a table */

/* Preview: first 5 rows */
proc sql inobs=5;
    title "pg3.storm_maxwindseasqtr";
    select *
        from pg3.storm_maxwindseasqtr;
quit;

/* On the first iteration, five rows of the lookup table are read into
   MW[season, quarter]; row n is stored under season 1979+n. Each storm of
   seasons 1980-1984 is then compared with the value for its season and
   quarter. */
data MaxWind(drop=i j MaxWindQ1-MaxWindQ4);
    array MW[1980:1984, 4] _temporary_;

    if _N_ = 1 then do;
        do i = 1980 to 1984;
            set pg3.storm_maxwindseasqtr;
            array MaxW[4] MaxWindQ1-MaxWindQ4;
            do j = 1 to dim(MaxW);
                MW[i, j] = MaxW[j];
            end;
        end;
    end;

    set pg3.storm_stats(keep=Name Season StartDate MaxWindMPH);
    where 1980 <= season <= 1984;

    quarter        = qtr(StartDate);
    MaxWindM       = MW[Season, quarter];
    WindDifference = MaxWindMPH - MaxWindM;
run;

/* Exercise p304d02: hash object with LENGTH and CALL MISSING */

/* Preview: first 5 rows */
proc sql inobs=5;
    title"pg3.population_usstates";
    select *
        from pg3.population_usstates;
quit;

/* Load pg3.population_usstates into a hash object keyed by StateName.
   No input table is read by the step itself; it only builds the hash. */
data work.StateCityPopulation;
    /* The key and data variables must exist in the PDV before the hash is
       defined (this can be checked with the DATA step debugger). */
    length StateName $20 Capital $14 StatePop2017 8;

    if _N_ = 1 then do;
        declare hash States(dataset: "pg3.population_usstates");
        States.definekey('StateName');
        States.definedata('Capital', 'StatePop2017');
        States.definedone();
        /* Explicitly initialise the host variables to missing. */
        call missing(StateName, Capital, StatePop2017);
    end;
run;

/* Same technique as the previous exercise, with a two-part key */

/* Preview: first 5 rows */
proc sql inobs=5;
    title "pg3.weather_ustop5_monthly2017";
    select *
        from pg3.weather_ustop5_monthly2017;
quit;

/* Load the monthly weather table into a hash object keyed by City + Month. */
data Weather_monthly;
    length City $20 Month 8 TempMonAvg 8 PrecipMonSum 8;

    if _N_ = 1 then do;
        declare hash WM(dataset: 'pg3.weather_ustop5_monthly2017');
        WM.definekey('City', 'Month');
        WM.definedata('TempMonAvg', 'PrecipMonSum');
        WM.definedone();
        call missing(City, Month, TempMonAvg, PrecipMonSum);
    end;
run;

/* Exercise p304d03: the hash FIND() method */

/* Preview: first 5 rows of both tables */
proc sql inobs=5;
    title "pg3.population_usstates";
    select *
        from pg3.population_usstates;
    title "pg3.population_uscities;";
    select *
        from pg3.population_uscities;
quit;

/* Look up each city's state in a hash table and compute the city's share of
   the state population. All cities go to Weather; cities that are their
   state's capital also go to WeatherCapital. */
data Weather WeatherCapital;
    if _N_ = 1 then do;
        /* Never executed: at compile time it adds the lookup columns to the
           PDV (replaces LENGTH + CALL MISSING). */
        if 0 then set pg3.population_usstates;
        declare hash Pop(dataset: 'pg3.population_usstates');
        Pop.definekey('StateName');
        Pop.definedata('Capital', 'StatePop2017');
        Pop.definedone();
    end;

    set pg3.population_uscities;
    StateName = stnamel(Statecode);
    RC = Pop.find(key: StateName);

    /* A failed lookup leaves Capital/StatePop2017 holding the previous row's
       values, so they are cleared BEFORE the percentage is computed;
       otherwise PctCity would be derived from another state's population. */
    if RC ne 0 then call missing(Capital, StatePop2017);
    PctCity = CityPop2017 / StatePop2017;

    output Weather;
    if Capital = Cityname then output WeatherCapital;
    format PctCity percent8.2;
run;

/* Exercise p304p01: hash lookup, keeping only the matching rows */

/* Preview: first 5 rows of both tables */
proc sql inobs=5;
    title "pg3.np_codelookup";
    select *
        from pg3.np_codelookup;
    title "pg3.np_acres2";
    select *
        from pg3.np_acres2;
quit;

/* Add ParkName and Type from the lookup table to each acreage row. */
data Park(drop=RC);
    if _N_ = 1 then do;
        if 0 then set pg3.np_codelookup;   /* defines the lookup columns */
        declare hash ParkTable(dataset: 'pg3.np_codelookup');
        ParkTable.definekey('ParkCode');
        ParkTable.definedata('ParkName', 'Type');
        ParkTable.definedone();
    end;

    set pg3.np_acres2;
    ParkCode = upcase(ParkCode);   /* the lookup is done on the upper-cased code */
    RC = ParkTable.find(key: ParkCode);

    /* Discard rows whose code was not found in the hash table. */
    if RC ne 0 then delete;
run;


/* Exercise p304d04: the hash ADD() and OUTPUT() methods */

/* Preview: first five rows of both tables */
proc sql inobs=5;
    title "pg3.population_usstates";
    select *
        from pg3.population_usstates;
    title "set pg3.population_uscities";
    select *
        from pg3.population_uscities;
quit;

/* Two hash objects: HASHSTATES is a lookup table, CapitalSort collects the
   capital cities and is written out as work.CityCapitalSort at the end. */
data City;
    if _N_ = 1 then do;
        if 0 then set pg3.population_usstates;
        declare hash HASHSTATES(dataset: 'pg3.population_usstates');
        HASHSTATES.definekey('StateName');
        HASHSTATES.definedata('Capital', 'StatePop2017');
        HASHSTATES.definedone();

        /* ordered:'descending' - OUTPUT() writes the rows in descending key
           order. */
        declare hash CapitalSort(ordered: 'descending');
        CapitalSort.definekey('PctPop');
        CapitalSort.definedata('PctPop', 'CityName', 'CityPop2017', 'StateName');
        CapitalSort.definedone();
    end;

    set pg3.population_uscities end=lastRow;
    StateName = stnamel(StateCode);
    RC = HASHSTATES.find(key: StateName);
    /* Clear the lookup columns when the state was not found. */
    if not (RC = 0) then call missing(Capital, StatePop2017);

    PctPop = CityPop2017 / StatePop2017;
    format PctPop percent8.4;
    output work.City;

    if CityName = Capital then CapitalSort.add();
    if lastRow then CapitalSort.output(dataset: 'work.CityCapitalSort');
run;

/* Exercise p304a03: the MULTIDATA option */

/* Same flow as the previous exercise, but PctPop is rounded to steps of 0.1,
   which produces repeated key values; multidata:'YES' lets the hash keep
   several entries per key. */
data work.City;
    if _N_ = 1 then do;
        if 0 then set pg3.population_usstates;
        declare hash HashStates(dataset: 'pg3.population_usstates');
        HashStates.definekey('StateName');
        HashStates.definedata('Capital', 'StatePop2017');
        HashStates.definedone();

        declare hash HashCapitalSort(ordered: 'descending', multidata: 'YES');
        HashCapitalSort.definekey('PctPop');
        HashCapitalSort.definedata('PctPop', 'Cityname', 'CityPop2017', 'StateName');
        HashCapitalSort.definedone();
    end;

    set pg3.population_uscities end=lastRow;
    Statename = stnamel(Statecode);
    RC = HashStates.find(key: Statename);
    if not (RC = 0) then call missing(Capital, StatePop2017);

    PctPop = round(citypop2017 / statepop2017, 0.1);
    format PctPop percent8.2;
    output city;

    if CityName = Capital then HashCapitalSort.add();
    if lastRow then HashCapitalSort.output(dataset: 'work.CityCapitalSort');
run;

/* Exercise p304p04: the MULTIDATA option */

/* Preview: first five rows of both tables */
proc sql inobs=5;
    title "pg3.np_codelookup";
    select *
        from pg3.np_codelookup;
    title "set pg3.np_acres2";
    select *
        from pg3.np_acres2;
quit;

/* ParkDesc is the code lookup table. AcreAge collects the matched rows keyed
   by GrossAcres (descending, duplicates allowed) and is written out as
   work.GrossAcres after the last input row. */
data work.Acres;
    if _N_ = 1 then do;
        if 0 then set pg3.np_codelookup;
        declare hash ParkDesc(dataset: 'pg3.np_codelookup');
        ParkDesc.definekey('ParkCode');
        ParkDesc.definedata('ParkName', 'Type');
        ParkDesc.definedone();

        declare hash AcreAge(ordered: 'descending', multidata: 'YES');
        AcreAge.definekey('GrossAcres');
        AcreAge.definedata('ParkCode', 'ParkName', 'Type', 'State', 'GrossAcres');
        AcreAge.definedone();
    end;

    set pg3.np_acres2 end=lastRow;
    ParkCode = upcase(ParkCode);
    RC = ParkDesc.find(key: ParkCode);

    if RC eq 0 then AcreAge.add();
    /* A failed lookup would otherwise leave the previous row's ParkName and
       Type in the row written to work.Acres.
       NOTE(review): assumes pg3.np_acres2 does not carry its own ParkName or
       Type columns - confirm. */
    else call missing(ParkName, Type);

    output work.Acres;
    if lastRow = 1 then AcreAge.output(dataset: 'work.GrossAcres');
run;

title "Park by size";
proc print data=work.grossacres;
run;
title;

/* Exercise p304p07: the hash iterator object */

/* Preview: first 5 rows */
proc sql inobs=5;
    title "pg3.np_acres";
    select *
        from pg3.np_acres;
quit;

/* Load the parks into a hash ordered by GrossAcres and use an iterator to
   write the first 10 entries to work.lowAcres and the last 10 to
   work.highAcres. */
data work.lowAcres(drop=i) work.highAcres(drop=i);
    if _N_ = 1 then do;
        /* Compile-time only: defines ParkName and GrossAcres in the PDV. */
        if 0 then set pg3.np_acres(keep=ParkName GrossAcres);
        declare hash Acres(dataset: 'pg3.np_acres', ordered: 'Ascending', multidata: 'YES');
        Acres.definekey('GrossAcres');
        Acres.definedata('ParkName', 'GrossAcres');
        Acres.definedone();
        declare hiter A('Acres');
    end;

    /* Walk forward from the first entry. */
    do i = 1 to 10;
        if i = 1 then A.first();
        else A.next();
        output work.lowAcres;
    end;

    /* Walk backward from the last entry. */
    do i = 1 to 10;
        if i = 1 then A.last();
        else A.prev();
        output work.HighAcres;
    end;

    /* The SET statement above never executes, so the step would never reach
       end-of-file on its own; STOP ends it explicitly after this single pass
       instead of relying on SAS halting the step for looping. */
    stop;
run;

/* Exercise p305d01: custom DATETIME picture format built from directives */

/* Preview: first 5 rows */
proc sql inobs=5;
    title "pg3.storm_detail";
    select *
        from pg3.storm_detail;
quit;

/* DATATYPE=DATETIME makes the %-directives in the picture apply to datetime
   values; DEFAULT=20 sets the default output width. */
proc format;
    picture ISOformat (default=20)
        low-high = '%d%3B%y:%HH' (datatype=datetime);
run;

proc print data=pg3.storm_detail(obs=10) noobs;
    var Season Name ISO_time Latitude Longitude Region;
    format Iso_time ISOformat.;
run;

/* Exercise p305a02: custom DATE picture format */

/* '%B of %Y' is built from date directives. Because PROC FREQ groups on
   formatted values, dates that format to the same text are counted together. */
proc format;
    picture Mymonth (default=20)
        low-high = '%B of %Y' (datatype=date);
run;

proc freq data=pg3.tornado_2017 order=freq;
    tables begindate;
    format BeginDate Mymonth.;
run;

/* Exercise p305p01: custom DATE picture format */

/* Most recent storms first. */
proc sort data=pg3.storm_final out=work.storm_final;
    by descending StartDate;
run;

/* Dot-separated picture built from the date directives %a, %B, %0d and %y. */
proc format;
    picture StormDate (default=20)
        low-high = '%a.%B.%0d.%y' (datatype=date);
run;

proc print data=work.storm_final;
    var Name BasinName StartDate EndDate MaxWindMPH MinPressure;
    format StartDate EndDate StormDate.;
run;

/* Exercise p305d02: picture format for numbers */

/* Display amounts as $n, $nk or $nM depending on magnitude. MULTIPLIER
   scales the value before it is placed in the picture; ROUND rounds instead
   of truncating. */
proc format;
    picture DamageFormat (round default=7)
        low -< 1000       = '009'  (prefix='$' multiplier=1)
        1000 -< 1000000   = '009k' (prefix='$' multiplier=.001)
        1000000 - High    = '009M' (prefix='$' multiplier=.000001);
run;

/* Largest property damage first. */
proc sort data=pg3.tornado_2017 out=work.tornado_2017;
    by descending PropertyDamage;
run;

title "Tornado of 2017";
proc print data=work.tornado_2017;
    var State BeginDate Scale Deaths Injuries PropertyDamage;
    format PropertyDamage DamageFormat.;
run;
title;


/* Exercise p305p02: picture formats for numbers */

/* Preview: first five rows */
proc sql inobs=5;
    title "pg3.stocks";
    select *
        from pg3.stocks;
quit;

/* VolumeFormat appends " shares"; ChangeFormat appends "USD" and adds an
   explicit "-" prefix for negative values. */
proc format;
    picture VolumeFormat
        low-high = '000,000,009 shares';
    picture ChangeFormat (default=11)
        low -< 0 = '009.99USD' (prefix="-")
        0 - high = '009.99USD';
run;

/* Character versions of the same information built with CATX, next to the
   formatted numeric columns. */
data work.stock;
    set pg3.stocks(drop=High Low);
    VolumeChar      = catx('', put(Volume, comma18.), 'shares');
    DailyChange     = Close - Open;
    DailyChangeChar = catx('', DailyChange, 'usd');
    format Volume VolumeFormat. DailyChange ChangeFormat.;
run;

/* Exercise p305d03: user-defined functions */

/* Preview: first 5 rows of both tables */
proc sql inobs=5;
    title "pg3.weather_ny_daily2017";
    select *
        from pg3.weather_ny_daily2017;      /* Tavg in Fahrenheit */
    title "pg3.weather_sydney_daily2017;";
    select *
        from pg3.weather_sydney_daily2017;  /* Tavg in Celsius */
quit;

/* Define the temperature conversion functions in the package
   pg3.funcs.weather and make the library visible through CMPLIB=. */
proc fcmp outlib=pg3.funcs.weather;
    /* Fahrenheit to Celsius, rounded to 2 decimals. */
    function FtoC(TempF);
        return(round((TempF - 32) * 5 / 9, .01));
    endsub;

    /* Celsius to Fahrenheit, rounded to 2 decimals. */
    function CtoF(TempC);
        return(round(TempC * 9 / 5 + 32, .01));
    endsub;
run;

options cmplib=pg3.funcs;

/* Apply the conversion functions: New York gets a Celsius column, Sydney a
   Fahrenheit column. */
data W_NewYork;
    set pg3.weather_ny_daily2017;
    TavgC = FtoC(Tavg);
run;

data W_Sidney;
    set pg3.weather_sydney_daily2017;
    TavgF = CtoF(Tavg);
run;


/* Exercise p305a04: user-defined functions */

/* Preview: first 5 rows */
proc sql inobs=5;
    title"pg3.weather_ny_daily2017";
    select *
        from pg3.weather_ny_daily2017;
quit;

/* Define IntoCM and FtoC in the package pg3.funcs.weather and expose the
   library through CMPLIB=.
   Note: this stores FtoC in the same package as the earlier definition, so
   this version is the one in effect afterwards. */
proc fcmp outlib=pg3.funcs.weather;
    /* Inches to centimetres. */
    function IntoCM(PIn);
        CM = PIn * 2.54;
        return(CM);
    endsub;

    /* Fahrenheit to Celsius, rounded to 2 decimals.
       The parentheses around (F - 32) are required: without them only the
       constant 32*5/9 was subtracted from F, which is not a conversion. */
    function FtoC(F);
        Cel = round((F - 32) * 5 / 9, .01);
        return(Cel);
    endsub;
run;

options cmplib=pg3.funcs;

/* Show the converted values next to the original table (first 10 rows).
   User-defined functions can be called directly from PROC SQL. */
proc sql inobs=10;
    title "Table with conversion";
    select City,
           COuntry,
           Date,
           IntoCM(Precip) as PrecipCM,
           FtoC(Tavg)     as TavgC
        from pg3.weather_ny_daily2017;

    title "Original table";
    select *
        from pg3.weather_ny_daily2017;
quit;

/* Exercise p305p04: user-defined functions */

/* Preview: first 5 rows */
proc sql inobs=5;
    title "pg3.class_tests";
    select *
        from pg3.class_tests;
quit;

/* Fscore: weighted score in which the final counts twice,
   (T1 + T2 + T3 + T4 + 2*F) / 6, rounded to 2 decimals. SUM() skips missing
   arguments, but the divisor is always 6. */
proc fcmp outlib=pg3.myfunctions.class;
    function Fscore(T1, T2, T3, T4, F);
        Final = sum(T1, T2, T3, T4, F * 2) / 6;
        return(round(Final, .01));
    endsub;
run;

options cmplib=pg3.myfunctions;

data work.Test;
    set pg3.class_tests;
    Fscore = Fscore(Test1, Test2, Test3, Test4, Final);
run;

/* Exercise p305p05: user-defined character functions */

/* Preview: first 5 rows (Name and Team only) */
proc sql inobs=5;
    title "sashelp.baseball";
    select Name, Team
        from sashelp.baseball;
quit;

/* FL: turn "Last, First" into "First Last" (returns a $25 value).
   The name is split on the comma only. With SCAN's default delimiters a blank
   is also a separator, so a multi-word surname was cut into pieces and the
   wrong parts were swapped. CATX removes the surrounding blanks of each part
   and joins them with a single space. */
proc fcmp outlib=pg3.myfunctions.baseball;
    function FL(LastFirst $) $25;
        length FirstLast $25;
        FirstLast = catx(' ', scan(LastFirst, 2, ','), scan(LastFirst, 1, ','));
        return(FirstLast);
    endsub;
run;

options cmplib=pg3.myfunctions;

data baseball(keep=Name Team FirstLast);
    set sashelp.baseball;
    FirstLast = FL(Name);
run;
