# Week 14 (Part 2)


### How to list out the the current directory name

In [7]:
import os
print("Path at terminal when executing this file")
print(os.getcwd() + "\n")

Path at terminal when executing this file
C:\Data



### How to list out the current directory name using the Jupyter notebook magic command

In [1]:
%pwd

'C:\\Users\\pmuhuri'

In [4]:
import os
os.environ["TEMP"]

'C:\\Users\\pmuhuri\\AppData\\Local\\Temp'

import os
for a in os.environ:
    print(a, os.getenv(a))


import os
for a in os.environ:
    print('Var: ', a, 'Value: ', os.getenv(a))
print("all done")

### How to list all subdirectories and files from a specified directory
#### (a horizontal list of entries, not a vertical list)

In [2]:
import os
print(os.listdir("C:\\Misc")) 

['datasets_handler.py', 'dump_data.py', 'locate_datasets.py', 'resources.tar.gz', 'spd.sas', 'Subdir1', 'Subdir2', 'support.py', 'Sysfunc.sas', '__init__.py']


### How to list all subdirectories and files but not the name of root directory (vertical list of entrie)

In [3]:
import os
names = os.listdir("C:\\Misc")
for p in names:
    print(p)

datasets_handler.py
dump_data.py
locate_datasets.py
resources.tar.gz
spd.sas
Subdir1
Subdir2
support.py
Sysfunc.sas
__init__.py


### How to list all files with a particular extension (.SAS) along with the root directory name but no subdirectories (using the glob library)

In [4]:
import glob
path = 'C:\\Misc'
files = (f for f in glob.glob(path + '**/*.sas', recursive=True))
for f in files:
    print(f)

C:\Misc\spd.sas
C:\Misc\Sysfunc.sas


### How to list all files along with the root directory name but no names of subdirectories (using the OS library)

In [5]:
import os
path = 'C:\\Misc'
files = []
for r, d, f in os.walk(path):
    for file in f:
        if '.sas' in file:
            files.append(os.path.join(r, file))
            
for f in files:
    print(f)

C:\Misc\spd.sas
C:\Misc\Sysfunc.sas


### How to list all subdirectories but no files (using the OS library)

In [6]:
import os
path = 'C:\\Misc'
folders = []
for r, d, f in os.walk(path):
    for folder in d:
        folders.append(os.path.join(r, folder))
            
for f in folders:
    print(f)

C:\Misc\Subdir1
C:\Misc\Subdir2


### How to list all subdirectories but no files (using the glob library)

In [14]:
from pathlib import Path
dir =  Path('C:\\Misc')
files = dir.glob('*.sas')
for i in files:
    print(i)

C:\Misc\spd.sas
C:\Misc\Sysfunc.sas


### How to list all files along with the parent directory name, and the date created but no names for subdirectories

In [7]:
import pandas as pd
from pathlib import Path
import time

p = Path("C:\\Misc")
all_files = []
for i in p.rglob('*.SAS'):
    all_files.append((i.name, i.parent, time.ctime(i.stat().st_ctime)))

columns = ["File_Name","Parent", "Created"]
df = pd.DataFrame.from_records(all_files, columns=columns)
print(df.to_string(index=False))


   File_Name   Parent                   Created
     spd.sas  C:\Misc  Sat Aug 31 15:06:16 2019
 Sysfunc.sas  C:\Misc  Sat Aug 31 15:09:04 2019


[How to select rows and columns in Pandas using [ ], .loc, iloc, .at and .iat](https://www.kdnuggets.com/2019/06/select-rows-columns-pandas.html)

In [15]:
from pathlib import Path
my_file = Path("C:\\Data\\Create_formats.sas")
my_file.is_file() 

True

### List out the name of the file if it exists in any folder(s) (using the OS library)

In [17]:
import os
path = 'C:\\'
files = []
for r, d, f in os.walk(path):
    for file in f:
        if 'spd.sas' in file:
            files.append(os.path.join(r, file))
            
for f in files:
    print(f)

C:\Confidential_Assessments\Stat6197_A2\spd.sas
C:\Confidential_Assessments\_Assignment2\spd.sas
C:\Data\spd.sas
C:\Misc\spd.sas
C:\Program Files\SASHome\SASFoundation\9.4\graph\sample\gkpspd.sas
C:\Program Files\_SASHome\SASFoundation\9.4\graph\sample\gkpspd.sas
C:\Users\pmuhuri\Downloads\spd.sas


### List out the name of the file if it exists in any folder (using the glob library - more efficient and faster)

In [18]:
import glob
path = 'C:\\'
files = (f for f in glob.glob(path + '**/spd.sas', recursive=True))
for f in files:
    print(f)

C:\Confidential_Assessments\Stat6197_A2\spd.sas
C:\Confidential_Assessments\_Assignment2\spd.sas
C:\Data\spd.sas
C:\Misc\spd.sas
C:\Users\pmuhuri\Contacts\Box Sync\SASCourse_Spring18\_Assignment2\spd.sas
C:\Users\pmuhuri\Downloads\spd.sas


### Count the number of files in a folder

In [19]:
import os
cpt = sum([len(files) for r, d, files in os.walk("C:\SASHELP_Raw1")])
cpt

184

### Copy .SAS files from one folder to another folder

In [2]:
import glob, os, shutil

files = glob.iglob(os.path.join('U:\A_DataRequest', "*.sas"))
for file in files:
    if os.path.isfile(file):
        shutil.copy2(file, 'U:\All_SAS')

### Move .SAS files from a particular folder to a new folder

In [7]:
import os
import shutil
sourcepath='U:/_Misc/'
sourcefiles = os.listdir(sourcepath)
destinationpath = 'U:/All_SAS'
for file in sourcefiles:
    if file.endswith('.sas'):
        shutil.move(os.path.join(sourcepath,file), os.path.join(destinationpath,file))

In [18]:
from saspy import autocfg
autocfg.main()

CFGFILE ALREADY EXISTS: C:\Users\pmuhuri\AppData\Local\Continuum\anaconda3\lib\site-packages\saspy\sascfg_personal.py


In [19]:
fd = open('C:\Data\Deads_2002.sas')
print(fd.read())
fd.close()


LIBNAME library  'C:\Data';
LIBNAME new  'C:\Data';
data Deads_2002;
  set new.panel6 (in=a) new.panel7 (in=b);
  array pstats(3) pstats31 pstats42 pstats53;
  
 if 23 in pstats then found_dead=1;
 else if 24 in pstats then found_dead=1;
 else if 31 in pstats then found_dead=1;
 else found_dead=0;  
 
if a=1 then panel=6; else panel=7;
if found_dead=1  then output;
 run;
 data new.c_Deads_2002; 
   set Deads_2002;
   cum_count+count;
run;

ods pdf file='C:\Data\Deads_data.pdf' ;
options nocenter nodate nonumber ls=132 leftmargin=.1in rightmargin=1in
options nocenter ls=132;
title 'Insope status of Deads in MEPS 2002';
proc print data=new.c_Deads_2002 noobs;
var Panel 
inscope INSC1231 inscop02 begrfy endrfy inscop31 pstats31 inscop42 pstats42
        inscop53 pstats53 cum_count ;
run;
ods pdf close;




### How to know the pandas version

In [20]:
pd.__version__

'0.23.4'

### Use the show_versions() function to know the versions of pandas' dependencies

In [21]:
import pandas as pd
pd.show_versions()


INSTALLED VERSIONS
------------------
commit: None
python: 3.7.1.final.0
python-bits: 64
OS: Windows
OS-release: 10
machine: AMD64
processor: Intel64 Family 6 Model 142 Stepping 9, GenuineIntel
byteorder: little
LC_ALL: None
LANG: None
LOCALE: None.None

pandas: 0.23.4
pytest: 4.0.2
pip: 18.1
setuptools: 40.6.3
Cython: 0.29.2
numpy: 1.15.4
scipy: 1.1.0
pyarrow: None
xarray: None
IPython: 7.2.0
sphinx: 1.8.4
patsy: 0.5.1
dateutil: 2.7.5
pytz: 2018.7
blosc: None
bottleneck: 1.2.1
tables: 3.4.4
numexpr: 2.6.8
feather: None
matplotlib: 3.0.2
openpyxl: 2.5.12
xlrd: 1.2.0
xlwt: 1.3.0
xlsxwriter: 1.1.2
lxml: 4.2.5
bs4: 4.6.3
html5lib: 1.0.1
sqlalchemy: 1.2.15
pymysql: None
psycopg2: None
jinja2: 2.10
s3fs: None
fastparquet: None
pandas_gbq: None
pandas_datareader: None


In [12]:
from platform import python_version
print(python_version())


3.7.4
