# CSV to list of dictionaries

Example of how to read a CSV file into a list of dictionaries, and then process these one at a time

In [None]:
import csv

# make list of dictionaries
# raw string (r'...'): the backslashes of the Windows path are kept literally,
# instead of being read as the (invalid) escape sequences "\d" and "\g"
# newline='': recommended by the csv module, so line breaks inside quoted fields are read correctly
with open(r'..\datasets\glassdoor.csv', newline='') as f:
    # note that all values (v) are text (string)
    items = [{k: v for k, v in row.items()} for row in csv.DictReader(f, skipinitialspace=True)]

In [None]:
print('number of items: ', len(items))

In [None]:
items[0:3]

In [None]:
# a function returning 2 values, packed together in one tuple
def myFunc():
    """Return the string "hi" and the number 7 as a 2-tuple."""
    greeting, number = "hi", 7
    return greeting, number

# a single target receives the whole tuple
values = myFunc()
print('values:', values)
print('type:', type(values))

# two targets unpack the tuple, one element each (in one go)
x, y = myFunc()
print('value x:', x, 'value y:', y)

In [None]:
# list of (key, value) tuples
myList = [
    ('hi', 7),
    ("go", 8),
    ("gators", 10),
]

In [None]:
# turn the list of (key, value) tuples into a dictionary
dict(myList)

## Process the list of dictionaries

### For loop

In [None]:
# process the first 3 dictionaries in the list
for i in items[:3]:
    # each pass through the loop, i is one dictionary (a single record)
    print(type(i))
    print('Overall score: {}, Firm: {} ({})'.format(
        i["overall_rating"],
        i["firm"],
        i["department"],
    ))
    # do other things

### List comprehension

In [None]:
# function that returns the department in uppercase
def myFunction( row ):
    """Return the 'department' value of the dictionary ``row`` in uppercase."""
    department = row['department']
    return department.upper()

# list comprehension: keep only the dictionaries where firm is 'ey',
# and call the function on each of them, one at a time
department_ey = [
    myFunction(row)
    for row in items
    if row['firm'] == 'ey'
]

print(department_ey)

## Converting numerical variables to float

```{k: v for k, v in row.items()}``` keeps all values as text (strings), because `csv.DictReader` reads every field as a string. What if we want to do numerical operations on some variables?

We can use the `isnumeric` method on strings

In [None]:
'5'.isnumeric()

and then change the string into float if True

In [None]:
float('5')

Unfortunately, `isnumeric` only works for non-negative whole numbers: a decimal point (or a minus sign) makes it return False

In [None]:
"1500".isnumeric()

In [None]:
"15.05".isnumeric()

In [None]:
# function to test if a string holds a valid number
# https://stackoverflow.com/questions/354038/how-do-i-check-if-a-string-represents-a-number-float-or-int
def is_number(s):
    """Return True if ``s`` can be converted to a float, or is a missing value ('').

    Return False for anything ``float`` cannot convert. That includes values
    that are not text or numbers at all, such as None (csv.DictReader uses
    None for fields that are missing from a short row).
    """
    # return True for missing values as well
    if s == '':
        return True
    try:
        float(s)
    except (TypeError, ValueError):
        # ValueError: text that is not a number
        # TypeError: a value float() cannot handle at all (for example None)
        return False
    return True

In [None]:
is_number("15.05")

We want to convert a whole column of strings into numbers, including values that are missing (''). This is why our function returns True for an empty string:

In [None]:
is_number('')

We would want to ensure though that all strings are valid numbers (in the whole column) so that we don't end up with a variable that has strings mixed with numbers.

We can use the `all` function for this. Call this function with a list of booleans. This function will return True if all are True. (There is also an `any` function, which returns True if at least one is True)

In [None]:
all([True, True, True])

In [None]:
all([True, True, False])

In [None]:
any([False, False, True])

Let's go through the 'overall_rating' variable. Use list comprehension to make a list of booleans for each value being a number or not, and pass this into `all`

In [None]:
# first 10
[ is_number( i['overall_rating'] ) for i in items[0:10]  ] 

In [None]:
all( [ is_number( i['overall_rating'] ) for i in items  ] )

In [None]:
# helper function: replace the value of a property by a float (in place)
# obj: dictionary to change
# prop: key of the value to change
def myFloatMaker( obj, prop  ):
    """Overwrite ``obj[prop]`` with its float value; '' (missing) becomes NaN.

    The dictionary is changed in place and nothing is returned.
    """
    current = obj[prop]
    # missing values are stored as NaN (not a number), everything else is cast to float
    obj[prop] = float("nan") if current == '' else float(current)

# if all are numbers, then let's convert it into a float
# (generator expression: all() stops at the first value that is not a number)
if all(is_number(i['overall_rating']) for i in items):
    # plain for loop instead of a list comprehension: myFloatMaker changes each
    # dictionary in place, so there are no results to collect in a list
    for i in items:
        myFloatMaker(i, "overall_rating")

items[0:3]

In [None]:
# function that turns strings into floats (if numeric)
# myListDictionaries: list of dictionaries
# props: list of strings with properties (for example: ['overall_rating', 'compensation'])
# this function still uses functions is_number and myFloatMaker (previous cells)
def MakeNumeric( myListDictionaries, props ):
    """Convert the given properties to float, in place, in every dictionary.

    A property is only converted when is_number accepts its value in all
    dictionaries; otherwise that property is left untouched everywhere, so a
    column never ends up with strings mixed with floats. Returns None.
    """
    # loop through each property to convert
    for prop in props:
        # are all values numeric (or missing)?
        # generator expression: all() stops at the first value that is not a number
        if all(is_number(row[prop]) for row in myListDictionaries):
            # set all values to float; a plain loop, because myFloatMaker
            # works in place and has no result to collect
            for row in myListDictionaries:
                myFloatMaker(row, prop)

            
# call function: convert all rating columns in one go
MakeNumeric(
    items,
    [
        'overall_rating',
        'compensation',
        'worklife',
        'culture_values',
        'career',
        'seniormanagement',
    ],
)

items[:3]

### About dealing with NaNs

See: https://towardsdatascience.com/5-methods-to-check-for-nan-values-in-in-python-3f21ddd17eed#:~:text=NaN%20stands%20for%20Not%20A,major%20problems%20in%20Data%20Analysis.
        
You can use numpy and pandas to check for NaNs. A NaN variable does not equal itself, so you can also use:    

In [None]:
def isNaN(num):
    """Return the result of comparing ``num`` with itself using ``!=``.

    NaN is not equal to itself, so this is True for a NaN float and False
    for ordinary numbers.
    """
    differs_from_itself = num != num
    return differs_from_itself

# a NaN value to try it on
x = float("nan")
isNaN(x)

In [None]:
isNaN(5.3)