In [1]:
import pandas as pd
import os

## Obtain datasets from data.gov

https://catalog.data.gov/dataset

In [2]:
from six.moves import urllib

# Root directory under which downloaded datasets are cached
ROOT_DATA = "../../../ROOT_DATA/data_gov/"

def fetch_data_from_URL(housing_url, file_name, sub_dir="tmp", root_path=ROOT_DATA):
    """Download a file once and return the path of the locally cached copy.

    The file is stored at ``<root_path>/<sub_dir>/<file_name>``. If a file
    already exists at that location nothing is downloaded and the existing
    path is returned.

    Args:
        housing_url: URL of the file to download (any URL works; the
            parameter name is kept for backward compatibility).
        file_name: Name to give the downloaded file.
        sub_dir: Sub-directory of ``root_path`` to place the file in.
        root_path: Root directory of the local dataset cache.

    Returns:
        Path of the cached file.

    Raises:
        Whatever ``urllib.request.urlretrieve`` raises when the download
        fails; in that case no file is left at the returned location.
    """
    placement_dir = os.path.join(root_path, sub_dir)
    if not os.path.isdir(placement_dir):
        os.makedirs(placement_dir)
    placement_path = os.path.join(placement_dir, file_name)
    # only download if not already present
    if not os.path.isfile(placement_path):
        # Download to a temporary sibling first: writing straight to the final
        # path would leave a truncated file behind if the transfer is
        # interrupted, and the check above would then treat it as a valid
        # cached copy on every later run.
        partial_path = placement_path + ".part"
        try:
            urllib.request.urlretrieve(housing_url, partial_path)
            os.rename(partial_path, placement_path)
        except BaseException:
            # Remove the incomplete download, then let the error propagate.
            if os.path.isfile(partial_path):
                os.remove(partial_path)
            raise
    return placement_path

In [3]:
# Download the CSV export from data.lacity.org, or reuse the local copy if present
la_csv_path = fetch_data_from_URL(
    housing_url="https://data.lacity.org/api/views/nxs9-385f/rows.csv?accessType=DOWNLOAD",
    file_name="2010.csv",
    sub_dir="la_cencus",
)

In [4]:
# Read the entire file into a DataFrame
df = pd.read_csv(la_csv_path)

# Summary of the DataFrame. DataFrame.info() prints its report itself and
# returns None, so wrapping it in print() would add a stray "None" line.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 319 entries, 0 to 318
Data columns (total 7 columns):
Zip Code                  319 non-null int64
Total Population          319 non-null int64
Median Age                319 non-null float64
Total Males               319 non-null int64
Total Females             319 non-null int64
Total Households          319 non-null int64
Average Household Size    319 non-null float64
dtypes: float64(2), int64(5)
memory usage: 17.5 KB
None


In [5]:
# Preview the first five rows (five is also head()'s default)
print(df.head(n=5))

   Zip Code  Total Population  Median Age  Total Males  Total Females  \
0     91371                 1        73.5            0              1   
1     90001             57110        26.6        28468          28642   
2     90002             51223        25.5        24876          26347   
3     90003             66266        26.3        32631          33635   
4     90004             62180        34.8        31302          30878   

   Total Households  Average Household Size  
0                 1                    1.00  
1             12971                    4.40  
2             11731                    4.36  
3             15642                    4.22  
4             22547                    2.73  


In [6]:
# Element-wise comparison: a boolean Series, True where Median Age exceeds 25
print(df["Median Age"].gt(25))

0       True
1       True
2       True
3       True
4       True
5       True
6       True
7      False
8       True
9       True
10      True
11      True
12      True
13      True
14      True
15      True
16      True
17      True
18      True
19      True
20      True
21      True
22      True
23     False
24      True
25      True
26      True
27      True
28      True
29      True
       ...  
289     True
290     True
291     True
292     True
293     True
294     True
295     True
296     True
297     True
298     True
299     True
300     True
301     True
302     True
303     True
304     True
305     True
306     True
307     True
308     True
309     True
310     True
311     True
312     True
313     True
314     True
315     True
316     True
317     True
318     True
Name: Median Age, dtype: bool


In [7]:
# Boolean mask: True for rows whose Median Age exceeds 30
mask = df["Median Age"].gt(30)
# Keep only the rows where the mask is True and show the first few
print(df.loc[mask].head())

   Zip Code  Total Population  Median Age  Total Males  Total Females  \
0     91371                 1        73.5            0              1   
4     90004             62180        34.8        31302          30878   
5     90005             37681        33.9        19299          18382   
6     90006             59185        32.4        30254          28931   
8     90008             32327        39.7        14477          17850   

   Total Households  Average Household Size  
0                 1                    1.00  
4             22547                    2.73  
5             15044                    2.50  
6             18617                    3.13  
8             13841                    2.33  


## Multiple conditions

### AND

In [8]:
# One boolean mask per condition
age_mask = df["Median Age"].gt(42)
pop_mask = df["Total Population"].gt(40000)

In [9]:
# & is the element-wise AND: a row is kept only when both masks are True
print(df.loc[age_mask & pop_mask].head())

     Zip Code  Total Population  Median Age  Total Males  Total Females  \
96      90275             41804        47.8        20283          21521   
136     90703             49399        43.9        23785          25614   
286     91789             43079        42.8        20988          22091   

     Total Households  Average Household Size  
96              15618                    2.65  
136             15604                    3.16  
286             12891                    3.31  


### OR

In [10]:
# Same two conditions as in the AND section
age_mask = df["Median Age"].gt(42)
pop_mask = df["Total Population"].gt(40000)

In [11]:
# | is the element-wise OR: a row is kept when at least one mask is True
print(df.loc[age_mask | pop_mask].head())

   Zip Code  Total Population  Median Age  Total Males  Total Females  \
0     91371                 1        73.5            0              1   
1     90001             57110        26.6        28468          28642   
2     90002             51223        25.5        24876          26347   
3     90003             66266        26.3        32631          33635   
4     90004             62180        34.8        31302          30878   

   Total Households  Average Household Size  
0                 1                    1.00  
1             12971                    4.40  
2             11731                    4.36  
3             15642                    4.22  
4             22547                    2.73  


### Combined

In [12]:
# Age conditions: above 43 ("up") and below 25 ("down")
age_mask_up = df["Median Age"].gt(43)
age_mask_down = df["Median Age"].lt(25)
# NOTE(review): both population masks use the same "> 40000" condition;
# confirm whether pop_mask_down was meant to select populations below a threshold.
pop_mask_up = df["Total Population"].gt(40000)
pop_mask_down = df["Total Population"].gt(40000)

In [13]:
# Keep rows matching either pair of conditions; the parentheses make the
# AND-before-OR grouping explicit
print(
    df.loc[
        (age_mask_down & pop_mask_down)
        | (age_mask_up & pop_mask_up)
    ].head()
)

     Zip Code  Total Population  Median Age  Total Males  Total Females  \
7       90007             40920        24.0        20915          20005   
23      90024             47452        23.6        22248          25204   
96      90275             41804        47.8        20283          21521   
136     90703             49399        43.9        23785          25614   

     Total Households  Average Household Size  
7               11944                    3.00  
23              17903                    2.03  
96              15618                    2.65  
136             15604                    3.16  
