### Lambda and Sets

In [6]:
import pandas as pd

# Don't use the state names as the index! Keeping the default integer index
# leaves `State` as a regular column, which is how it is accessed below.
states = pd.read_csv("s_states.csv")

def set_of_chars(s):
  """Return the set of distinct characters in ``s``, ignoring case.

  The string is lower-cased first, so "A" and "a" count as the same
  character. Every character is kept, including spaces.
  """
  lowered = s.lower()
  return set(lowered)

# One set of characters per state name; `apply` accepts the function directly.
series_of_sets = states["State"].apply(set_of_chars)

In [1]:
# Set union demo: every element found in either set, with the shared 2 kept once.
a = {1, 2, 3}
b = {2, 4}
a | b  # operator form; same result as a.union(b)

{1, 2, 3, 4}

In [8]:
from functools import reduce
# Fold the per-state character sets into a single set holding every character seen.
chars_used_in_states_name = reduce(lambda seen, chars: seen | chars, series_of_sets)

# Show how many distinct characters there are, then the characters themselves.
print(len(chars_used_in_states_name))
chars_used_in_states_name


26


{' ',
 'a',
 'b',
 'c',
 'd',
 'e',
 'f',
 'g',
 'h',
 'i',
 'j',
 'k',
 'l',
 'm',
 'n',
 'o',
 'p',
 'r',
 's',
 't',
 'u',
 'v',
 'w',
 'x',
 'y',
 'z'}

In [9]:
from string import ascii_lowercase
# Sanity check of what `ascii_lowercase` contains: letters only, no space.
print(" " in ascii_lowercase) # Should print `False`
print("a" in ascii_lowercase) # Should print `True`

False
True


In [12]:
# Keep only the characters that are lowercase ASCII letters (this drops the space).
chars_used_in_states_name = chars_used_in_states_name & set(ascii_lowercase)

# Show how many letters remain, then the letters themselves.
print(len(chars_used_in_states_name))
chars_used_in_states_name

25


{'a',
 'b',
 'c',
 'd',
 'e',
 'f',
 'g',
 'h',
 'i',
 'j',
 'k',
 'l',
 'm',
 'n',
 'o',
 'p',
 'r',
 's',
 't',
 'u',
 'v',
 'w',
 'x',
 'y',
 'z'}

### Combining DataFrames with `merge`

In [13]:
# Left-hand table for the merge examples: three ids, each paired with a name.
left = pd.DataFrame(
    {
        "id": [1, 2, 3],
        "names": ["Ethan", "Henry", "Mason"],
    }
)
left

Unnamed: 0,id,names
0,1,Ethan
1,2,Henry
2,3,Mason


In [14]:
# Right-hand table for the merge examples: the same ids, paired with different names.
right = pd.DataFrame(
    {
        "id": [1, 2, 3],
        "names": ["Mark", "Luke", "Peter"],
    }
)
right

Unnamed: 0,id,names
0,1,Mark
1,2,Luke
2,3,Peter


In [15]:
# Join the two tables on `id`. Both have a `names` column, so the result
# distinguishes them as `names_x` (from left) and `names_y` (from right).
left.merge(right, on="id")

Unnamed: 0,id,names_x,names_y
0,1,Ethan,Mark
1,2,Henry,Luke
2,3,Mason,Peter


In [17]:
# Tables that share only id 1: id 3 exists just in `left`, id 2 just in `right`.
left = pd.DataFrame({"id": [1, 3], "names": ["Ethan", "Mason"]})
right = pd.DataFrame({"id": [1, 2], "names": ["Mark", "Luke"]})
# The default join is how="inner", which removes non-overlapping ids.
left.merge(right, on="id")

Unnamed: 0,id,names_x,names_y
0,1,Ethan,Mark


In [18]:
# Left join: keep every row of `left`; ids with no match in `right` get a missing `names_y`.
left.merge(right, on="id", how="left")

Unnamed: 0,id,names_x,names_y
0,1,Ethan,Mark
1,3,Mason,


In [20]:
# Right join: keep every row of `right`; ids with no match in `left` get a missing `names_x`.
left.merge(right, on="id", how="right")

Unnamed: 0,id,names_x,names_y
0,1,Ethan,Mark
1,2,,Luke


In [21]:
# Outer join: keep the ids from both tables, leaving a gap wherever one side has no match.
left.merge(right, on="id", how="outer")

Unnamed: 0,id,names_x,names_y
0,1,Ethan,Mark
1,3,Mason,
2,2,,Luke


### Concat

In [22]:
# Two frames with identical columns, to be stacked vertically.
top = pd.DataFrame(
    {"letters": ["a", "b", "c"], "numbers": [1, 2, 3]}
)
bottom = pd.DataFrame(
    {"letters": ["g", "h", "i"], "numbers": [7, 8, 9]}
)
# Stacking along the rows keeps each frame's own row labels, so 0-2 appear twice.
pd.concat([top, bottom], axis=0)

Unnamed: 0,letters,numbers
0,a,1
1,b,2
2,c,3
0,g,7
1,h,8
2,i,9


In [25]:
# Three frames with the same columns and no `numbers` values in common.
top = pd.DataFrame(
    {"letters": ["a", "b", "c"], "numbers": [1, 2, 3]}
)
middle = pd.DataFrame(
    {"letters": ["d", "e", "f"], "numbers": [4, 5, 6]}
)
bottom = pd.DataFrame(
    {"letters": ["g", "h", "i"], "numbers": [7, 8, 9]}
)

# An outer merge on `numbers` keeps every row, but the two `letters` columns
# end up side by side (`letters_x`, `letters_y`) instead of being stacked.
top.merge(middle, on="numbers", how="outer")

Unnamed: 0,letters_x,numbers,letters_y
0,a,1,
1,b,2,
2,c,3,
3,,4,d
4,,5,e
5,,6,f


In [28]:
# Chain two outer merges on `numbers` to bring all three frames together.
# The first merge yields `letters_x` / `letters_y`; `bottom`'s column then
# joins without a name clash and stays plain `letters`.
(
    top
    .merge(middle, on="numbers", how="outer")
    .merge(bottom, on="numbers", how="outer")
)

Unnamed: 0,letters_x,numbers,letters_y,letters
0,a,1,,
1,b,2,,
2,c,3,,
3,,4,d,
4,,5,e,
5,,6,f,
6,,7,,g
7,,8,,h
8,,9,,i


### Reshaping

In [31]:
# 311 service-request data collected by the City of Pittsburgh.
# The CSV is too large to bundle with this notebook: look it up, download it
# separately, and save it next to the notebook under the file name used below.
data311 = pd.read_csv("./76fda9d0-69be-4dd5-8108-0de7907fc5a4.csv")
data311.head()

Unnamed: 0,_id,REQUEST_ID,CREATED_ON,REQUEST_TYPE,REQUEST_ORIGIN,STATUS,DEPARTMENT,NEIGHBORHOOD,COUNCIL_DISTRICT,WARD,TRACT,PUBLIC_WORKS_DIVISION,PLI_DIVISION,POLICE_ZONE,FIRE_ZONE,X,Y,GEO_ACCURACY
0,1,203364.0,2017-12-15T14:53:00,Street Obstruction/Closure,Call Center,1,DOMI - Permits,Central Northside,1.0,22.0,42003220000.0,1.0,22.0,1.0,1-7,-80.016716,40.454144,EXACT
1,2,200800.0,2017-11-29T09:54:00,Graffiti,Control Panel,1,Police - Zones 1-6,South Side Flats,3.0,16.0,42003160000.0,3.0,16.0,3.0,4-24,-79.969952,40.429243,APPROXIMATE
2,3,201310.0,2017-12-01T13:23:00,Litter,Call Center,1,DPW - Street Maintenance,Troy Hill,1.0,24.0,42003240000.0,1.0,24.0,1.0,1-2,-79.985859,40.459716,EXACT
3,4,200171.0,2017-11-22T14:54:00,Water Main Break,Call Center,1,Pittsburgh Water and Sewer Authority,Banksville,2.0,20.0,42003200000.0,5.0,20.0,6.0,4-9,-80.03421,40.406969,EXACT
4,5,193043.0,2017-10-12T12:46:00,Guide Rail,Call Center,1,DPW - Construction Division,East Hills,9.0,13.0,42003130000.0,2.0,13.0,5.0,3-19,-79.876582,40.451226,EXACT


In [33]:
# Split the requests into one group per neighborhood. Displaying the result
# shows only the GroupBy object itself, not any rows.
neighborhood = data311.groupby("NEIGHBORHOOD")
neighborhood

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x7f9a4923b6d0>

In [34]:
# Number of distinct groups, i.e. neighborhoods that appear in the data.
neighborhood.ngroups

91

In [35]:
# Mapping from each neighborhood name to the index labels of its rows.
# NOTE: with this many groups the printed mapping is very long.
neighborhood.groups

{'Allegheny Center': Int64Index([   137,    446,    730,   1877,   2148,   2397,   2549,   2619,
               2759,   2775,
             ...
             678600, 678607, 678677, 678748, 678861, 679142, 679360, 679644,
             679796, 679891],
            dtype='int64', length=2424),
 'Allegheny West': Int64Index([   945,   1417,   1426,   2129,   2897,   3119,   3610,   4222,
               4553,   4612,
             ...
             674750, 675489, 675767, 676503, 676751, 677273, 678546, 678990,
             678998, 679006],
            dtype='int64', length=1306),
 'Allentown': Int64Index([   116,    167,    257,    350,    408,    439,    657,    853,
               1092,   1213,
             ...
             679476, 679559, 679573, 679614, 679652, 679752, 679818, 679906,
             679967, 680001],
            dtype='int64', length=7281),
 'Arlington': Int64Index([   344,    460,    565,    566,    709,    710,    754,   1158,
               1199,   1324,
             ...


In [36]:
# Number of rows (requests) in each neighborhood group.
neighborhood.size()

NEIGHBORHOOD
Allegheny Center       2424
Allegheny West         1306
Allentown              7281
Arlington              5259
Arlington Heights       140
                       ... 
Upper Lawrenceville    5903
West End               1599
West Oakland           3458
Westwood               3457
Windgap                2308
Length: 91, dtype: int64

In [37]:
# Pull out the rows of a single group by its key and preview the first few.
Arlington_Group = neighborhood.get_group("Arlington")
Arlington_Group.head()

Unnamed: 0,_id,REQUEST_ID,CREATED_ON,REQUEST_TYPE,REQUEST_ORIGIN,STATUS,DEPARTMENT,NEIGHBORHOOD,COUNCIL_DISTRICT,WARD,TRACT,PUBLIC_WORKS_DIVISION,PLI_DIVISION,POLICE_ZONE,FIRE_ZONE,X,Y,GEO_ACCURACY
344,345,181304.0,2017-08-22T14:22:00,Overgrowth,Call Center,1,DPW - Street Maintenance,Arlington,3.0,16.0,42003160000.0,3.0,16.0,3.0,4-22,-79.977372,40.417817,EXACT
460,461,186068.0,2017-09-12T09:01:00,Junk Vehicles,Call Center,1,"Permits, Licenses and Inspections",Arlington,3.0,16.0,42003560000.0,3.0,16.0,3.0,4-8,-79.97375,40.414812,APPROXIMATE
565,566,162690.0,2017-06-16T13:26:00,Potholes,Call Center,1,DPW - Street Maintenance,Arlington,3.0,16.0,42003560000.0,3.0,16.0,3.0,4-8,-79.975976,40.416603,EXACT
566,567,162720.0,2017-06-16T14:20:00,Potholes,Call Center,1,DPW - Street Maintenance,Arlington,3.0,16.0,42003560000.0,3.0,16.0,3.0,4-8,-79.976027,40.416822,EXACT
709,710,151094.0,2017-05-05T10:39:00,Potholes,Call Center,1,DPW - Street Maintenance,Arlington,3.0,16.0,42003560000.0,3.0,16.0,3.0,4-8,-79.971742,40.415572,EXACT


In [38]:
# Group on two columns at once; each group is then identified by a
# (neighborhood, department) tuple, which is what `get_group` expects.
requests_by_neighborhood = data311.groupby(["NEIGHBORHOOD", "DEPARTMENT"])
requests_by_neighborhood.get_group(("Arlington", "311"))

Unnamed: 0,_id,REQUEST_ID,CREATED_ON,REQUEST_TYPE,REQUEST_ORIGIN,STATUS,DEPARTMENT,NEIGHBORHOOD,COUNCIL_DISTRICT,WARD,TRACT,PUBLIC_WORKS_DIVISION,PLI_DIVISION,POLICE_ZONE,FIRE_ZONE,X,Y,GEO_ACCURACY
33683,33684,23524.0,2015-09-15T06:52:00,Vacant Lot,Website,1,311,Arlington,3.0,16.0,4.200356e+10,3.0,16.0,3.0,4-8,-79.973651,40.414868,APPROXIMATE
45819,45820,131104.0,2017-02-09T15:41:00,Question,Call Center,1,311,Arlington,3.0,16.0,4.200356e+10,3.0,16.0,3.0,4-8,-79.963551,40.412528,APPROXIMATE
70696,70697,162318.0,2017-06-15T14:49:00,Police Department,Website,1,311,Arlington,3.0,16.0,4.200356e+10,3.0,16.0,3.0,4-8,-79.973633,40.415081,APPROXIMATE
72190,72191,87018.0,2016-07-11T15:42:00,Vacant Lot,Control Panel,1,311,Arlington,3.0,16.0,4.200356e+10,3.0,16.0,3.0,4-8,-79.973524,40.415076,APPROXIMATE
160343,160344,43934.0,2016-01-27T16:16:00,Schedule Request,Call Center,1,311,Arlington,3.0,16.0,4.200356e+10,3.0,16.0,3.0,4-8,-79.963774,40.412131,APPROXIMATE
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
652662,785190,673421.0,2022-08-31T10:14:00,Referral,Call Center,1,311,Arlington,3.0,16.0,4.200356e+10,3.0,16.0,3.0,4-8,-79.973849,40.414853,APPROXIMATE
660517,796725,681256.0,2022-09-27T11:53:00,City Cuts Concern,Call Center,1,311,Arlington,3.0,16.0,4.200356e+10,3.0,16.0,3.0,4-8,-79.963678,40.412425,APPROXIMATE
663610,801084,684343.0,2022-10-08T09:16:00,Vacant Lot,Website,3,311,Arlington,3.0,16.0,4.200356e+10,3.0,16.0,3.0,4-8,-79.963449,40.412502,APPROXIMATE
669602,810208,690310.0,2022-11-01T10:37:00,City Cuts Concern,Call Center,1,311,Arlington,3.0,16.0,4.200356e+10,3.0,16.0,3.0,4-8,-79.963380,40.412236,APPROXIMATE
