# Correlation Crawler

This notebook is where we defined the "rabbit hole" function, which recursively finds correlations and correlations with correlated variables, given a variable, recursion depth, and correlation threshold.

## Passing a variable and getting all correlations to that variable that don't already correlate to a previous predictor variable

#### Import libraries - If running on the core/capstone container, you might need to update Pandas or it'll error on the sort_values function

In [1]:
pip install -U pandas

Collecting pandas
[?25l  Downloading https://files.pythonhosted.org/packages/99/f0/f99700ef327e51d291efdf4a6de29e685c4d198cbf8531541fc84d169e0e/pandas-1.3.5.tar.gz (4.7MB)
[K     |████████████████████████████████| 4.7MB 3.5MB/s eta 0:00:01
[?25h  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h    Preparing wheel metadata ... [?25ldone
Building wheels for collected packages: pandas
  Building wheel for pandas (PEP 517) ... [?25ldone
[?25h  Created wheel for pandas: filename=pandas-1.3.5-cp37-cp37m-linux_x86_64.whl size=30216315 sha256=99d92bd0dd49ff9e004664c9ee7ff6cc1e9482acadecbf0a9ca6ec45f8935390
  Stored in directory: /home/bmgwd9/.cache/pip/wheels/5c/f4/45/389dc711f0c5ff9adeb5245397ab18bf75182e8cff9fbfa916
Successfully built pandas
Installing collected packages: pandas
  Found existing installation: pandas 0.25.2
    Uninstalling pandas-0.25.2:
      Successfully uninstalled pandas-0.25.2
Successfully installed pan

In [2]:
import pandas as pd



#### Defining indicator lookup

In [3]:
def indicator_definition_lookup(indicatorarray):
    
    defs = []
    
    indicatordict = pd.read_csv('../../M1/make_indicator_dict/Indicator_Dict.csv')
    
    indicatordict.columns = ["IndicatorCode", "IndicatorName"]
    
    for ind in indicatorarray:
        defs.append(indicatordict[indicatordict["IndicatorCode"] == ind]["IndicatorName"].values[0])
        
    return defs

#### Running the lookup - example is multiple metrics

In [4]:
indicator_definition_lookup(['SPDYNTFRTIN', 'SESECENRLGCFEZS', 'SEPRMOENRZS', 'ITNETUSERZS', 'DTODAODATMPZS', 'EGUSEELECKHPC', 'SHTBSMORT', 'NYGDPFRSTRTZS', 'AGYLDCRELKG', 'SPURBGROW', 'SHDYNMORTMA'])

['Fertility rate, total (births per woman)',
 'Secondary education, general pupils (% female)',
 'Over-age students, primary (% of enrollment)',
 'Individuals using the Internet (% of population)',
 'Net ODA received (% of imports of goods, services and primary income)',
 'Electric power consumption (kWh per capita)',
 'Tuberculosis death rate (per 100,000 people)',
 'Forest rents (% of GDP)',
 'Cereal yield (kg per hectare)',
 'Urban population growth (annual %)',
 'Mortality rate, under-5, male (per 1,000)']

#### Importing cleaned dataset

In [11]:
full=pd.read_csv("../../M2/carpentry/completeDF.csv")
full=full.iloc[:,3:]#.loc[:,[a for a in full.iloc[:,3:].columns if "indicator" not in a and "short_name" not in a and "alpha2" not in a and "alpha3" not in a and "numeric_code" not in a and "iso3166" not in a]]


#### Creating a correlation matrix


In [12]:
#Creating a correlation matrix
corrDF = full.corr()

#### Defining the correlation generator

In [13]:
def indicator_correlations_gen(indicator, minimum=.6, maximum=.9):

    #Filter DF by indicated correlation threshholds
    cutDF = pd.DataFrame(corrDF[(abs(corrDF[indicator]) > minimum) & (abs(corrDF[indicator] < maximum))][indicator])
    
    #Lookup definitions of correlated indicators
    indicatordefs = indicator_definition_lookup([a for a in cutDF.index])
    
    #Create df containing correlation, correlated indicator, and definition
    cutDF['Indicator']=indicatordefs

    #Return as array
    print("Indicator searched: ", indicator_definition_lookup([indicator]))
    print("Total correlations: ", len(cutDF))
    print(" ")
    return cutDF

In [14]:
indicator_correlations_gen('SHDTH0509')

Indicator searched:  ['Number of deaths ages 5-9 years']
Total correlations:  76
 


Unnamed: 0,SHDTH0509,Indicator
SPPOPAG25FEIN,0.641655,"Age population, age 25, female, interpolated"
SPPOPAG25MAIN,0.645557,"Age population, age 25, male, interpolated"
SPPOPTOTL,0.643418,"Population, total"
SPPOPTOTLFEIN,0.639237,"Population, female"
SPPOPTOTLMAIN,0.647213,"Population, male"
...,...,...
SPPOPAG05FEIN,0.797322,"Age population, age 05, female, interpolated"
SPPOPAG05MAIN,0.793180,"Age population, age 05, male, interpolated"
ENATMMETHAGKTCE,0.643500,Agricultural methane emissions (thousand metri...
AGLNDCRELHA,0.656034,Land under cereal production (hectares)


#### The project for this recursive function was to get an input variable, find all correlated features with that variable, then find all correlated features for those variables to a pre-defined depth. Since it is recursive, we had to use a couple of global variables to escape being overwritten. 

In [15]:
def printable_rabbit_hole(corr_df, variable, var2, var3, var4, var5, depth = 2, thresh = 0.5, pass_around = 0):

    x=-1 #Setting a counter - Could be set to zero if the x came after the changes, no difference
    if depth == 0: #Setting termination criteria
        #print("end of corr")
        return
    global width #setting absolute width as a global value so it isn't rewritten - this is the mechanism of writing at the appropriate width
    global var1 #Setting absolutely var1 as a definition to build into without overwriting
    if width==2: #Using some hacky if statements to make the deeper metrics more restrictive
        thresh=.7
    elif width==3:
        thresh=.8
    elif width==4:
        thresh=.9
    elif width==5:
        thresh=.95
    for correlation in corr_df[(abs(corr_df[variable])>= thresh)&(abs(corr_df[variable])<1)].sort_values(by=variable, key=abs, ascending=False)[variable]: #Setting the correlated array based on the threshold
        width+=1 #Incrementing the global width value for each value. Until a variable completes and decrements, this keeps getting wider
        x = x+1 #Incremementing the counter
        temp_var = corr_df[(abs(corr_df[variable])>= thresh)&(abs(corr_df[variable])<1)].sort_values(by=variable, key=abs, ascending=False).index[x] #Pulling the variable using the index
        if width==5: #Using a hacky list of if statements to set the proper variable width
            var5=temp_var
        elif width==4:
            var4=temp_var
        elif width==3:
            var3=temp_var
        elif width==2:
            var2=temp_var          
            
        var={temp_var:{"corr":correlation}} #Setting the write criteria
        
        if width==5: #Using another set of hacky if statements to write the criteria at the correct width
            var1[var2][var3][var4].update(var)
        elif width==4:
            var1[var2][var3].update(var)
        elif width==3:
            var1[var2].update(var)
        elif width==2:
            var1.update(var)   
        print("\t" * pass_around + temp_var + ": " + str(correlation)) #Luke's original print criteria - good for viewing
        printable_rabbit_hole(corr_df, 
                              variable = temp_var, 
                              depth = depth - 1,
                              thresh = thresh,
                              pass_around = pass_around + 1,
                              var2=var2, var3=var3, var4=var4, var5=var5,)
        width-=1 #Decrementing after the write happens to prepare for the next piece
        if width==4: #Using hacky if statements to flush the vars beyond the currently-used width
            var5==""
        elif width==3:
            var4==""
        elif width==2:
            var3==""
        elif width==1:
            var2==""

#### Executing the above. Just change the variable

In [16]:
variable='SPDYNCBRTIN'
width=1
var1={}
printable_rabbit_hole(corrDF, variable, var2="", var3="", var4="", var5="", depth = 2)

SPPOP0004MA5Y: 0.9882226398510024
	SPPOP0004FE5Y: 0.9965248794708107
	SPDYNCBRTIN: 0.9882226398510024
	SPPOPDPNDYG: 0.9864456609176305
	SPPOP0014MAZS: 0.9817648367127353
	SPPOP0014TOZS: 0.9814615049312868
	SPPOP0014FEZS: 0.9790250296182205
	SPPOP0509MA5Y: 0.9700278430751134
	SPPOP0509FE5Y: 0.9688036557969784
	SPDYNTFRTIN: 0.9615510800721716
	SPPOP1564MAZS: -0.9569900042153677
	SPPOPDPND: 0.9424462207967538
	SPPOP1564TOZS: -0.9400659681909289
	SPPOP4549MA5Y: -0.9219638644479754
	SPPOP4044MA5Y: -0.9185113553350811
	SPPOP4549FE5Y: -0.9160811192434877
	SPPOP4044FE5Y: -0.9047236745103988
	SPPOP1564FEZS: -0.902590926123452
	SPPOP1014FE5Y: 0.9018008197484069
	SPPOP5054MA5Y: -0.8969229149078787
	SPPOP1014MA5Y: 0.89298511904347
	SPPOP5054FE5Y: -0.8929769074936215
	SPPOP5559MA5Y: -0.8758898092008277
	SPPOP5559FE5Y: -0.8723698882073535
	SPPOP6064MA5Y: -0.8674368126959046
	SPPOP6064FE5Y: -0.8629007321608492
	SPPOP6569MA5Y: -0.8619001890817354
	SPPOP6569FE5Y: -0.8586964863823184
	SPPOP65UPTOZS: -0.

	SPPOP6569FE5Y: -0.8954684649367148
	SPPOP4044FE5Y: -0.8953693691055481
	SPPOP7074FE5Y: -0.8943160245903676
	SPPOP6569MA5Y: -0.8938953473218506
	SPPOP1564FEZS: -0.8870713586127855
	SPPOP65UPMAZS: -0.8861057381788852
	SPPOP7579FE5Y: -0.8851578534492862
	SPPOP7074MA5Y: -0.8793834707325512
	SPPOP1519FE5Y: 0.8775242533022664
	SPPOPDPNDOL: -0.8678518665366918
	SPPOP7579MA5Y: -0.8594498510320118
	SPPOP1519MA5Y: 0.8452174337946419
	SPPOP80UPFE5Y: -0.8435801324469512
	SPDYNLE00FEIN: -0.8381231383393783
	SPDYNIMRTMAIN: 0.8372207350475367
	SPDYNIMRTIN: 0.8331076559669554
	SPPOP3539MA5Y: -0.8276705402928208
	SPDYNIMRTFEIN: 0.8269821439664086
	SHDYNNMRT: 0.8245539738343736
	SPADOTFRT: 0.8243652765877261
	SPDYNTO65FEZS: -0.8233273625159874
	SPDYNLE00IN: -0.8107047113132789
	SHDYNMORTMA: 0.8066218189541744
	SPPOP80UPMA5Y: -0.8037309556876197
	SHDYNMORT: 0.8000860934236393
	SPPOPGROW: 0.7964487423676538
	SHDYNMORTFE: 0.7916733798190744
	SLAGREMPLMAZS: 0.7782722816636473
	ITMLTMAINP2: -0.7776418342808

	SHDYNNMRT: -0.7943326140190348
	SPPOP6064MA5Y: 0.7868346730656862
	SPDYNLE00IN: 0.7761875047520791
	SPPOP6569FE5Y: 0.76464876940195
	SPPOP1519FE5Y: -0.7628964847147468
	SPPOPGROW: -0.7597125266039255
	SPPOP7074FE5Y: 0.7561215855633078
	SLINDEMPLMAZS: 0.7552077448569344
	SPPOP65UPFEZS: 0.7503921157573162
	NYADJDPEMGNZS: -0.7491336076071756
	SPPOP6569MA5Y: 0.7436778370547414
	SPPOP65UPTOZS: 0.7427784107000335
	SPPOP7579FE5Y: 0.7415658460712073
	SLAGREMPLMAZS: -0.7403805268192457
	SPDYNAMRTFE: -0.7386567208728909
	EGFECRNEWZS: -0.7369999159072845
	SPDYNLE00MAIN: 0.733444457893505
	SPPOP1519MA5Y: -0.7296872884703774
	SLEMPVULNFEZS: -0.7291760822285164
	SLEMPSELFFEZS: -0.7286696981801389
	SLEMPWORKFEZS: 0.7286692287230739
	SLEMPSELFZS: -0.7269652113533178
	SLEMPWORKZS: 0.7269642856326384
	SLEMPVULNZS: -0.7244122396943342
	SHDYN0509: -0.7189490655448317
	SPPOP65UPMAZS: 0.7173456046537487
	SPPOP7074MA5Y: 0.7165763757745262
	SLAGREMPLZS: -0.7123950739119168
	SLEMPSELFMAZS: -0.7072151058635398

SPPOP4044MA5Y: -0.8991438563678873
	SPPOP4044FE5Y: 0.9773309654432347
	SPPOP4549MA5Y: 0.9327313706403033
	SPPOP0014MAZS: -0.9201634407423246
	SPPOP0014TOZS: -0.9192534345541862
	SPPOP0004MA5Y: -0.9185113553350811
	SPPOPDPNDYG: -0.9170912471784753
	SPPOP0014FEZS: -0.9165239177696582
	SPPOP4549FE5Y: 0.9161593256096264
	SPPOP0004FE5Y: -0.9149864474795452
	SPPOP0509MA5Y: -0.9112188436521994
	SPPOP0509FE5Y: -0.9089476162385317
	SPPOP1564MAZS: 0.9081306980980561
	SPDYNCBRTIN: -0.8991438563678873
	SPPOP3539MA5Y: 0.8921435670367462
	SPPOP1564TOZS: 0.8867554171971329
	SPPOPDPND: -0.8786224651892844
	SPPOP1014FE5Y: -0.8702076402834643
	SPPOP1014MA5Y: -0.8651781268289611
	SPDYNTFRTIN: -0.8591125374188088
	SPPOP1564FEZS: 0.8452700547316886
	SPPOP5054MA5Y: 0.8449759419035895
	SPPOP5054FE5Y: 0.8337536509056778
	SPPOP3539FE5Y: 0.8233036428757037
	SPPOP6569FE5Y: 0.8126403620931699
	SPPOP5559MA5Y: 0.8096135084802033
	SPPOP6064FE5Y: 0.8095180380453498
	SPPOP6064MA5Y: 0.809375749053905
	SPPOP6569MA5Y: 0.

	SPDYNIMRTIN: 0.9808979524587246
	SPDYNIMRTFEIN: 0.9805798842102191
	SPDYNIMRTMAIN: 0.9801656008189911
	SPDYNLE00FEIN: -0.933552225330376
	SPDYNLE00IN: -0.9167694146254062
	SHDYNNMRT: 0.9143279664927306
	NYADJDPEMGNZS: 0.911941220467999
	SHDYN0509: 0.9071474255374474
	SPDYNTO65FEZS: -0.9049507469228267
	SPDYNTFRTIN: 0.8990124206196118
	SPDYNLE00MAIN: -0.8863325376271244
	SPDYNCBRTIN: 0.8822685482940181
	SPPOPDPNDYG: 0.8410436464548671
	SPPOPDPND: 0.8366408008873407
	SPPOP0004MA5Y: 0.8352430268003436
	SPDYNAMRTFE: 0.8327183000917605
	SPPOP0004FE5Y: 0.8307525453418824
	SHDYN1014: 0.8254636762052343
	SPPOP1564TOZS: -0.8155276585945622
	SPPOP1564MAZS: -0.8149997426909942
	SPPOP0014MAZS: 0.8081771484366634
	SPPOP0014TOZS: 0.8066218189541744
	SPPOP0014FEZS: 0.8037193921570087
	SPPOP1564FEZS: -0.8001447957053056
	SPADOTFRT: 0.7904184821505424
	SPDYNTO65MAZS: -0.789789720059481
	SPPOP0509MA5Y: 0.7890768068260607
	SPPOP0509FE5Y: 0.7872721070661877
	SLAGREMPLMAZS: 0.7710502985053529
	SHIMMPOL3: 

	SLAGREMPLMAZS: 0.7654487663357374
	SLAGREMPLZS: 0.7640554384682144
	SLEMPVULNZS: 0.7586472826994698
	SHSTAOWADMAZS: -0.7566798585906441
	SHIMMIDPT: -0.7555532229468018
	SLEMPVULNMAZS: 0.7532120091994828
	SLEMPVULNFEZS: 0.7504660555711793
	SLEMPWORKZS: -0.7501355891348266
	SLEMPSELFZS: 0.7501352092037545
	SPPOP4044FE5Y: -0.7498283069850791
	SLEMPSELFFEZS: 0.7464255714080659
	SLEMPWORKFEZS: -0.746424467075633
	SLEMPSELFMAZS: 0.7411448243001365
	SLEMPWORKMAZS: -0.7411427360150711
	SHIMMMEAS: -0.738371981029627
	SPPOP4044MA5Y: -0.7360541283282267
	SHSTAOWADZS: -0.7306679268202634
	SPPOP4549FE5Y: -0.7287972872869002
	SPPOP4549MA5Y: -0.7233502507481513
	SPPOP1014FE5Y: 0.7227714258772803
	NVAGRTOTLZS: 0.7212965569804317
	SLAGREMPLFEZS: 0.720584548647891
	SPPOP1014MA5Y: 0.7196983886398105
	SLSRVEMPLZS: -0.7168739414333517
	SLFAMWORKMAZS: 0.7153610018900859
	SPPOP3539MA5Y: -0.7138759440341689
	SHHTNTRETFEZS: -0.7103500327278695
	SPPOP3539FE5Y: -0.7073906149114774
	SHHTNTRETZS: -0.7025743383504

	SPDYNIMRTMAIN: -0.9085696694407809
	SPDYNIMRTIN: -0.9062622888771241
	SHDYNMORTMA: -0.9049507469228267
	SPDYNIMRTFEIN: -0.902327895736855
	SHDYNMORT: -0.8998464513435132
	SHDYNMORTFE: -0.8929456054267242
	SPDYNAMRTMA: -0.8683041391795748
	SPDYNCBRTIN: -0.8660056947492291
	SPDYNTFRTIN: -0.8551386207945582
	SPPOPDPNDYG: -0.8427832324453058
	SHDYNNMRT: -0.8394379734768995
	SPPOP0004MA5Y: -0.8370676252824648
	SPPOP0004FE5Y: -0.832082732655822
	SPPOP0014MAZS: -0.8257268551046801
	SPPOP0014TOZS: -0.8233273625159874
	SPPOP0014FEZS: -0.8199539191140918
	SPPOPDPND: -0.8143283742474986
	SPPOP0509MA5Y: -0.8084060481752643
	NYADJDPEMGNZS: -0.8057090162601228
	SPPOP0509FE5Y: -0.8052410156254282
	SHDYN0509: -0.8048409155879784
	SPPOP1564TOZS: 0.8006366583997434
	SPPOP1564MAZS: 0.7989666568699919
	SPPOP1564FEZS: 0.7864188210105972
	SPPOP4044FE5Y: 0.7837604479922227
	SHSTAOWADMAZS: 0.7784559599689878
	SPADOTFRT: -0.7741322671690197
	SPPOP4549FE5Y: 0.7681769842049209
	SPPOP1014MA5Y: -0.767239084058430

	SPPOP1014MA5Y: -0.7635577802247857
	SPADOTFRT: -0.7634530974856631
	SPPOP4549FE5Y: 0.7564550942138514
	SPPOP1014FE5Y: -0.7558357429576481
	SPPOP4549MA5Y: 0.7526211285350121
	SPPOP4044MA5Y: 0.7469526260337865
	SPPOP5054MA5Y: 0.7442975515261534
	SLSRVEMPLZS: 0.7439917974894885
	SLEMPVULNFEZS: -0.7376107771601432
	SPPOP5054FE5Y: 0.733168496043973
	SPPOP65UPMAZS: 0.7330054895989645
	SLEMPSELFFEZS: -0.7328541289865281
	SLEMPWORKFEZS: 0.7328533961903069
	SLEMPVULNZS: -0.732061963531687
	SPPOP5559MA5Y: 0.7263073360838508
	SPPOP80UPMA5Y: 0.7254031482563487
	SHHTNTRETMAZS: 0.7244852015142791
	SLSRVEMPLMAZS: 0.7220342138500949
	SHSTAOB18MAZS: 0.719501635732985
	SLEMPWORKZS: 0.7189588782389008
	SLEMPSELFZS: -0.718957405518967
	SLAGREMPLFEZS: -0.7185775070585724
	SHSTAOWADZS: 0.7180517735911646
	SPPOP7579MA5Y: 0.7173443748614775
	SPRURTOTLZS: -0.7151053788815014
	SPURBTOTLINZS: 0.7151053788815014
	SHHTNTRETZS: 0.7139642600793562
	SPPOP6064MA5Y: 0.7128523879136051
	EGELCACCSZS: 0.7106077182133995


SPPOP6064MA5Y: -0.8406672362637244
	SPPOP6064FE5Y: 0.9858321434380574
	SPPOP6569MA5Y: 0.9555574789091865
	SPPOP5559MA5Y: 0.9551837612565578
	SPPOP65UPTOZS: 0.9442939026649835
	SPPOP6569FE5Y: 0.9421745700604075
	SPPOP5559FE5Y: 0.9416837310778106
	SPPOP65UPFEZS: 0.9405245627641537
	SPPOP65UPMAZS: 0.9326957981801457
	SPPOPDPNDOL: 0.9304437814982419
	SPPOP1519FE5Y: -0.9256811962871832
	SPPOP1014FE5Y: -0.9194889577011263
	SPPOP1014MA5Y: -0.9134416738392104
	SPPOP7074FE5Y: 0.9089250605004096
	SPPOP1519MA5Y: -0.9084543025390565
	SPPOP0014TOZS: -0.9068119807786548
	SPPOP0014FEZS: -0.9059733387678733
	SPPOP7579FE5Y: 0.90556962238266
	SPPOP0014MAZS: -0.9052529506892035
	SPPOP80UPFE5Y: 0.9050949378411227
	SPPOP7074MA5Y: 0.9040370067026674
	SPPOP0509FE5Y: -0.8964974982503072
	SPPOP5054MA5Y: 0.8964413810520931
	SPPOP0509MA5Y: -0.8943811033816658
	SPPOP7579MA5Y: 0.8858102876041034
	SPPOP5054FE5Y: 0.8772551213123034
	SPPOP0004FE5Y: -0.8687482946802283
	SPPOP0004MA5Y: -0.8674368126959046
	SPPOP80UPMA5

	SPPOP5054FE5Y: 0.8436674840067869
	SPPOPDPNDYG: -0.8403252672085458
	SPDYNCBRTIN: -0.8299226804167736
	SPPOP4549FE5Y: 0.8174621645011171
	SPPOP4044MA5Y: 0.8052735437746305
	NYGDPPCAPPPKD: 0.7730109095265378
	SPPOP2024MA5Y: -0.769563291102731
	SLGDPPCAPEMKD: 0.7678680343488089
	ITMLTMAINP2: 0.7615485655659631
	SPPOP4044FE5Y: 0.7531381283357719
	NYGNPPCAPPPCD: 0.7471595528765682
	SPDYNTFRTIN: -0.74604790022582
	SPPOP1564MAZS: 0.7436778370547414
	NYGDPPCAPPPCD: 0.7421533145757387
	SPPOPGROW: -0.7315658706302914
	SPDYNLE00FEIN: 0.7223502691606822
	SLEMPWORKFEZS: 0.712796436933612
	SLEMPSELFFEZS: -0.7127963905405142
	SLEMPVULNFEZS: -0.711821232010858
	SPDYNTO65FEZS: 0.7083964430078962
	SLEMPSELFZS: -0.7066978526266997
	SLEMPWORKZS: 0.706697675349197
	SHDYNNMRT: -0.7055932533279479
	SPDYNLE00IN: 0.7053498932834215
	SLAGREMPLMAZS: -0.703530730796228
	SLEMPVULNZS: -0.7004579695690056
SPPOP65UPTOZS: -0.8203077173337688
	SPPOPDPNDOL: 0.9964765297080437
	SPPOP65UPFEZS: 0.9956110764049676
	SPPOP6

	SHDYNNMRT: -0.8163440256642239
	SPDYNCBRTIN: -0.8125139633911437
	SPDYNTFRTIN: -0.803056043733999
	SHDYN0509: -0.7929656718022755
	SPPOPDPNDYG: -0.7911097827834173
	NYADJDPEMGNZS: -0.788009057877105
	SPPOP0004MA5Y: -0.7857511535457667
	SPPOP0014MAZS: -0.7805247383099623
	SPPOP0014TOZS: -0.7716584045964505
	SPPOP0004FE5Y: -0.7713710717404866
	SPPOP0509MA5Y: -0.7641847489848339
	SPPOP0014FEZS: -0.7628055504038
	SHSTAOWADMAZS: 0.7590160673479647
	SPPOPDPND: -0.7564952710408288
	SLAGREMPLMAZS: -0.7544022104545175
	SHDYN1014: -0.7526775273986755
	SLAGREMPLZS: -0.749297232985369
	SPPOP0509FE5Y: -0.7488764559348123
	SPADOTFRT: -0.7413184348944708
	SPPOP1564TOZS: 0.7378424355247692
	SPPOP1014MA5Y: -0.7342454993983293
	SPPOP1564MAZS: 0.733444457893505
	SLSRVEMPLZS: 0.7317064867745244
	SPPOP4044FE5Y: 0.7290490986281218
	SPPOP1564FEZS: 0.7269332359003728
	SPPOP4549FE5Y: 0.7223774427220467
	SLSRVEMPLMAZS: 0.720649492262622
	SPPOP80UPMA5Y: 0.7203370860773286
	SPPOP4549MA5Y: 0.7195309298243389
	SPP

	SPPOP5559FE5Y: 0.8886715397463859
	SPPOP0014FEZS: -0.887665067828724
	SPPOP0509FE5Y: -0.8859663917888038
	SPPOP0014TOZS: -0.8851578534492862
	SPPOP0014MAZS: -0.8796935935195395
	SPPOP0509MA5Y: -0.8778291338309806
	SPPOP80UPMA5Y: 0.8734942529469675
	SPPOP1519MA5Y: -0.8733065072774346
	SPPOP5054FE5Y: 0.8715064004611488
	SPPOP4549MA5Y: 0.8435981213492334
	SPPOP0004FE5Y: -0.8395759884948889
	SPPOP2024FE5Y: -0.8380225120869557
	SPPOP0004MA5Y: -0.8314607817275592
	SPPOPDPNDYG: -0.8238852884971568
	SPPOP4549FE5Y: 0.8070940184572475
	SPDYNCBRTIN: -0.7953843829359887
	ITMLTMAINP2: 0.7930922065065742
	SPPOP4044MA5Y: 0.7711860172739037
	NYGDPPCAPPPKD: 0.7490112553696944
	SLGDPPCAPEMKD: 0.7422788876410991
	SPPOP1564MAZS: 0.7415658460712073
	SPPOP2024MA5Y: -0.7335000705556868
	SPPOPGROW: -0.7186141308642262
	SPDYNTFRTIN: -0.7150288236759624
	SHDYNNMRT: -0.7107821015109759
	SLEMPWORKFEZS: 0.7081675668913752
	SLEMPSELFFEZS: -0.7081666917446124
	SLEMPVULNFEZS: -0.707477821873975
	SLEMPSELFZS: -0.7043

SLEMPWORKZS: -0.7765599225775294
	SLEMPSELFZS: -0.999999994316971
	SLEMPVULNZS: -0.9963761811919114
	SLEMPSELFMAZS: -0.9925627046421213
	SLEMPWORKMAZS: 0.9925625604177583
	SLEMPWORKFEZS: 0.9895231582194675
	SLEMPSELFFEZS: -0.9895231408964967
	SLEMPVULNFEZS: -0.9887529821664515
	SLEMPVULNMAZS: -0.9875061055642606
	SLAGREMPLMAZS: -0.9048312109541401
	SLAGREMPLZS: -0.904335821872485
	SLINDEMPLMAZS: 0.8559858840010681
	SLFAMWORKZS: -0.8510781064894773
	SLAGREMPLFEZS: -0.8504097005852836
	SLSRVEMPLZS: 0.8332114422469897
	SLFAMWORKMAZS: -0.8227542750561826
	SLSRVEMPLFEZS: 0.8071487731991677
	SLFAMWORKFEZS: -0.8019467636552272
	SHDYNNMRT: -0.789267613591703
	SLSRVEMPLMAZS: 0.7818011831977195
	SPDYNIMRTMAIN: -0.7787744490951155
	SPDYNIMRTIN: -0.7766773450279949
	SPDYNCBRTIN: -0.7765599225775294
	SPDYNIMRTFEIN: -0.7730816668097696
	SPPOP0014MAZS: -0.7697542322480047
	SPPOP0014TOZS: -0.7693686871619337
	SPPOPDPNDYG: -0.7681153175550205
	SPPOP0004MA5Y: -0.7679486317696053
	SPPOP0014FEZS: -0.76695

	SLGDPPCAPEMKD: 0.7386221446526113
	SLINDEMPLZS: 0.7351482866264403
	SPDYNLE00IN: 0.7328533961903069
	SPDYNTO65FEZS: 0.732633785644398
	SPPOP65UPFEZS: 0.7294862796737073
	SPPOP1564MAZS: 0.7286692287230739
	SPURBTOTLINZS: 0.7286678837531002
	SPRURTOTLZS: -0.7286678837531
	SPPOP65UPTOZS: 0.7270439124301049
	SPPOP5559MA5Y: 0.722517891836533
	SPPOP6064MA5Y: 0.7203922214868932
	EGFECRNEWZS: -0.718332955130419
	NVAGRTOTLZS: -0.7167423356202934
	SPPOP5054MA5Y: 0.7157556877504939
	SPPOP6569FE5Y: 0.7151261159057861
	SPPOP6064FE5Y: 0.7131829912637956
	SPPOP6569MA5Y: 0.712796436933612
	SPPOP5559FE5Y: 0.7122462224889371
	SHSTAOWADZS: 0.7122047724884335
	ITMLTMAINP2: 0.7115425002929673
	SPPOPDPNDOL: 0.7111595416887413
	SPPOP7074FE5Y: 0.7108014581667866
	SPPOP65UPMAZS: 0.7096349662993886
	SHSTAOB18MAZS: 0.7093938686620832
	SPPOP7579FE5Y: 0.7081675668913752
	SPPOP4549MA5Y: 0.7072839852530286
	SPPOP80UPFE5Y: 0.70661133252037
	SPPOPDPND: -0.705454138058154
	SPPOP4044MA5Y: 0.7026902010690423
	SPPOP5054F

	SLINDEMPLMAZS: -0.8517106940126538
	SLFAMWORKZS: 0.8436578143581015
	SLAGREMPLFEZS: 0.8301296200967369
	SLFAMWORKMAZS: 0.8232937916030916
	SLSRVEMPLZS: -0.8156976695831939
	SLFAMWORKFEZS: 0.7900585205569635
	SLSRVEMPLFEZS: -0.7846721120763541
	SHDYNNMRT: 0.7760487122488179
	SLSRVEMPLMAZS: -0.7706958930997792
	SPDYNIMRTMAIN: 0.7641505287465105
	SPDYNIMRTIN: 0.7624373556040659
	SPDYNIMRTFEIN: 0.7593262683830236
	SPDYNCBRTIN: 0.7578868759693885
	SPPOPDPNDYG: 0.7469216701028838
	SPPOP0014TOZS: 0.7460203796981972
	SPPOP0004MA5Y: 0.7459626199018502
	SLINDEMPLZS: -0.7456058762824002
	SPPOP0014MAZS: 0.745266946135212
	SPPOP0004FE5Y: 0.7450225301865889
	SHDYNMORTMA: 0.7446222408989335
	SPPOP0014FEZS: 0.744582045806941
	SHDYNMORT: 0.7411448243001365
	SPDYNTFRTIN: 0.7394500520935142
	SHDYNMORTFE: 0.7362955856862374
	NVAGRTOTLZS: 0.7319546959287934
	SPPOP0509FE5Y: 0.7318608433280703
	SPPOP0509MA5Y: 0.7308730499769651
	SPDYNLE00FEIN: -0.7217808287115108
	SHSTAOWADMAZS: -0.7211261995195704
	SPRURTO

	SPPOP2024FE5Y: 0.8854323747590622
	SPPOP65UPMAZS: -0.8847586022062316
	SPPOPDPNDOL: -0.8832012385984712
	SPPOP5054MA5Y: -0.8791958501873723
	SPPOP6569MA5Y: -0.8736917695128484
	SPPOP7579FE5Y: -0.8733065072774346
	SPPOP7579MA5Y: -0.8630520523841955
	SPPOP6569FE5Y: -0.8586448860254459
	SPPOP5054FE5Y: -0.8539537615227397
	SPPOP80UPMA5Y: -0.853378892221767
	SPPOP7074MA5Y: -0.8516301323201799
	SPPOP7074FE5Y: -0.8498669249772075
	SPPOP0014MAZS: 0.8476454079740819
	SPPOP0014TOZS: 0.8452174337946419
	SPPOP0014FEZS: 0.8411411394954224
	SPPOP0509MA5Y: 0.8335964971158444
	SPPOP2024MA5Y: 0.8319212538886847
	SPPOP0509FE5Y: 0.8289014497256135
	SPPOP4549MA5Y: -0.8238587176019055
	SPPOPDPNDYG: 0.7900446377956772
	SPPOP4549FE5Y: -0.7841130058179051
	SPPOP4044MA5Y: -0.783225400712504
	NYGDPPCAPPPKD: -0.7816784706201377
	SPPOP0004MA5Y: 0.7688475170247786
	SPPOP0004FE5Y: 0.7668840130147037
	ITMLTMAINP2: -0.7635825130648943
	SLGDPPCAPEMKD: -0.7631836492006704
	NYGNPPCAPPPCD: -0.7624290022804071
	NYGDPPCAP

	SPDYNIMRTIN: -0.7080311643156466
	SPPOP1014MA5Y: -0.7045965701971413
	SPDYNIMRTFEIN: -0.7040481599743891
	SPPOP0014FEZS: -0.703538878336336
	NYGDPPCAPPPCD: 0.7032596647174707
	NYGNPPCAPPPCD: 0.7015931908963077
SHSTAOB18MAZS: -0.6872056052317248
	SHSTAOWADMAZS: 0.959412091925597
	SHSTAOWADZS: 0.9354114575142998
	SHSTAOWADFEZS: 0.8235864373481315
	SHSTAOB18FEZS: 0.8085167974797219
	SPRURTOTLZS: -0.7620222333246162
	SPURBTOTLINZS: 0.7620222333246162
	SHHTNTRETZS: 0.7489184212802251
	SHHTNTRETFEZS: 0.7479839597465522
	SLAGREMPLMAZS: -0.744307395712225
	SLSRVEMPLZS: 0.7406420615824805
	SHHTNTRETMAZS: 0.7400648970922326
	SLAGREMPLZS: -0.7369185728538915
	SPDYNLE00FEIN: 0.7280674574348275
	SHDYNNMRT: -0.722268429557756
	SPDYNIMRTMAIN: -0.7203218002843349
	SPDYNLE00IN: 0.719501635732985
	SPDYNIMRTIN: -0.7178128119872434
	SPDYNIMRTFEIN: -0.7138033317514889
	SLEMPVULNFEZS: -0.7133865759262965
	SLSRVEMPLFEZS: 0.7131476858065915
	SLSRVEMPLMAZS: 0.7105809748530638
	SLEMPVULNZS: -0.7104446843337203

	SPDYNLE00IN: -0.7185775070585724
	SHDYNMORTFE: 0.7179098465017874
	SLINDEMPLZS: -0.704886698326119
SHIMMPOL3: -0.6685641261366615
	SHIMMIDPT: 0.9645770315750334
	SHIMMMEAS: 0.8926172373191733
	SHDYNMORTFE: -0.7730018146966214
	SHDYNMORT: -0.7720929391783103
	SHDYNMORTMA: -0.7705583185824657
	SPDYNIMRTIN: -0.7579361302361008
	SPDYNIMRTFEIN: -0.7578761502471468
	SPDYNIMRTMAIN: -0.7572444556090572
	NYADJDPEMGNZS: -0.7568788841352854
	SHDYNNMRT: -0.7309307732383503
SLGDPPCAPEMKD: -0.6594854329788002
	NYGDPPCAPPPKD: 0.9755296754127291
	NYGDPPCAPPPCD: 0.9341282928677437
	NYGNPPCAPPPCD: 0.9319567199887464
	NVSRVEMPLKD: 0.9229658341881593
	NYGDPPCAPKD: 0.9052780209770184
	NYGDPPCAPCD: 0.8894644465083911
	NYADJNNTYPCCD: 0.8876249697130493
	NYGNPPCAPCD: 0.8838425350134553
	NECONPRVTPCKD: 0.8796518523877361
	ITMLTMAINP2: 0.8372433107257857
	SPPOP80UPMA5Y: 0.8317267634537381
	NVINDEMPLKD: 0.8296200003645595
	PANUSPPPCRF: 0.8240715150522915
	SPPOP80UPFE5Y: 0.8233800505222079
	SPPOP65UPMAZS: 0.8185

	SPDYNIMRTFEIN: -0.7291211107936856
	SPDYNIMRTIN: -0.7290246704429504
	SPDYNIMRTMAIN: -0.7281715324075291
	SLGDPPCAPEMKD: 0.7259677514380083
	SPDYNLE00FEIN: 0.7185974672513121
	SHSTAOB18MAZS: 0.7131476858065915
	SPDYNLE00IN: 0.7094568505604714
SPDYNAMRTMA: 0.6252394922334107
	SPDYNTO65MAZS: -0.9609565665284245
	SPDYNLE00MAIN: -0.9117430441751181
	SPDYNAMRTFE: 0.9070661713535281
	SPDYNLE00IN: -0.877419381008172
	SPDYNTO65FEZS: -0.8683041391795748
	SPDYNLE00FEIN: -0.832258878528039
SGLOCLIVEEQ: -0.6224014689224076
NYGDPFRSTRTZS: 0.6216812684877902
ENATMCO2EPC: -0.6190543272707001
NYGNPPCAPPPCD: -0.6168285493969561
	NYGDPPCAPPPCD: 0.9966576314179092
	NYGDPPCAPPPKD: 0.9599503719822744
	NYGDPPCAPCD: 0.9443905674772639
	NYADJNNTYPCCD: 0.9441297870343227
	NYGNPPCAPCD: 0.943984255657417
	SLGDPPCAPEMKD: 0.9319567199887464
	NYGDPPCAPKD: 0.90909209115408
	NECONPRVTPCKD: 0.88719557164759
	NVSRVEMPLKD: 0.8860206976573317
	SPPOP80UPMA5Y: 0.8373973477541147
	NVINDEMPLKD: 0.8218346305283608
	SPPOP80UP

#### Listing the variables in var1 - this would be the mechanism by which we can iterate through and remove

In [17]:
for x in var1:
    print(x)

SPPOP0004MA5Y
SPPOP0004FE5Y
SPDYNTFRTIN
SPPOPDPNDYG
SPPOP0014TOZS
SPPOP0014FEZS
SPPOP0014MAZS
SPPOP1564MAZS
SPPOP0509FE5Y
SPPOPDPND
SPPOP0509MA5Y
SPPOP1564TOZS
SPPOP1564FEZS
SPPOP4044MA5Y
SPPOP4044FE5Y
SPPOP4549MA5Y
SPPOP4549FE5Y
SHDYNMORTMA
SPDYNLE00FEIN
SPDYNIMRTMAIN
SPDYNIMRTIN
SHDYNMORT
SPDYNIMRTFEIN
SPPOP1014FE5Y
SHDYNMORTFE
SPDYNTO65FEZS
SPPOP5054MA5Y
SPPOP5054FE5Y
SPPOP1014MA5Y
SPADOTFRT
SPDYNLE00IN
SPPOP5559MA5Y
SPPOP5559FE5Y
SHDYNNMRT
SPPOP6064MA5Y
SPPOP3539MA5Y
SPPOP6064FE5Y
SPPOP6569FE5Y
SPPOP6569MA5Y
SPPOP65UPTOZS
SPPOP65UPFEZS
SPPOP7074FE5Y
NYADJDPEMGNZS
SPDYNLE00MAIN
SPPOP65UPMAZS
SPPOPGROW
SPDYNAMRTFE
SPPOP7074MA5Y
SPPOP7579FE5Y
SPPOP3539FE5Y
SHDYN0509
SPPOPDPNDOL
SLEMPVULNFEZS
SLEMPVULNZS
SLEMPSELFZS
SLEMPWORKZS
SLAGREMPLMAZS
SLEMPSELFFEZS
SLEMPWORKFEZS
SPPOP1519FE5Y
SPPOP7579MA5Y
SLINDEMPLMAZS
SLEMPVULNMAZS
SLEMPSELFMAZS
SLEMPWORKMAZS
SHSTAOWADMAZS
SPPOP80UPFE5Y
SLAGREMPLZS
SHDYN1014
EGFECRNEWZS
EGELCACCSZS
SPPOP1519MA5Y
SPDYNTO65MAZS
SPPOP80UPMA5Y
ITMLTMAINP2
SHHTNTRE

#### Simple check to make sure the correct variables are pulled above

In [18]:
corrDF[(corrDF["SPDYNCBRTIN"]>=.5) & (corrDF["SPDYNCBRTIN"]<1)]["SPDYNCBRTIN"]

SPPOPDPND          0.942828
SPPOPDPNDYG        0.975715
SPPOPGROW          0.806743
SPRURTOTLZG        0.676378
SPRURTOTLZS        0.674301
SPADOTFRT          0.856392
SLTLFCACTMAZS      0.559971
SHDYN2024          0.524507
SHDYNMORTFE        0.872480
SHDYNMORTMA        0.882269
SHDYNNMRT          0.840680
SPDYNIMRTFEIN      0.875938
SPDYNIMRTIN        0.879766
SPDYNIMRTMAIN      0.881979
SHDYNMORT          0.878173
SHDYN1519          0.576639
SHDYN1014          0.745247
SHDYN0509          0.789446
SPDYNTFRTIN        0.984883
SPPOP1014MA5Y      0.860752
SPPOP1519FE5Y      0.775305
SPPOP1519MA5Y      0.738462
SPPOP2024FE5Y      0.604123
SPPOP1014FE5Y      0.874008
SPPOP0509MA5Y      0.940743
SPPOP0004FE5Y      0.988134
SPPOP0004MA5Y      0.988223
SPPOP0014FEZS      0.960013
SPPOP0014MAZS      0.958616
SPPOP0014TOZS      0.960440
SPPOP0509FE5Y      0.944338
NYGDPFRSTRTZS      0.621681
SPDYNAMRTMA        0.625239
SPDYNAMRTFE        0.803279
NYADJDPEMGNZS      0.815039
SLEMPVULNFEZS      0

#### Individual correlation checker - for debugging but I'll leave it for now

In [19]:
corrDF[corrDF.index=='SHDYN0509']["SHDTH0509"]

SHDYN0509    0.259015
Name: SHDTH0509, dtype: float64

#### Calling the results

In [20]:
var1

{'SPPOP0004MA5Y': {'corr': 0.9882226398510024,
  'SPPOP0004FE5Y': {'corr': 0.9965248794708107},
  'SPDYNCBRTIN': {'corr': 0.9882226398510024},
  'SPPOPDPNDYG': {'corr': 0.9864456609176305},
  'SPPOP0014MAZS': {'corr': 0.9817648367127353},
  'SPPOP0014TOZS': {'corr': 0.9814615049312868},
  'SPPOP0014FEZS': {'corr': 0.9790250296182205},
  'SPPOP0509MA5Y': {'corr': 0.9700278430751134},
  'SPPOP0509FE5Y': {'corr': 0.9688036557969784},
  'SPDYNTFRTIN': {'corr': 0.9615510800721716},
  'SPPOP1564MAZS': {'corr': -0.9569900042153677},
  'SPPOPDPND': {'corr': 0.9424462207967538},
  'SPPOP1564TOZS': {'corr': -0.9400659681909289},
  'SPPOP4549MA5Y': {'corr': -0.9219638644479754},
  'SPPOP4044MA5Y': {'corr': -0.9185113553350811},
  'SPPOP4549FE5Y': {'corr': -0.9160811192434877},
  'SPPOP4044FE5Y': {'corr': -0.9047236745103988},
  'SPPOP1564FEZS': {'corr': -0.902590926123452},
  'SPPOP1014FE5Y': {'corr': 0.9018008197484069},
  'SPPOP5054MA5Y': {'corr': -0.8969229149078787},
  'SPPOP1014MA5Y': {'corr

#### Calling a recursive process to look through the above dictionary file and identify uniquely correlated values

In [21]:
# Will recursively iterate through the nested dictionary and create a list of unique keys

def recurdict(d, l):
    for k,v in d.items():
        if k not in l and k not in blacklist and k != 'corr':
            l.append(k)
        if isinstance(v, dict):
            for k2 in v:
                if k2 !='corr':
                    blacklist.append(k2)
            recurdict(v, l)

In [22]:
l = []
blacklist=[]
recurdict(var1, l)

In [23]:
l

['SPPOP0004MA5Y',
 'SGLOCLIVEEQ',
 'NYGDPFRSTRTZS',
 'ENATMCO2EPC',
 'AGYLDCRELKG',
 'SLTLFCACTMAZS',
 'SGLAWINDX',
 'TXVALMRCHR6ZS',
 'SEENRPRIMFMZS',
 'TMVALMRCHR6ZS']

#### A process to make a list of all unique values anywhere in the dictionary

In [24]:
# Will recursively iterate through the nested dictionary and create a list of unique keys

def recurdict(d, l):
    for k,v in d.items():
        if k not in l and k != 'corr':
            l.append(k)
        if isinstance(v, dict):
            recurdict(v, l)

In [25]:
l = []

recurdict(var1, l)

In [26]:
l

['SPPOP0004MA5Y',
 'SPPOP0004FE5Y',
 'SPDYNCBRTIN',
 'SPPOPDPNDYG',
 'SPPOP0014MAZS',
 'SPPOP0014TOZS',
 'SPPOP0014FEZS',
 'SPPOP0509MA5Y',
 'SPPOP0509FE5Y',
 'SPDYNTFRTIN',
 'SPPOP1564MAZS',
 'SPPOPDPND',
 'SPPOP1564TOZS',
 'SPPOP4549MA5Y',
 'SPPOP4044MA5Y',
 'SPPOP4549FE5Y',
 'SPPOP4044FE5Y',
 'SPPOP1564FEZS',
 'SPPOP1014FE5Y',
 'SPPOP5054MA5Y',
 'SPPOP1014MA5Y',
 'SPPOP5054FE5Y',
 'SPPOP5559MA5Y',
 'SPPOP5559FE5Y',
 'SPPOP6064MA5Y',
 'SPPOP6064FE5Y',
 'SPPOP6569MA5Y',
 'SPPOP6569FE5Y',
 'SPPOP65UPTOZS',
 'SPPOP7074FE5Y',
 'SPPOP65UPFEZS',
 'SPDYNLE00FEIN',
 'SPDYNIMRTMAIN',
 'SPADOTFRT',
 'SPDYNIMRTIN',
 'SPPOP65UPMAZS',
 'SPPOP7074MA5Y',
 'SPDYNIMRTFEIN',
 'SPPOP3539MA5Y',
 'SPDYNTO65FEZS',
 'SHDYNMORTMA',
 'SPPOP7579FE5Y',
 'SHDYNMORT',
 'SPDYNLE00IN',
 'SHDYNMORTFE',
 'SPPOPDPNDOL',
 'SHDYNNMRT',
 'SPPOPGROW',
 'SPPOP7579MA5Y',
 'SPPOP1519FE5Y',
 'SPPOP80UPFE5Y',
 'SPDYNLE00MAIN',
 'SPPOP3539FE5Y',
 'SLAGREMPLMAZS',
 'SPDYNAMRTFE',
 'SLEMPVULNFEZS',
 'SLEMPSELFFEZS',
 'SLEMPWORKF