In [29]:
import pandas as pd
import numpy as np

In [30]:
original_df = pd.read_csv('./data/colorado.csv')

In [31]:
original_df.tail()

Unnamed: 0,SPLIT,OWNERSHP,VETWWI,RESPONDT,LABFORCE,SLREC,SSENROLL,SPANNAME,SCHOOL,URBAN,...,CITY,RENT,MIGCOUNTY,MIGMET5,ERSCOR50,MBPLD,FBPLD,BPLD,INCWAGE,VALUEH
661962,0,1,0,2,2,1,0,1,1,1,...,9999,0,9999,9999,99,0,0,4200,0,200
661963,0,2,0,2,2,1,0,1,1,2,...,5670,25,9999,6560,875,15000,0,800,0,9999999
661964,0,2,0,1,1,1,0,1,1,1,...,9999,8,9999,9999,9999,0,0,41000,0,9999999
661965,0,2,0,1,0,1,0,1,2,2,...,1710,40,9999,2080,9999,3900,800,800,999998,9999999
661966,0,2,0,1,0,1,0,2,1,1,...,9999,4,9999,0,9999,0,800,800,999998,9999999


In [32]:
def group_data(original_df,feature_list,max_group=200):
    """
    Split data into groups based on values on the list of features specified. 
    Returns the indices map as a dictionary from indices to group number.
    Arguments:
        original_df: pandas Dataframe input
        feature_list: list of features to be split by. Should be distrete, since the group are determined by all distinct values in that features.
        max_group: maximum number of groups possible to be splitted. This is to stop the algorithm if the number of groups are too large, since it can be exponential
    
    Return: index_to_group, group_to_feature, feature_to_group
        index_to_group: dictionary of indices to group numbers
        group_to_feature: dictionary of group numbers to dictionaries indicating the values of each feature in feature_list
        feature_to_group: dictionary of dictionaries indicating the values of each feature in feature_list to group numbers   
    """
  
    #step 1: go over all possible groups values
    all_value = dict()
    for feature in feature_list:
        if feature not in original_df.columns: #bad feature list
            print('group_data has feature_list that is invalid. The feature',feature,'is not found. Return None.')
            return
        
        #let's see all values to split on
        all_value[feature] = sorted(original_df[feature].unique()) #this is an array now, dtype=int64 for census data
        
    #next, use these values to create a list of all possible tuples of values. 
    #The idea is that, for each new feature, expands all existing tuples by all values of this new feature
    all_group = [] #list of all tuples possible, stored as a list of dictionary
    for idx_feature, feature in enumerate(feature_list):
        if idx_feature == 0:
            #first feature is for initializing the list of dictionaries of first feature
            all_group = [{feature:value} for value in all_value[feature]]
            
        else:
            #expand each dictionary in the list by all_value[feature]
            all_group_new = []
            for dict_ in all_group:
                #this is the new dict to add
                all_group_new.extend([dict(dict_,**{feature:value}) for value in all_value[feature]])
            all_group = all_group_new
        
        print('all_group has',len(all_group),'total groups after processing',idx_feature+1,'feature(s).',
                  'The last feature is',feature,'with',len(all_value[feature]),'possible values' )
        
        if (len(all_group) > max_group):
            print('group_data has more than',max_group,'groups. Return None.')
            return
        
    #return all_value, all_group #for debugging step 1
    
    #step 2: let's create group_to_feature and feature_to_group.
    group_to_feature = {idx_group:dict_ for idx_group, dict_ in enumerate(all_group)}
    
    #note: dict is not hashable. Use frozenset instead.
    feature_to_group = {frozenset(dict_.items()):idx_group for idx_group, dict_ in enumerate(all_group)}
    
    #step 3: for each index in original_df, assign what group it belongs to
    index_to_group = dict()
    
    #do some counting for checking how many in each categories, optional
    group_size = {idx_group:0 for idx_group in range(len(group_to_feature))}
    
    for idx in range(len(original_df)):
        row = original_df.iloc[idx,:] #get one row
        
        #create the dictionary for this row on feature_list
        dict_row = {feature:row[feature] for feature in feature_list}
        
        #this dictionary specify the group number it belongs to
        
        group_num = feature_to_group[frozenset(dict_row.items())]
        index_to_group[idx] = group_num
        
        #do the counting, optional
        group_size[group_num] += 1
                
    #report group size
    for idx_group, size in group_size.items():
        print('Group',idx_group,'of data has',size,'elements.')
        
    
    return index_to_group, group_to_feature, feature_to_group

In [33]:
index_to_group, group_to_feature, feature_to_group = group_data(original_df,['SEX','EDUC'])

#SEX: 1 is male, 2 is female. 2 groups.
#EDUC: 0=no school. 1=grade <5. 2=grade 5-8. 3-5=grade9-11. 6=high school. 7-9=some college. 10-11=college 4-5+ years. 12=missing 
#EDUC --> 0,1,2,3-5,6,7-9,10-11,12. 8 groups.

#total is 16 groups. Will have to merge them back again.

all_group has 2 total groups after processing 1 feature(s). The last feature is SEX with 2 possible values
all_group has 26 total groups after processing 2 feature(s). The last feature is EDUC with 13 possible values
Group 0 of data has 37712 elements.
Group 1 of data has 41403 elements.
Group 2 of data has 120539 elements.
Group 3 of data has 19187 elements.
Group 4 of data has 19901 elements.
Group 5 of data has 12399 elements.
Group 6 of data has 40815 elements.
Group 7 of data has 6718 elements.
Group 8 of data has 7253 elements.
Group 9 of data has 3485 elements.
Group 10 of data has 8878 elements.
Group 11 of data has 4374 elements.
Group 12 of data has 12546 elements.
Group 13 of data has 36020 elements.
Group 14 of data has 35166 elements.
Group 15 of data has 101596 elements.
Group 16 of data has 19871 elements.
Group 17 of data has 21044 elements.
Group 18 of data has 13541 elements.
Group 19 of data has 55858 elements.
Group 20 of data has 8027 elements.
Group 21 of data has

In [34]:
index_to_group

{0: 15,
 1: 13,
 2: 2,
 3: 13,
 4: 10,
 5: 21,
 6: 6,
 7: 15,
 8: 13,
 9: 0,
 10: 1,
 11: 3,
 12: 23,
 13: 3,
 14: 3,
 15: 0,
 16: 15,
 17: 7,
 18: 21,
 19: 0,
 20: 12,
 21: 2,
 22: 2,
 23: 2,
 24: 13,
 25: 10,
 26: 4,
 27: 19,
 28: 21,
 29: 6,
 30: 14,
 31: 19,
 32: 15,
 33: 2,
 34: 15,
 35: 15,
 36: 13,
 37: 19,
 38: 2,
 39: 13,
 40: 21,
 41: 10,
 42: 12,
 43: 17,
 44: 13,
 45: 2,
 46: 18,
 47: 15,
 48: 2,
 49: 2,
 50: 4,
 51: 17,
 52: 15,
 53: 17,
 54: 15,
 55: 17,
 56: 14,
 57: 17,
 58: 5,
 59: 2,
 60: 3,
 61: 0,
 62: 13,
 63: 3,
 64: 20,
 65: 2,
 66: 12,
 67: 16,
 68: 2,
 69: 19,
 70: 19,
 71: 2,
 72: 18,
 73: 1,
 74: 5,
 75: 6,
 76: 15,
 77: 4,
 78: 8,
 79: 15,
 80: 2,
 81: 2,
 82: 2,
 83: 15,
 84: 10,
 85: 24,
 86: 2,
 87: 15,
 88: 4,
 89: 19,
 90: 2,
 91: 2,
 92: 19,
 93: 14,
 94: 6,
 95: 3,
 96: 19,
 97: 2,
 98: 14,
 99: 3,
 100: 15,
 101: 15,
 102: 25,
 103: 19,
 104: 6,
 105: 8,
 106: 7,
 107: 19,
 108: 6,
 109: 5,
 110: 2,
 111: 12,
 112: 8,
 113: 2,
 114: 4,
 115: 19,
 116

In [35]:
group_to_feature

{0: {'SEX': 1, 'EDUC': 0},
 1: {'SEX': 1, 'EDUC': 1},
 2: {'SEX': 1, 'EDUC': 2},
 3: {'SEX': 1, 'EDUC': 3},
 4: {'SEX': 1, 'EDUC': 4},
 5: {'SEX': 1, 'EDUC': 5},
 6: {'SEX': 1, 'EDUC': 6},
 7: {'SEX': 1, 'EDUC': 7},
 8: {'SEX': 1, 'EDUC': 8},
 9: {'SEX': 1, 'EDUC': 9},
 10: {'SEX': 1, 'EDUC': 10},
 11: {'SEX': 1, 'EDUC': 11},
 12: {'SEX': 1, 'EDUC': 99},
 13: {'SEX': 2, 'EDUC': 0},
 14: {'SEX': 2, 'EDUC': 1},
 15: {'SEX': 2, 'EDUC': 2},
 16: {'SEX': 2, 'EDUC': 3},
 17: {'SEX': 2, 'EDUC': 4},
 18: {'SEX': 2, 'EDUC': 5},
 19: {'SEX': 2, 'EDUC': 6},
 20: {'SEX': 2, 'EDUC': 7},
 21: {'SEX': 2, 'EDUC': 8},
 22: {'SEX': 2, 'EDUC': 9},
 23: {'SEX': 2, 'EDUC': 10},
 24: {'SEX': 2, 'EDUC': 11},
 25: {'SEX': 2, 'EDUC': 99}}

In [36]:
feature_to_group

{frozenset({('EDUC', 0), ('SEX', 1)}): 0,
 frozenset({('EDUC', 1), ('SEX', 1)}): 1,
 frozenset({('EDUC', 2), ('SEX', 1)}): 2,
 frozenset({('EDUC', 3), ('SEX', 1)}): 3,
 frozenset({('EDUC', 4), ('SEX', 1)}): 4,
 frozenset({('EDUC', 5), ('SEX', 1)}): 5,
 frozenset({('EDUC', 6), ('SEX', 1)}): 6,
 frozenset({('EDUC', 7), ('SEX', 1)}): 7,
 frozenset({('EDUC', 8), ('SEX', 1)}): 8,
 frozenset({('EDUC', 9), ('SEX', 1)}): 9,
 frozenset({('EDUC', 10), ('SEX', 1)}): 10,
 frozenset({('EDUC', 11), ('SEX', 1)}): 11,
 frozenset({('EDUC', 99), ('SEX', 1)}): 12,
 frozenset({('EDUC', 0), ('SEX', 2)}): 13,
 frozenset({('EDUC', 1), ('SEX', 2)}): 14,
 frozenset({('EDUC', 2), ('SEX', 2)}): 15,
 frozenset({('EDUC', 3), ('SEX', 2)}): 16,
 frozenset({('EDUC', 4), ('SEX', 2)}): 17,
 frozenset({('EDUC', 5), ('SEX', 2)}): 18,
 frozenset({('EDUC', 6), ('SEX', 2)}): 19,
 frozenset({('EDUC', 7), ('SEX', 2)}): 20,
 frozenset({('EDUC', 8), ('SEX', 2)}): 21,
 frozenset({('EDUC', 9), ('SEX', 2)}): 22,
 frozenset({('EDUC

In [37]:
#SEX: 1 is male, 2 is female. 2 groups.
#--------- RACE is not used anymore. Majority is too big (95%+)-------------------
#RACE: 1,2,3,4,5,6 are White, Black/African American or Negro, Native American, Chinese, Japanese, and other Asians.
#RACE --> 1,2,3-6 (other races). 3 groups.
#---------------------------------------------------------------
#EDUC: 0=no school. 1=grade <5. 2=grade 5-8. 3-5=grade9-11. 6=high school. 7-9=some college. 10-11=college 4-5+ years. 12=missing 
#EDUC --> 0,1,2,3-5,6,7-9,10-11,12. 8 groups.

#total is 16 groups. Will have to merge them back again.

#We will hard-code on merging it back.
final_map = dict()

#start with original index maps: index_to_group, group_to_feature, feature_to_group
SEX_map = {1:0,2:1}
#RACE_map = {1:0,2:1}
#RACE_map.update({j:2 for j in range(3,7)})
EDUC_map = {0:0,1:1,2:2, 3:3,4:3,5:3, 6:4 , 7:5,8:5,9:5 , 10:6,11:6 , 99:7} 
for idx in range(len(index_to_group)):
    dict_ = group_to_feature[index_to_group[idx]] #this gets the feature dictionary back
    sex = SEX_map[dict_['SEX']]
    #race = RACE_map[dict_['RACE']]
    educ = EDUC_map[dict_['EDUC']]
    
    #get group number, hard-coded
    group_num = 8*sex + educ
    
    final_map[idx] = group_num  
        

In [38]:
final_map
#we will get nicely splitted 16 groups!

{0: 10,
 1: 8,
 2: 2,
 3: 8,
 4: 6,
 5: 13,
 6: 4,
 7: 10,
 8: 8,
 9: 0,
 10: 1,
 11: 3,
 12: 14,
 13: 3,
 14: 3,
 15: 0,
 16: 10,
 17: 5,
 18: 13,
 19: 0,
 20: 7,
 21: 2,
 22: 2,
 23: 2,
 24: 8,
 25: 6,
 26: 3,
 27: 12,
 28: 13,
 29: 4,
 30: 9,
 31: 12,
 32: 10,
 33: 2,
 34: 10,
 35: 10,
 36: 8,
 37: 12,
 38: 2,
 39: 8,
 40: 13,
 41: 6,
 42: 7,
 43: 11,
 44: 8,
 45: 2,
 46: 11,
 47: 10,
 48: 2,
 49: 2,
 50: 3,
 51: 11,
 52: 10,
 53: 11,
 54: 10,
 55: 11,
 56: 9,
 57: 11,
 58: 3,
 59: 2,
 60: 3,
 61: 0,
 62: 8,
 63: 3,
 64: 13,
 65: 2,
 66: 7,
 67: 11,
 68: 2,
 69: 12,
 70: 12,
 71: 2,
 72: 11,
 73: 1,
 74: 3,
 75: 4,
 76: 10,
 77: 3,
 78: 5,
 79: 10,
 80: 2,
 81: 2,
 82: 2,
 83: 10,
 84: 6,
 85: 14,
 86: 2,
 87: 10,
 88: 3,
 89: 12,
 90: 2,
 91: 2,
 92: 12,
 93: 9,
 94: 4,
 95: 3,
 96: 12,
 97: 2,
 98: 9,
 99: 3,
 100: 10,
 101: 10,
 102: 15,
 103: 12,
 104: 4,
 105: 5,
 106: 5,
 107: 12,
 108: 4,
 109: 3,
 110: 2,
 111: 7,
 112: 5,
 113: 2,
 114: 3,
 115: 12,
 116: 7,
 117: 10,
 118:

In [39]:
#find the size of the group
'''
should get
0      37712
1      41403
2     120539
3      51487
4      40815
5      17456
6      13252
7      12546
8      36020
9      35166
10    101596
11     54456
12     55858
13     21438
14     11242
15     10981
'''
group_sizes = pd.Series(final_map).value_counts().sort_index()
group_sizes

0      37712
1      41403
2     120539
3      51487
4      40815
5      17456
6      13252
7      12546
8      36020
9      35166
10    101596
11     54456
12     55858
13     21438
14     11242
15     10981
dtype: int64

In [28]:
# save the size of the group, in case we need it for normalization
np.save('group_sizes',group_sizes.values)

In [42]:
# also save this mapping. Takes a few minutes each time to calculate
import json
map_json = json.dumps(final_map)
f = open("group_map.json","w")
f.write(map_json)
f.close()

In [14]:
#now that we have group mapping, we can put data in correct matrix.

n=1000 #7284 is the max. Use only first n columns. Using 1000 instead of 7284 columns makes data processing about 22.3 times faster
#n=1000 uses 21.85 seconds per 1000 entries. So about 14400 seconds in total.
k=16
B=[np.zeros((n,n)) for i in range(k)]

import time
time_start = time.time()
print_every=500

#use standardized data
m=661967
interval = 1000 #break into this many per file
index = 0
while (index < m):
    start = index
    end = index + interval
    std_df = pd.read_csv('./std_data/colorado_at_'+str(start)+'to'+str(end)+'.csv',header=0)
    
    #dealing with first column formatting
    std_df.set_index(std_df.iloc[:,0],inplace=True)
    std_df.drop('Unnamed: 0', axis=1, inplace=True)
    std_df.index.name = None
    
    #go through each datapoint
    for idx_row in std_df.index:
        group_num = final_map[idx_row]
        
        #add vector v v^T to correct matrix B_{group_num}. Use only first n columns by slicing on the Pandas Series
        vec = std_df.loc[idx_row].iloc[:n].values.reshape(n,1) #reshape so it is a column vector
        B[group_num] += (vec @ vec.transpose())/m #divide m to normalize things, for now. Else number can overflow too big.
        
        if((idx_row+1) % print_every == 0): print('Processed', idx_row+1, 'rows. Time used:',time.time()-time_start,'seconds.')
    
    index += interval

Processed 500 rows. Time used: 15.26449990272522 seconds.
Processed 1000 rows. Time used: 27.188015460968018 seconds.
Processed 1500 rows. Time used: 41.60253691673279 seconds.
Processed 2000 rows. Time used: 54.4543023109436 seconds.
Processed 2500 rows. Time used: 71.18393850326538 seconds.
Processed 3000 rows. Time used: 83.31373739242554 seconds.
Processed 3500 rows. Time used: 99.64919638633728 seconds.
Processed 4000 rows. Time used: 122.2981231212616 seconds.
Processed 4500 rows. Time used: 144.36808586120605 seconds.
Processed 5000 rows. Time used: 159.3888761997223 seconds.
Processed 5500 rows. Time used: 172.96066737174988 seconds.
Processed 6000 rows. Time used: 184.15839958190918 seconds.
Processed 6500 rows. Time used: 197.6675307750702 seconds.
Processed 7000 rows. Time used: 208.64702606201172 seconds.
Processed 7500 rows. Time used: 222.9220654964447 seconds.
Processed 8000 rows. Time used: 236.53171801567078 seconds.
Processed 8500 rows. Time used: 250.01535081863403 s

Processed 68500 rows. Time used: 1881.946055650711 seconds.
Processed 69000 rows. Time used: 1895.5678358078003 seconds.
Processed 69500 rows. Time used: 1910.4381256103516 seconds.
Processed 70000 rows. Time used: 1921.9338901042938 seconds.
Processed 70500 rows. Time used: 1935.8545067310333 seconds.
Processed 71000 rows. Time used: 1949.4111914634705 seconds.
Processed 71500 rows. Time used: 1964.3298273086548 seconds.
Processed 72000 rows. Time used: 1975.6414365768433 seconds.
Processed 72500 rows. Time used: 1992.1431059837341 seconds.
Processed 73000 rows. Time used: 2005.7895548343658 seconds.
Processed 73500 rows. Time used: 2020.3140325546265 seconds.
Processed 74000 rows. Time used: 2031.9066169261932 seconds.
Processed 74500 rows. Time used: 2048.5752255916595 seconds.
Processed 75000 rows. Time used: 2063.6914887428284 seconds.
Processed 75500 rows. Time used: 2080.513487100601 seconds.
Processed 76000 rows. Time used: 2096.1100244522095 seconds.
Processed 76500 rows. Time

Processed 136000 rows. Time used: 3810.3437218666077 seconds.
Processed 136500 rows. Time used: 3823.425651550293 seconds.
Processed 137000 rows. Time used: 3833.7258055210114 seconds.
Processed 137500 rows. Time used: 3847.6806223392487 seconds.
Processed 138000 rows. Time used: 3859.8430511951447 seconds.
Processed 138500 rows. Time used: 3875.224244117737 seconds.
Processed 139000 rows. Time used: 3885.965049982071 seconds.
Processed 139500 rows. Time used: 3904.8375182151794 seconds.
Processed 140000 rows. Time used: 3917.3398468494415 seconds.
Processed 140500 rows. Time used: 3934.4578914642334 seconds.
Processed 141000 rows. Time used: 3945.1047337055206 seconds.
Processed 141500 rows. Time used: 3960.094400167465 seconds.
Processed 142000 rows. Time used: 3972.418847322464 seconds.
Processed 142500 rows. Time used: 3987.835518836975 seconds.
Processed 143000 rows. Time used: 4000.9430458545685 seconds.
Processed 143500 rows. Time used: 4017.2120378017426 seconds.
Processed 1440

Processed 203000 rows. Time used: 5512.506633758545 seconds.
Processed 203500 rows. Time used: 5525.118166208267 seconds.
Processed 204000 rows. Time used: 5535.280298471451 seconds.
Processed 204500 rows. Time used: 5548.451548576355 seconds.
Processed 205000 rows. Time used: 5560.699025630951 seconds.
Processed 205500 rows. Time used: 5573.336449623108 seconds.
Processed 206000 rows. Time used: 5583.581522941589 seconds.
Processed 206500 rows. Time used: 5595.999898672104 seconds.
Processed 207000 rows. Time used: 5606.19900560379 seconds.
Processed 207500 rows. Time used: 5618.660356044769 seconds.
Processed 208000 rows. Time used: 5628.857867717743 seconds.
Processed 208500 rows. Time used: 5641.318498134613 seconds.
Processed 209000 rows. Time used: 5651.517389535904 seconds.
Processed 209500 rows. Time used: 5664.045756340027 seconds.
Processed 210000 rows. Time used: 5674.65353512764 seconds.
Processed 210500 rows. Time used: 5687.445967912674 seconds.
Processed 211000 rows. Tim

Processed 270500 rows. Time used: 7053.547634840012 seconds.
Processed 271000 rows. Time used: 7063.508304357529 seconds.
Processed 271500 rows. Time used: 7076.148206710815 seconds.
Processed 272000 rows. Time used: 7087.405611991882 seconds.
Processed 272500 rows. Time used: 7100.585784435272 seconds.
Processed 273000 rows. Time used: 7111.199489831924 seconds.
Processed 273500 rows. Time used: 7124.3609273433685 seconds.
Processed 274000 rows. Time used: 7135.133532047272 seconds.
Processed 274500 rows. Time used: 7148.3067715168 seconds.
Processed 275000 rows. Time used: 7159.00546002388 seconds.
Processed 275500 rows. Time used: 7171.894514799118 seconds.
Processed 276000 rows. Time used: 7182.512082338333 seconds.
Processed 276500 rows. Time used: 7195.459741592407 seconds.
Processed 277000 rows. Time used: 7206.257503271103 seconds.
Processed 277500 rows. Time used: 7219.112175226212 seconds.
Processed 278000 rows. Time used: 7229.8580276966095 seconds.
Processed 278500 rows. Ti

Processed 338000 rows. Time used: 8606.441380023956 seconds.
Processed 338500 rows. Time used: 8618.903488636017 seconds.
Processed 339000 rows. Time used: 8629.141299962997 seconds.
Processed 339500 rows. Time used: 8641.459023714066 seconds.
Processed 340000 rows. Time used: 8651.680161714554 seconds.
Processed 340500 rows. Time used: 8664.042162895203 seconds.
Processed 341000 rows. Time used: 8674.086903095245 seconds.
Processed 341500 rows. Time used: 8686.8022108078 seconds.
Processed 342000 rows. Time used: 8696.969517469406 seconds.
Processed 342500 rows. Time used: 8709.451548337936 seconds.
Processed 343000 rows. Time used: 8719.445495128632 seconds.
Processed 343500 rows. Time used: 8731.777603626251 seconds.
Processed 344000 rows. Time used: 8741.788313627243 seconds.
Processed 344500 rows. Time used: 8754.427766561508 seconds.
Processed 345000 rows. Time used: 8764.428264856339 seconds.
Processed 345500 rows. Time used: 8776.730930805206 seconds.
Processed 346000 rows. Tim

Processed 405500 rows. Time used: 10164.538385868073 seconds.
Processed 406000 rows. Time used: 10175.240362644196 seconds.
Processed 406500 rows. Time used: 10188.091053724289 seconds.
Processed 407000 rows. Time used: 10198.697196245193 seconds.
Processed 407500 rows. Time used: 10214.258533716202 seconds.
Processed 408000 rows. Time used: 10225.343254804611 seconds.
Processed 408500 rows. Time used: 10238.35721707344 seconds.
Processed 409000 rows. Time used: 10249.176959991455 seconds.
Processed 409500 rows. Time used: 10262.306414604187 seconds.
Processed 410000 rows. Time used: 10272.919516563416 seconds.
Processed 410500 rows. Time used: 10285.842119932175 seconds.
Processed 411000 rows. Time used: 10296.572257995605 seconds.
Processed 411500 rows. Time used: 10309.469517707825 seconds.
Processed 412000 rows. Time used: 10320.021403074265 seconds.
Processed 412500 rows. Time used: 10332.893510580063 seconds.
Processed 413000 rows. Time used: 10343.460562944412 seconds.
Processed

Processed 472000 rows. Time used: 11693.574858427048 seconds.
Processed 472500 rows. Time used: 11705.935061693192 seconds.
Processed 473000 rows. Time used: 11716.075354337692 seconds.
Processed 473500 rows. Time used: 11728.334115982056 seconds.
Processed 474000 rows. Time used: 11738.517609834671 seconds.
Processed 474500 rows. Time used: 11750.76854133606 seconds.
Processed 475000 rows. Time used: 11760.989753723145 seconds.
Processed 475500 rows. Time used: 11773.385200738907 seconds.
Processed 476000 rows. Time used: 11783.434453725815 seconds.
Processed 476500 rows. Time used: 11795.718556165695 seconds.
Processed 477000 rows. Time used: 11806.049743652344 seconds.
Processed 477500 rows. Time used: 11818.454089641571 seconds.
Processed 478000 rows. Time used: 11828.524303913116 seconds.
Processed 478500 rows. Time used: 11840.724220991135 seconds.
Processed 479000 rows. Time used: 11850.849686145782 seconds.
Processed 479500 rows. Time used: 11863.100677013397 seconds.
Processed

Processed 538500 rows. Time used: 13214.095994234085 seconds.
Processed 539000 rows. Time used: 13224.738193035126 seconds.
Processed 539500 rows. Time used: 13237.548425674438 seconds.
Processed 540000 rows. Time used: 13248.21111702919 seconds.
Processed 540500 rows. Time used: 13261.040220499039 seconds.
Processed 541000 rows. Time used: 13271.67602443695 seconds.
Processed 541500 rows. Time used: 13284.703368902206 seconds.
Processed 542000 rows. Time used: 13295.331707239151 seconds.
Processed 542500 rows. Time used: 13308.082649469376 seconds.
Processed 543000 rows. Time used: 13318.574115753174 seconds.
Processed 543500 rows. Time used: 13331.411343812943 seconds.
Processed 544000 rows. Time used: 13341.843417406082 seconds.
Processed 544500 rows. Time used: 13354.94069480896 seconds.
Processed 545000 rows. Time used: 13365.743065834045 seconds.
Processed 545500 rows. Time used: 13378.520753622055 seconds.
Processed 546000 rows. Time used: 13389.110352754593 seconds.
Processed 5

Processed 605000 rows. Time used: 14746.634343862534 seconds.
Processed 605500 rows. Time used: 14758.938281536102 seconds.
Processed 606000 rows. Time used: 14768.994196414948 seconds.
Processed 606500 rows. Time used: 14781.192336797714 seconds.
Processed 607000 rows. Time used: 14791.076757669449 seconds.
Processed 607500 rows. Time used: 14803.524753570557 seconds.
Processed 608000 rows. Time used: 14813.746299266815 seconds.
Processed 608500 rows. Time used: 14826.042096853256 seconds.
Processed 609000 rows. Time used: 14835.975928068161 seconds.
Processed 609500 rows. Time used: 14848.14542388916 seconds.
Processed 610000 rows. Time used: 14858.31340098381 seconds.
Processed 610500 rows. Time used: 14870.481796264648 seconds.
Processed 611000 rows. Time used: 14880.453356266022 seconds.
Processed 611500 rows. Time used: 14892.665004491806 seconds.
Processed 612000 rows. Time used: 14902.520200252533 seconds.
Processed 612500 rows. Time used: 14914.571108818054 seconds.
Processed 

In [15]:
for group_num, mat in enumerate(B):
    print(group_num,np.linalg.eigh(mat)[0].max())

0 3.675623521260823
1 2.025093219867852
2 5.142243271754197
3 2.405058865775579
4 2.67446946747546
5 1.5492022647132917
6 2.529988569736907
7 1.918247470995683
8 3.610386476417945
9 2.0122435139867845
10 3.438719820877974
11 1.938973671655857
12 2.067629069416331
13 1.3571494192577527
14 1.4739292827041663
15 1.4735409920239932


In [18]:
#save these matrices
for idx, mat in enumerate(B):
    np.save('./mat/n=1000_group'+str(idx+1),mat)

In [16]:
n=2000 #7284 is the max. Use only first n columns.

k=16
C=[np.zeros((n,n)) for i in range(k)]

import time
time_start = time.time()
print_every=500

#use standardized data
m=661967
interval = 1000 #break into this many per file
index = 0
while (index < m):
    start = index
    end = index + interval
    std_df = pd.read_csv('./std_data/colorado_at_'+str(start)+'to'+str(end)+'.csv',header=0)
    
    #dealing with first column formatting
    std_df.set_index(std_df.iloc[:,0],inplace=True)
    std_df.drop('Unnamed: 0', axis=1, inplace=True)
    std_df.index.name = None
    
    #go through each datapoint
    for idx_row in std_df.index:
        group_num = final_map[idx_row]
        
        #add vector v v^T to correct matrix B_{group_num}. Use only first n columns by slicing on the Pandas Series
        vec = std_df.loc[idx_row].iloc[:n].values.reshape(n,1) #reshape so it is a column vector
        C[group_num] += (vec @ vec.transpose())/m #divide m to normalize things, for now. Else number can overflow too big.
        
        if((idx_row+1) % print_every == 0): print('Processed', idx_row+1, 'rows. Time used:',time.time()-time_start,'seconds.')
    
    index += interval

Processed 500 rows. Time used: 32.87811541557312 seconds.
Processed 1000 rows. Time used: 64.08777618408203 seconds.
Processed 1500 rows. Time used: 98.17055177688599 seconds.
Processed 2000 rows. Time used: 129.67709803581238 seconds.
Processed 2500 rows. Time used: 163.65369701385498 seconds.
Processed 3000 rows. Time used: 195.1724407672882 seconds.
Processed 3500 rows. Time used: 229.0925590991974 seconds.
Processed 4000 rows. Time used: 260.65775752067566 seconds.
Processed 4500 rows. Time used: 294.3624002933502 seconds.
Processed 5000 rows. Time used: 327.84960985183716 seconds.
Processed 5500 rows. Time used: 361.64763474464417 seconds.
Processed 6000 rows. Time used: 392.86942982673645 seconds.
Processed 6500 rows. Time used: 426.3505446910858 seconds.
Processed 7000 rows. Time used: 457.4791398048401 seconds.
Processed 7500 rows. Time used: 490.88934803009033 seconds.
Processed 8000 rows. Time used: 522.4823460578918 seconds.
Processed 8500 rows. Time used: 556.051043510437 s

Processed 69000 rows. Time used: 4462.701095819473 seconds.
Processed 69500 rows. Time used: 4496.16147685051 seconds.
Processed 70000 rows. Time used: 4527.409222841263 seconds.
Processed 70500 rows. Time used: 4560.6559154987335 seconds.
Processed 71000 rows. Time used: 4591.934368610382 seconds.
Processed 71500 rows. Time used: 4624.987406253815 seconds.
Processed 72000 rows. Time used: 4655.987082004547 seconds.
Processed 72500 rows. Time used: 4688.7270476818085 seconds.
Processed 73000 rows. Time used: 4719.133071184158 seconds.
Processed 73500 rows. Time used: 4751.953127861023 seconds.
Processed 74000 rows. Time used: 4783.248980760574 seconds.
Processed 74500 rows. Time used: 4816.560532808304 seconds.
Processed 75000 rows. Time used: 4847.331171989441 seconds.
Processed 75500 rows. Time used: 4880.503208637238 seconds.
Processed 76000 rows. Time used: 4911.240885019302 seconds.
Processed 76500 rows. Time used: 4944.197649478912 seconds.
Processed 77000 rows. Time used: 4977.2

Processed 137000 rows. Time used: 8871.693171262741 seconds.
Processed 137500 rows. Time used: 8904.764507770538 seconds.
Processed 138000 rows. Time used: 8935.637883663177 seconds.
Processed 138500 rows. Time used: 8968.922806978226 seconds.
Processed 139000 rows. Time used: 8999.74120593071 seconds.
Processed 139500 rows. Time used: 9032.916881084442 seconds.
Processed 140000 rows. Time used: 9063.488882303238 seconds.
Processed 140500 rows. Time used: 9096.566246032715 seconds.
Processed 141000 rows. Time used: 9127.109154701233 seconds.
Processed 141500 rows. Time used: 9160.018290996552 seconds.
Processed 142000 rows. Time used: 9191.681327581406 seconds.
Processed 142500 rows. Time used: 9225.666078329086 seconds.
Processed 143000 rows. Time used: 9257.000670433044 seconds.
Processed 143500 rows. Time used: 9290.888775110245 seconds.
Processed 144000 rows. Time used: 9323.046501636505 seconds.
Processed 144500 rows. Time used: 9356.720603704453 seconds.
Processed 145000 rows. Ti

Processed 204000 rows. Time used: 13206.515253782272 seconds.
Processed 204500 rows. Time used: 13240.609186887741 seconds.
Processed 205000 rows. Time used: 13272.237042188644 seconds.
Processed 205500 rows. Time used: 13306.676091194153 seconds.
Processed 206000 rows. Time used: 13338.002225160599 seconds.
Processed 206500 rows. Time used: 13371.780093669891 seconds.
Processed 207000 rows. Time used: 13403.386096954346 seconds.
Processed 207500 rows. Time used: 13436.958808660507 seconds.
Processed 208000 rows. Time used: 13468.367035150528 seconds.
Processed 208500 rows. Time used: 13503.691589832306 seconds.
Processed 209000 rows. Time used: 13535.323355197906 seconds.
Processed 209500 rows. Time used: 13568.898179292679 seconds.
Processed 210000 rows. Time used: 13600.236217737198 seconds.
Processed 210500 rows. Time used: 13633.600091457367 seconds.
Processed 211000 rows. Time used: 13664.958143949509 seconds.
Processed 211500 rows. Time used: 13698.034541368484 seconds.
Processe

Processed 270500 rows. Time used: 17522.248846769333 seconds.
Processed 271000 rows. Time used: 17553.380917310715 seconds.
Processed 271500 rows. Time used: 17586.879554748535 seconds.
Processed 272000 rows. Time used: 17618.140374422073 seconds.
Processed 272500 rows. Time used: 17651.78326368332 seconds.
Processed 273000 rows. Time used: 17682.589147090912 seconds.
Processed 273500 rows. Time used: 17715.842932224274 seconds.
Processed 274000 rows. Time used: 17746.862461328506 seconds.
Processed 274500 rows. Time used: 17780.438707351685 seconds.
Processed 275000 rows. Time used: 17811.397608995438 seconds.
Processed 275500 rows. Time used: 17844.886325120926 seconds.
Processed 276000 rows. Time used: 17875.934965848923 seconds.
Processed 276500 rows. Time used: 17908.90163731575 seconds.
Processed 277000 rows. Time used: 17939.503100395203 seconds.
Processed 277500 rows. Time used: 17972.62257051468 seconds.
Processed 278000 rows. Time used: 18003.14777612686 seconds.
Processed 27

Processed 337000 rows. Time used: 21832.18225455284 seconds.
Processed 337500 rows. Time used: 21865.13852953911 seconds.
Processed 338000 rows. Time used: 21897.641252994537 seconds.
Processed 338500 rows. Time used: 21931.31513619423 seconds.
Processed 339000 rows. Time used: 21961.854933023453 seconds.
Processed 339500 rows. Time used: 21994.588383197784 seconds.
Processed 340000 rows. Time used: 22025.037527561188 seconds.
Processed 340500 rows. Time used: 22058.90374970436 seconds.
Processed 341000 rows. Time used: 22090.625354766846 seconds.
Processed 341500 rows. Time used: 22124.393346309662 seconds.
Processed 342000 rows. Time used: 22156.217217206955 seconds.
Processed 342500 rows. Time used: 22190.511320352554 seconds.
Processed 343000 rows. Time used: 22222.099831581116 seconds.
Processed 343500 rows. Time used: 22255.649769306183 seconds.
Processed 344000 rows. Time used: 22287.24366235733 seconds.
Processed 344500 rows. Time used: 22321.189945220947 seconds.
Processed 345

Processed 403500 rows. Time used: 26286.15631890297 seconds.
Processed 404000 rows. Time used: 26318.605207443237 seconds.
Processed 404500 rows. Time used: 26353.89659166336 seconds.
Processed 405000 rows. Time used: 26387.10101556778 seconds.
Processed 405500 rows. Time used: 26421.985845565796 seconds.
Processed 406000 rows. Time used: 26454.472802877426 seconds.
Processed 406500 rows. Time used: 26489.910468816757 seconds.
Processed 407000 rows. Time used: 26524.315049648285 seconds.
Processed 407500 rows. Time used: 26560.28792500496 seconds.
Processed 408000 rows. Time used: 26593.834594011307 seconds.
Processed 408500 rows. Time used: 26629.2224919796 seconds.
Processed 409000 rows. Time used: 26661.485468149185 seconds.
Processed 409500 rows. Time used: 26696.372051477432 seconds.
Processed 410000 rows. Time used: 26728.92607331276 seconds.
Processed 410500 rows. Time used: 26763.434437036514 seconds.
Processed 411000 rows. Time used: 26795.923799276352 seconds.
Processed 41150

Processed 470500 rows. Time used: 30694.048112154007 seconds.
Processed 471000 rows. Time used: 30724.865718841553 seconds.
Processed 471500 rows. Time used: 30757.964266061783 seconds.
Processed 472000 rows. Time used: 30788.7751288414 seconds.
Processed 472500 rows. Time used: 30822.949368953705 seconds.
Processed 473000 rows. Time used: 30854.41890692711 seconds.
Processed 473500 rows. Time used: 30888.466953516006 seconds.
Processed 474000 rows. Time used: 30920.512397289276 seconds.
Processed 474500 rows. Time used: 30954.683460474014 seconds.
Processed 475000 rows. Time used: 30986.45936536789 seconds.
Processed 475500 rows. Time used: 31020.318885803223 seconds.
Processed 476000 rows. Time used: 31052.021861314774 seconds.
Processed 476500 rows. Time used: 31086.174963474274 seconds.
Processed 477000 rows. Time used: 31117.56767487526 seconds.
Processed 477500 rows. Time used: 31151.45478773117 seconds.
Processed 478000 rows. Time used: 31182.94470310211 seconds.
Processed 47850

Processed 537500 rows. Time used: 35070.088562488556 seconds.
Processed 538000 rows. Time used: 35101.34945631027 seconds.
Processed 538500 rows. Time used: 35135.40907764435 seconds.
Processed 539000 rows. Time used: 35166.36756300926 seconds.
Processed 539500 rows. Time used: 35199.640276670456 seconds.
Processed 540000 rows. Time used: 35230.4651260376 seconds.
Processed 540500 rows. Time used: 35263.93654561043 seconds.
Processed 541000 rows. Time used: 35294.65924477577 seconds.
Processed 541500 rows. Time used: 35327.693606853485 seconds.
Processed 542000 rows. Time used: 35358.779072761536 seconds.
Processed 542500 rows. Time used: 35392.068335056305 seconds.
Processed 543000 rows. Time used: 35423.10157632828 seconds.
Processed 543500 rows. Time used: 35456.24388599396 seconds.
Processed 544000 rows. Time used: 35488.40351462364 seconds.
Processed 544500 rows. Time used: 35522.47708773613 seconds.
Processed 545000 rows. Time used: 35554.398188114166 seconds.
Processed 545500 ro

Processed 604500 rows. Time used: 39445.88307738304 seconds.
Processed 605000 rows. Time used: 39476.58305644989 seconds.
Processed 605500 rows. Time used: 39509.573347091675 seconds.
Processed 606000 rows. Time used: 39540.3778116703 seconds.
Processed 606500 rows. Time used: 39574.28550338745 seconds.
Processed 607000 rows. Time used: 39605.910803079605 seconds.
Processed 607500 rows. Time used: 39640.394911527634 seconds.
Processed 608000 rows. Time used: 39671.9564166069 seconds.
Processed 608500 rows. Time used: 39707.854279994965 seconds.
Processed 609000 rows. Time used: 39739.373413562775 seconds.
Processed 609500 rows. Time used: 39773.506836652756 seconds.
Processed 610000 rows. Time used: 39805.46462512016 seconds.
Processed 610500 rows. Time used: 39839.19685292244 seconds.
Processed 611000 rows. Time used: 39870.53537273407 seconds.
Processed 611500 rows. Time used: 39904.41546678543 seconds.
Processed 612000 rows. Time used: 39937.41377902031 seconds.
Processed 612500 row

In [17]:
for group_num, mat in enumerate(C):
    print(group_num,np.linalg.eigh(mat)[0].min())

0 -4.906718781428809e-13
1 -1.9714223295927395e-13
2 -1.8336757299925135e-12
3 -4.046724343820797e-13
4 -3.219143094587027e-13
5 -7.176042141081703e-14
6 -3.685496097469833e-14
7 -2.6641283866788154e-14
8 -4.729000836561221e-13
9 -1.72059695031734e-13
10 -9.184864823131048e-13
11 -2.2329878948669648e-13
12 -3.0376691105133716e-13
13 -5.0158090084316294e-14
14 -1.106687187808267e-14
15 -1.9680921184201135e-14


In [20]:
#save these matrices    
for idx, mat in enumerate(C):
    np.save('./mat/n=2000_group'+str(idx+1),mat)

In [43]:
n=3000 #7284 is the max. Use only first n columns.

k=16
D=[np.zeros((n,n)) for i in range(k)]

import time
time_start = time.time()
print_every=500

#use standardized data
m=661967
interval = 1000 #break into this many per file
index = 0
while (index < m):
    start = index
    end = index + interval
    std_df = pd.read_csv('./std_data/colorado_at_'+str(start)+'to'+str(end)+'.csv',header=0)
    
    #dealing with first column formatting
    std_df.set_index(std_df.iloc[:,0],inplace=True)
    std_df.drop('Unnamed: 0', axis=1, inplace=True)
    std_df.index.name = None
    
    #go through each datapoint
    for idx_row in std_df.index:
        group_num = final_map[idx_row]
        
        #add vector v v^T to correct matrix B_{group_num}. Use only first n columns by slicing on the Pandas Series
        vec = std_df.loc[idx_row].iloc[:n].values.reshape(n,1) #reshape so it is a column vector
        D[group_num] += (vec @ vec.transpose())/m #divide m to normalize things, for now. Else number can overflow too big.
        
        if((idx_row+1) % print_every == 0): print('Processed', idx_row+1, 'rows. Time used:',time.time()-time_start,'seconds.')
    
    index += interval

Processed 500 rows. Time used: 115.38290691375732 seconds.
Processed 1000 rows. Time used: 219.13734197616577 seconds.
Processed 1500 rows. Time used: 326.9590380191803 seconds.
Processed 2000 rows. Time used: 431.405394077301 seconds.
Processed 2500 rows. Time used: 540.0839693546295 seconds.
Processed 3000 rows. Time used: 643.7270267009735 seconds.
Processed 3500 rows. Time used: 749.6474266052246 seconds.
Processed 4000 rows. Time used: 846.0094170570374 seconds.
Processed 4500 rows. Time used: 946.7712981700897 seconds.
Processed 5000 rows. Time used: 1043.7789452075958 seconds.
Processed 5500 rows. Time used: 1143.717398405075 seconds.
Processed 6000 rows. Time used: 1239.4507043361664 seconds.
Processed 6500 rows. Time used: 1340.3612840175629 seconds.
Processed 7000 rows. Time used: 1438.6578693389893 seconds.
Processed 7500 rows. Time used: 1542.3975734710693 seconds.
Processed 8000 rows. Time used: 1641.2438380718231 seconds.
Processed 8500 rows. Time used: 1743.8871884346008

Processed 69000 rows. Time used: 12346.886925935745 seconds.
Processed 69500 rows. Time used: 12415.034624814987 seconds.
Processed 70000 rows. Time used: 12481.89390039444 seconds.
Processed 70500 rows. Time used: 12551.145167350769 seconds.
Processed 71000 rows. Time used: 12617.271448850632 seconds.
Processed 71500 rows. Time used: 12686.211361646652 seconds.
Processed 72000 rows. Time used: 12754.805770874023 seconds.
Processed 72500 rows. Time used: 12823.306971549988 seconds.
Processed 73000 rows. Time used: 12889.132806062698 seconds.
Processed 73500 rows. Time used: 12958.055027008057 seconds.
Processed 74000 rows. Time used: 13023.930128335953 seconds.
Processed 74500 rows. Time used: 13093.824773550034 seconds.
Processed 75000 rows. Time used: 13160.62252664566 seconds.
Processed 75500 rows. Time used: 13228.694764614105 seconds.
Processed 76000 rows. Time used: 13295.352854967117 seconds.
Processed 76500 rows. Time used: 13363.741006851196 seconds.
Processed 77000 rows. Time

Processed 136000 rows. Time used: 21402.202458143234 seconds.
Processed 136500 rows. Time used: 21471.889225244522 seconds.
Processed 137000 rows. Time used: 21537.638868808746 seconds.
Processed 137500 rows. Time used: 21607.496220111847 seconds.
Processed 138000 rows. Time used: 21674.250228405 seconds.
Processed 138500 rows. Time used: 21744.97900867462 seconds.
Processed 139000 rows. Time used: 21811.80759358406 seconds.
Processed 139500 rows. Time used: 21880.03707265854 seconds.
Processed 140000 rows. Time used: 21946.510511159897 seconds.
Processed 140500 rows. Time used: 22014.98807501793 seconds.
Processed 141000 rows. Time used: 22081.874363422394 seconds.
Processed 141500 rows. Time used: 22149.65065932274 seconds.
Processed 142000 rows. Time used: 22216.386295080185 seconds.
Processed 142500 rows. Time used: 22285.269731283188 seconds.
Processed 143000 rows. Time used: 22350.84934616089 seconds.
Processed 143500 rows. Time used: 22419.54756641388 seconds.
Processed 144000 r

Processed 202500 rows. Time used: 30413.301535367966 seconds.
Processed 203000 rows. Time used: 30480.138386964798 seconds.
Processed 203500 rows. Time used: 30548.18770456314 seconds.
Processed 204000 rows. Time used: 30615.038523197174 seconds.
Processed 204500 rows. Time used: 30684.117369890213 seconds.
Processed 205000 rows. Time used: 30751.70182609558 seconds.
Processed 205500 rows. Time used: 30821.380759954453 seconds.
Processed 206000 rows. Time used: 30886.283309698105 seconds.
Processed 206500 rows. Time used: 30954.64509987831 seconds.
Processed 207000 rows. Time used: 31020.277433395386 seconds.
Processed 207500 rows. Time used: 31089.52279663086 seconds.
Processed 208000 rows. Time used: 31155.97578382492 seconds.
Processed 208500 rows. Time used: 31224.452859163284 seconds.
Processed 209000 rows. Time used: 31291.221195220947 seconds.
Processed 209500 rows. Time used: 31359.492974996567 seconds.
Processed 210000 rows. Time used: 31425.999745368958 seconds.
Processed 210

Processed 269500 rows. Time used: 39465.87849164009 seconds.
Processed 270000 rows. Time used: 39531.42449235916 seconds.
Processed 270500 rows. Time used: 39599.999319553375 seconds.
Processed 271000 rows. Time used: 39668.03168940544 seconds.
Processed 271500 rows. Time used: 39737.36000442505 seconds.
Processed 272000 rows. Time used: 39803.98690319061 seconds.
Processed 272500 rows. Time used: 39872.103209495544 seconds.
Processed 273000 rows. Time used: 39938.540548563 seconds.
Processed 273500 rows. Time used: 40006.15085935593 seconds.
Processed 274000 rows. Time used: 40072.757759809494 seconds.
Processed 274500 rows. Time used: 40140.61948180199 seconds.
Processed 275000 rows. Time used: 40207.41582989693 seconds.
Processed 275500 rows. Time used: 40276.221155166626 seconds.
Processed 276000 rows. Time used: 40341.958409786224 seconds.
Processed 276500 rows. Time used: 40410.10023021698 seconds.
Processed 277000 rows. Time used: 40475.23414349556 seconds.
Processed 277500 rows

Processed 336500 rows. Time used: 48508.64219856262 seconds.
Processed 337000 rows. Time used: 48574.846813201904 seconds.
Processed 337500 rows. Time used: 48642.72650003433 seconds.
Processed 338000 rows. Time used: 48711.41902565956 seconds.
Processed 338500 rows. Time used: 48780.424088954926 seconds.
Processed 339000 rows. Time used: 48847.111286878586 seconds.
Processed 339500 rows. Time used: 48915.80391192436 seconds.
Processed 340000 rows. Time used: 48981.22730135918 seconds.
Processed 340500 rows. Time used: 49049.913796424866 seconds.
Processed 341000 rows. Time used: 49116.32535195351 seconds.
Processed 341500 rows. Time used: 49185.067298173904 seconds.
Processed 342000 rows. Time used: 49250.372400045395 seconds.
Processed 342500 rows. Time used: 49319.04591488838 seconds.
Processed 343000 rows. Time used: 49385.07345581055 seconds.
Processed 343500 rows. Time used: 49453.7150220871 seconds.
Processed 344000 rows. Time used: 49520.65161323547 seconds.
Processed 344500 ro

Processed 403500 rows. Time used: 57551.150076150894 seconds.
Processed 404000 rows. Time used: 57617.389927864075 seconds.
Processed 404500 rows. Time used: 57685.730907678604 seconds.
Processed 405000 rows. Time used: 57753.69710302353 seconds.
Processed 405500 rows. Time used: 57822.97823905945 seconds.
Processed 406000 rows. Time used: 57888.73365521431 seconds.
Processed 406500 rows. Time used: 57957.63275909424 seconds.
Processed 407000 rows. Time used: 58023.3667216301 seconds.
Processed 407500 rows. Time used: 58091.755011081696 seconds.
Processed 408000 rows. Time used: 58157.87982773781 seconds.
Processed 408500 rows. Time used: 58225.400213479996 seconds.
Processed 409000 rows. Time used: 58292.1939702034 seconds.
Processed 409500 rows. Time used: 58360.44713616371 seconds.
Processed 410000 rows. Time used: 58426.52486085892 seconds.
Processed 410500 rows. Time used: 58493.81690073013 seconds.
Processed 411000 rows. Time used: 58559.94425368309 seconds.
Processed 411500 rows

Processed 470500 rows. Time used: 66593.62355661392 seconds.
Processed 471000 rows. Time used: 66658.87438464165 seconds.
Processed 471500 rows. Time used: 66729.62683343887 seconds.
Processed 472000 rows. Time used: 66796.68024516106 seconds.
Processed 472500 rows. Time used: 66865.34573173523 seconds.
Processed 473000 rows. Time used: 66931.88651251793 seconds.
Processed 473500 rows. Time used: 66999.94234514236 seconds.
Processed 474000 rows. Time used: 67066.85354304314 seconds.
Processed 474500 rows. Time used: 67135.30609345436 seconds.
Processed 475000 rows. Time used: 67201.55945277214 seconds.
Processed 475500 rows. Time used: 67269.27124738693 seconds.
Processed 476000 rows. Time used: 67335.53626155853 seconds.
Processed 476500 rows. Time used: 67404.55628538132 seconds.
Processed 477000 rows. Time used: 67470.53365087509 seconds.
Processed 477500 rows. Time used: 67539.01215291023 seconds.
Processed 478000 rows. Time used: 67604.6516289711 seconds.
Processed 478500 rows. Ti

Processed 538000 rows. Time used: 75717.76222991943 seconds.
Processed 538500 rows. Time used: 75786.64573645592 seconds.
Processed 539000 rows. Time used: 75853.54913377762 seconds.
Processed 539500 rows. Time used: 75922.57365894318 seconds.
Processed 540000 rows. Time used: 75988.40544009209 seconds.
Processed 540500 rows. Time used: 76057.39157676697 seconds.
Processed 541000 rows. Time used: 76122.84700655937 seconds.
Processed 541500 rows. Time used: 76191.99789214134 seconds.
Processed 542000 rows. Time used: 76258.17837715149 seconds.
Processed 542500 rows. Time used: 76327.54556775093 seconds.
Processed 543000 rows. Time used: 76393.49059081078 seconds.
Processed 543500 rows. Time used: 76461.56689977646 seconds.
Processed 544000 rows. Time used: 76527.60003471375 seconds.
Processed 544500 rows. Time used: 76595.6632194519 seconds.
Processed 545000 rows. Time used: 76665.15323829651 seconds.
Processed 545500 rows. Time used: 76733.79271388054 seconds.
Processed 546000 rows. Ti

Processed 605500 rows. Time used: 84836.04716300964 seconds.
Processed 606000 rows. Time used: 84902.00064468384 seconds.
Processed 606500 rows. Time used: 84970.98655128479 seconds.
Processed 607000 rows. Time used: 85038.06696248055 seconds.
Processed 607500 rows. Time used: 85106.50102519989 seconds.
Processed 608000 rows. Time used: 85173.00471401215 seconds.
Processed 608500 rows. Time used: 85240.72548389435 seconds.
Processed 609000 rows. Time used: 85306.81324768066 seconds.
Processed 609500 rows. Time used: 85376.1224758625 seconds.
Processed 610000 rows. Time used: 85443.20631432533 seconds.
Processed 610500 rows. Time used: 85510.81281638145 seconds.
Processed 611000 rows. Time used: 85578.70398426056 seconds.
Processed 611500 rows. Time used: 85647.33800554276 seconds.
Processed 612000 rows. Time used: 85712.52951025963 seconds.
Processed 612500 rows. Time used: 85781.73318004608 seconds.
Processed 613000 rows. Time used: 85847.74441099167 seconds.
Processed 613500 rows. Ti

In [44]:
#save these matrices    
for idx, mat in enumerate(D):
    np.save('./mat/n=3000_group'+str(idx+1),mat)