In [16]:
import json
import numpy as np
import os
import pandas as pd
from scipy.stats import mode
from reV import Resource
from reV.utilities import init_logger
from reVX.reeds import ReedsClassifier, ReedsProfiles, ReedsTimeslices

# Configure logging for the 'reVX.reeds' logger through reV's init_logger helper.
# NOTE(review): presumably this is what makes the INFO lines show up in the
# recorded cell outputs - confirm against reV.utilities.init_logger.
init_logger('reVX.reeds')

# Root of the reVX test data. The default is the original developer checkout;
# export REVX_TEST_DATA before starting the kernel to point the notebook at a
# different checkout without editing this cell. An empty value falls back to
# the default as well.
test_dir = (os.environ.get('REVX_TEST_DATA')
            or '/Users/mrossol/Git_Repos/reVX/tests/data')
# Every ReEDS file written by the cells below is placed under this directory.
out_dir = os.path.join(test_dir, 'reeds')
#out_dir = '/Users/mrossol/Downloads/'

In [3]:
import concurrent.futures as cf

def test(data, i):
    coeffs = np.corrcoef(data, data, rowvar=False)
    print("Coeffs for {} complete".format(i))
    return coeffs

# Fan ten correlation jobs out over a process pool and collect each result
# under the job index it was submitted with.
out = {}
with cf.ProcessPoolExecutor(max_workers=None) as exe:
    # future -> job index, so results can be keyed regardless of finish order.
    futures = {exe.submit(test, np.random.random((1000, 40)), job): job
               for job in range(10)}

    for n_done, future in enumerate(cf.as_completed(futures), start=1):
        out[futures[future]] = future.result()
        print('Completed {} out of {} representative '
              'profile timeslice stats futures.'
              .format(n_done, len(futures)))

Coeffs for 2 complete
Coeffs for 0 complete
Coeffs for 1 complete
Coeffs for 3 complete
Coeffs for 8 complete
Coeffs for 4 complete
Coeffs for 9 complete
Coeffs for 7 complete
Coeffs for 5 complete
Coeffs for 6 complete
Completed 1 out of 10 representative profile timeslice stats futures.
Completed 2 out of 10 representative profile timeslice stats futures.
Completed 3 out of 10 representative profile timeslice stats futures.
Completed 4 out of 10 representative profile timeslice stats futures.
Completed 5 out of 10 representative profile timeslice stats futures.
Completed 6 out of 10 representative profile timeslice stats futures.
Completed 7 out of 10 representative profile timeslice stats futures.
Completed 8 out of 10 representative profile timeslice stats futures.
Completed 9 out of 10 representative profile timeslice stats futures.
Completed 10 out of 10 representative profile timeslice stats futures.


In [None]:
# File locations for the timeslice debugging run; rep_profiles and out_path
# both refer to the same ReEDS_Profiles.h5 path.
out_path = os.path.join(out_dir, 'ReEDS_Profiles.h5')
rep_profiles = out_path
cf_profiles = os.path.join(test_dir, 'reV_gen', 'gen_pv_2012.h5')
time_slices = os.path.join(test_dir, 'reeds/inputs/timeslices.csv')
rev_table = os.path.join(out_dir, 'ReEDS_Classifications.csv')

# NOTE(review): this rebinds `test`, which an earlier cell defines as a
# helper function submitted to the process pool.
test = ReedsTimeslices(rep_profiles, time_slices)

# Serial variant, left disabled:
#test._rep_profile_stats(test.profiles, test.meta, test._timeslice_groups, max_workers=1)
# Call the stats routine directly with the default worker count.
test._rep_profile_stats(
    test.profiles,
    test.meta,
    test._timeslice_groups,
    max_workers=None,
)

INFO - 2019-12-11 14:30:20,515 [reeds_timeslices.py:273] : Profile data check complete.
INFO - 2019-12-11 14:30:20,523 [reeds_timeslices.py:330] : Extracted timeslice map.
INFO - 2019-12-11 14:30:20,729 [reeds_timeslices.py:482] : Computing representative profile timeslice stats for 16 timeslice groups.
INFO - 2019-12-11 14:30:20,730 [reeds_timeslices.py:484] : Computing timeslice stats with max_workers: 8
Start stats for 0
Start stats for 1
Start stats for 3
Start stats for 2
Start stats for 4
Start stats for 5
Data Pulled for 0
Data Pulled for 1
Start stats for 6
Data Pulled for 3
Data Pulled for 2
Start stats for 7
Data Pulled for 4
Data Pulled for 5
Data Pulled for 6
Data Pulled for 7


In [2]:
# Paths for the timing run. The chained assignment binds both rep_profiles and
# out_path to the same ReEDS_Profiles.h5 path.
rep_profiles = out_path = os.path.join(out_dir, 'ReEDS_Profiles.h5')
# cf_profiles and rev_table are defined here but not passed to the call below.
cf_profiles = os.path.join(test_dir, 'reV_gen', 'gen_pv_2012.h5')
time_slices = os.path.join(test_dir, 'reeds/inputs/timeslices.csv')
rev_table = os.path.join(out_dir, 'ReEDS_Classifications.csv')

# Time a single-worker ReedsTimeslices.run with the IPython %time line magic.
%time stats_rep, coeffs_rep = ReedsTimeslices.run(rep_profiles, time_slices, legacy_format=True, max_workers=1)

# Same timing with max_workers=None, left disabled.
#%time stats_rep, coeffs_rep = ReedsTimeslices.run(rep_profiles, time_slices, legacy_format=True, max_workers=None)

INFO - 2019-12-11 13:52:08,815 [reeds_timeslices.py:273] : Profile data check complete.
INFO - 2019-12-11 13:52:08,823 [reeds_timeslices.py:330] : Extracted timeslice map.
INFO - 2019-12-11 13:52:09,021 [reeds_timeslices.py:483] : Computing representative profile timeslice stats for 16 timeslice groups.
INFO - 2019-12-11 13:52:09,022 [reeds_timeslices.py:485] : Computing timeslice stats with max_workers: 1
Start stats for 0
Pull data for 0
init Coeffs df for 0
compute coeffs for 0
Coeffs for 0 complete
INFO - 2019-12-11 13:52:09,065 [reeds_timeslices.py:525] : Completed 1 out of 16 representative profile timeslice stats.
Start stats for 1
Pull data for 1
init Coeffs df for 1
compute coeffs for 1
Coeffs for 1 complete
INFO - 2019-12-11 13:52:09,098 [reeds_timeslices.py:525] : Completed 2 out of 16 representative profile timeslice stats.
Start stats for 2
Pull data for 2
init Coeffs df for 2
compute coeffs for 2
Coeffs for 2 complete
INFO - 2019-12-11 13:52:09,132 [reeds_timeslices.py:52

  c /= stddev[:, None]


Start stats for 6
Pull data for 6
init Coeffs df for 6
compute coeffs for 6
Coeffs for 6 complete
INFO - 2019-12-11 13:52:09,269 [reeds_timeslices.py:525] : Completed 7 out of 16 representative profile timeslice stats.
Start stats for 7
Pull data for 7
init Coeffs df for 7
compute coeffs for 7
Coeffs for 7 complete
INFO - 2019-12-11 13:52:09,307 [reeds_timeslices.py:525] : Completed 8 out of 16 representative profile timeslice stats.
Start stats for 8
Pull data for 8
init Coeffs df for 8
compute coeffs for 8
Coeffs for 8 complete
INFO - 2019-12-11 13:52:09,350 [reeds_timeslices.py:525] : Completed 9 out of 16 representative profile timeslice stats.
Start stats for 9
Pull data for 9
init Coeffs df for 9
compute coeffs for 9
Coeffs for 9 complete
INFO - 2019-12-11 13:52:09,387 [reeds_timeslices.py:525] : Completed 10 out of 16 representative profile timeslice stats.
Start stats for 10
Pull data for 10
init Coeffs df for 10
compute coeffs for 10
Coeffs for 10 complete
INFO - 2019-12-11 13

KeyboardInterrupt: 

In [54]:
# Disabled one-off fixture builder, kept for reference only (nothing in this
# cell executes). When enabled it reads sc_full_out_1.csv, keeps only the
# gen_gids (and their matching gid_counts) whose gid is below `thresh`, drops
# rows left without any gids, and writes the result to
# <test_dir>/reV_sc/sc_table.csv.
# def remove_bad_gids(row, thresh=99):
#     gids = row['gen_gids']
#     counts = row['gid_counts']
#     convert = False
#     if isinstance(gids, str):
#         convert = True
#         gids = json.loads(gids)
#         counts = json.loads(counts)
        
#     pos = np.where(np.array(gids) < thresh)[0]
    
#     gids = [gids[i] for i in pos]
#     counts = [counts[i] for i in pos]

#     if convert:
#         row['gen_gids'] = json.dumps(gids)
#         row['gid_counts'] = json.dumps(counts)
#     else:
#         row['gen_gids'] = gids
#         row['gid_counts'] = counts

#     row['sites'] = len(gids)

#     return row
     
    
# sc_table = '/Users/mrossol/Git_Repos/reV/tests/data/sc_out/sc_full_out_1.csv'
# sc_table = pd.read_csv(sc_table)
# sc_table = sc_table.apply(remove_bad_gids, axis=1)
# sc_table = sc_table.loc[sc_table['sites'] > 0].drop(columns=['sites'])

# out_path = os.path.join(test_dir, 'reV_sc', 'sc_table.csv')
# sc_table.to_csv(out_path, index=False)

In [17]:
# Inputs: the supply curve table and the resource class breakpoints file.
rev_table = os.path.join(test_dir, 'reV_sc', 'sc_table.csv')
resource_classes = os.path.join(
    test_dir, 'reeds', 'inputs', 'trg_breakpoints_naris.csv')

# Run the classifier; it returns the classification and aggregation tables.
table, agg = ReedsClassifier.create(
    rev_table,
    resource_classes,
    region_map='reeds_region',
    sc_bins=5,
)

# Write both tables to out_dir without their index. out_path is left pointing
# at the aggregation file, the last one written.
for frame, csv_name in ((table, 'ReEDS_Classifications.csv'),
                        (agg, 'ReEDS_Aggregation.csv')):
    out_path = os.path.join(out_dir, csv_name)
    frame.to_csv(out_path, index=False)

In [18]:
# Inputs: the generation profiles file and the ReEDS_Classifications.csv table
# located in out_dir; the profiles are written to ReEDS_Profiles.h5.
out_path = os.path.join(out_dir, 'ReEDS_Profiles.h5')
rev_table = os.path.join(out_dir, 'ReEDS_Classifications.csv')
cf_profiles = os.path.join(test_dir, 'reV_gen', 'gen_pv_2012.h5')

# Serial run requesting a single profile (n_profiles=1); the call is the last
# expression so its return value is shown as the cell output.
ReedsProfiles.run(
    cf_profiles,
    rev_table,
    profiles_dset='cf_profile',
    rep_method='meanoid',
    err_method='rmse',
    n_profiles=1,
    reg_cols=('region', 'class'),
    parallel=False,
    fout=out_path,
    legacy_format=False,
)

INFO - 2019-12-11 14:40:04,746 [reeds_profiles.py:201] : Representative profiles complete!


({0: array([[0.   , 0.   , 0.   , ..., 0.   , 0.   , 0.   ],
         [0.   , 0.   , 0.   , ..., 0.   , 0.   , 0.   ],
         [0.   , 0.   , 0.   , ..., 0.   , 0.   , 0.   ],
         ...,
         [0.087, 0.086, 0.086, ..., 0.084, 0.083, 0.083],
         [0.   , 0.   , 0.   , ..., 0.   , 0.   , 0.   ],
         [0.   , 0.   , 0.   , ..., 0.   , 0.   , 0.   ]], dtype=float32)},
     region  class rep_gen_gid rep_res_gid
 0       31      2          41     1318658
 1       11      2          76     1321872
 2       31      1          77     1321903
 3       11      1          86     1323879
 4       35      2          29     1317475
 5       35      1          81     1322022
 6       36      1          86     1323872
 7       34      3           4     1316416
 8       34      2          48     1319194
 9       15      2          79     1322010
 10      15      3          83     1322010
 11      10      1          83     1323263
 12      17      2          84     1324107
 13      34    

In [21]:
# Same inputs and output file as the single-profile run: generation profiles,
# the classification table in out_dir, and ReEDS_Profiles.h5 as the target.
out_path = os.path.join(out_dir, 'ReEDS_Profiles.h5')
rev_table = os.path.join(out_dir, 'ReEDS_Classifications.csv')
cf_profiles = os.path.join(test_dir, 'reV_gen', 'gen_pv_2012.h5')

# Serial run requesting three profiles (n_profiles=3); the call is the last
# expression so its return value is shown as the cell output.
ReedsProfiles.run(
    cf_profiles,
    rev_table,
    profiles_dset='cf_profile',
    rep_method='meanoid',
    err_method='rmse',
    n_profiles=3,
    reg_cols=('region', 'class'),
    parallel=False,
    fout=out_path,
    legacy_format=False,
)

INFO - 2019-12-11 14:42:41,695 [reeds_profiles.py:201] : Representative profiles complete!


({0: array([[0.   , 0.   , 0.   , ..., 0.   , 0.   , 0.   ],
         [0.   , 0.   , 0.   , ..., 0.   , 0.   , 0.   ],
         [0.   , 0.   , 0.   , ..., 0.   , 0.   , 0.   ],
         ...,
         [0.087, 0.086, 0.086, ..., 0.084, 0.083, 0.083],
         [0.   , 0.   , 0.   , ..., 0.   , 0.   , 0.   ],
         [0.   , 0.   , 0.   , ..., 0.   , 0.   , 0.   ]], dtype=float32),
  1: array([[0.   , 0.   , 0.   , ..., 0.   , 0.   , 0.   ],
         [0.   , 0.   , 0.   , ..., 0.   , 0.   , 0.   ],
         [0.   , 0.   , 0.   , ..., 0.   , 0.   , 0.   ],
         ...,
         [0.087, 0.086, 0.086, ..., 0.084, 0.08 , 0.083],
         [0.   , 0.   , 0.   , ..., 0.   , 0.   , 0.   ],
         [0.   , 0.   , 0.   , ..., 0.   , 0.   , 0.   ]], dtype=float32),
  2: array([[0.   , 0.   , 0.   , ..., 0.   , 0.   , 0.   ],
         [0.   , 0.   , 0.   , ..., 0.   , 0.   , 0.   ],
         [0.   , 0.   , 0.   , ..., 0.   , 0.   , 0.   ],
         ...,
         [0.087, 0.087, 0.087, ..., 0.084, 0.

In [22]:
# Input locations; rep_profiles and out_path start out as the same
# ReEDS_Profiles.h5 path, and out_path is reused for each CSV written below.
out_path = os.path.join(out_dir, 'ReEDS_Profiles.h5')
rep_profiles = out_path
cf_profiles = os.path.join(test_dir, 'reV_gen', 'gen_pv_2012.h5')
time_slices = os.path.join(test_dir, 'reeds/inputs/timeslices.csv')
rev_table = os.path.join(out_dir, 'ReEDS_Classifications.csv')

# Timeslice stats and coefficients from the representative profiles file,
# single worker, legacy output format.
stats_rep, coeffs_rep = ReedsTimeslices.run(
    rep_profiles,
    time_slices,
    legacy_format=True,
    max_workers=1,
)
out_path = os.path.join(out_dir, 'ReEDS_Timeslice_rep_stats.csv')
stats_rep.to_csv(out_path, index=False)
out_path = os.path.join(out_dir, 'ReEDS_Timeslice_rep_coeffs.csv')
coeffs_rep.to_csv(out_path, index=False)

# Timeslice stats from the generation profiles plus the classification table;
# the second return value is discarded.
stats_cf, _ = ReedsTimeslices.run(
    cf_profiles,
    time_slices,
    rev_table=rev_table,
    max_workers=1,
    legacy_format=True,
)
out_path = os.path.join(out_dir, 'ReEDS_Timeslice_cf_stats.csv')
stats_cf.to_csv(out_path, index=False)

INFO - 2019-12-11 14:49:28,545 [reeds_timeslices.py:273] : Profile data check complete.
INFO - 2019-12-11 14:49:28,557 [reeds_timeslices.py:330] : Extracted timeslice map.
INFO - 2019-12-11 14:49:28,746 [reeds_timeslices.py:478] : Computing representative profile timeslice stats for 16 timeslice groups.
INFO - 2019-12-11 14:49:28,747 [reeds_timeslices.py:480] : Computing timeslice stats with max_workers: 1
INFO - 2019-12-11 14:49:28,780 [reeds_timeslices.py:520] : Completed 1 out of 16 representative profile timeslice stats.
INFO - 2019-12-11 14:49:28,810 [reeds_timeslices.py:520] : Completed 2 out of 16 representative profile timeslice stats.
INFO - 2019-12-11 14:49:28,837 [reeds_timeslices.py:520] : Completed 3 out of 16 representative profile timeslice stats.
INFO - 2019-12-11 14:49:28,862 [reeds_timeslices.py:520] : Completed 4 out of 16 representative profile timeslice stats.
INFO - 2019-12-11 14:49:28,888 [reeds_timeslices.py:520] : Completed 5 out of 16 representative profile ti