In [34]:
import pandas as pd

In [35]:
# Load the pickled agent table (its index is moved back into a regular column)
# and the header-less state lookup CSV (abbreviation, full name).
agents = pd.read_pickle(
    "../../input_agents/agent_df_base_res_national_load_adjusted.pkl"
).reset_index()
states = pd.read_csv(
    "../../../states.csv",
    header=None,
    names=['state_abbr', 'state_name'],
)

# Attach the full state name to every agent row. The left join keeps agents
# whose abbreviation has no match in the lookup; they get a NaN state_name.
agents = agents.merge(states, how='left', on='state_abbr')

# One row per state with its agent count, largest states first.
# NOTE: groupby drops NaN keys by default, so agents left without a
# state_name by the join above are not counted in any row.
# NOTE(review): the name `sorted` shadows the Python builtin; it is kept
# because the following cell reads it.
sorted = (
    agents
    .groupby(['state_abbr', 'state_name'], as_index=False)
    .agg(agent_count=('agent_id', 'count'))
    .sort_values(by='agent_count', ascending=False)
)

In [36]:
# Split the per-state agent counts into size tiers and write the state lists
# to CSV. Every file is header-less with the columns (state_abbr, state_name),
# which is the layout states.csv is read with (header=None) in the load cell.
STATE_COLUMNS = ['state_abbr', 'state_name']
LARGE_STATE_MIN_AGENTS = 500  # strictly more agents than this -> "large"
SMALL_STATE_MAX_AGENTS = 100  # this many agents or fewer -> "small"
SAMPLE_SEED = 42              # fixed seed so the *_test.csv samples are reproducible


def _write_state_list(frame, path, n_sample=None):
    """Write the state columns of ``frame`` to ``path`` as a header-less CSV.

    Parameters
    ----------
    frame : pandas.DataFrame
        Must contain the columns listed in STATE_COLUMNS.
    path : str
        Destination CSV path.
    n_sample : int or None
        When given, write a seeded random sample of that many rows instead of
        the whole frame. The size is capped at the number of available rows so
        a tier with fewer states than requested does not raise ValueError.
    """
    subset = frame[STATE_COLUMNS]
    if n_sample is not None:
        subset = subset.sample(n=min(n_sample, len(subset)), random_state=SAMPLE_SEED)
    subset.to_csv(path, index=False, header=False)


# Define large, mid and small states by agent count
large_states = sorted[sorted['agent_count'] > LARGE_STATE_MIN_AGENTS]
mid_states = sorted[
    (sorted['agent_count'] > SMALL_STATE_MAX_AGENTS)
    & (sorted['agent_count'] <= LARGE_STATE_MIN_AGENTS)
]
small_states = sorted[sorted['agent_count'] <= SMALL_STATE_MAX_AGENTS]

# (frame, destination, sample size) for every output; None writes the full list.
# NOTE: ../../../states.csv is also the lookup file read in the load cell, so
# this overwrites that input with the states that currently have agents,
# ordered by agent count.
# states_test.csv is written without a header row like every other file here
# (it previously was the only one with header=True).
state_list_outputs = [
    (large_states, "../../../large_states.csv", None),
    (mid_states, "../../../mid_states.csv", None),
    (small_states, "../../../small_states.csv", None),
    (large_states, "../../../large_states_test.csv", 2),
    (mid_states, "../../../mid_states_test.csv", 3),
    (small_states, "../../../small_states_test.csv", 3),
    (sorted, "../../../states.csv", None),
    (sorted, "../../../states_test.csv", 10),
]
for tier_frame, csv_path, n_sample in state_list_outputs:
    _write_state_list(tier_frame, csv_path, n_sample)


In [37]:
# Copy each generated state list to the gs://dgen-assets Cloud Storage bucket,
# keeping the local file name as the object name.
for csv_stem in [
    'large_states', 'mid_states', 'small_states',
    'large_states_test', 'mid_states_test', 'small_states_test',
    'states', 'states_test',
]:
    !gsutil cp ../../../{csv_stem}.csv gs://dgen-assets/{csv_stem}.csv

Copying file://../../../large_states.csv [Content-Type=text/csv]...
/ [1 files][  196.0 B/  196.0 B]                                                
Operation completed over 1 objects/196.0 B.                                      
Copying file://../../../mid_states.csv [Content-Type=text/csv]...
/ [1 files][  339.0 B/  339.0 B]                                                
Operation completed over 1 objects/339.0 B.                                      
Copying file://../../../small_states.csv [Content-Type=text/csv]...
/ [1 files][   67.0 B/   67.0 B]                                                
Operation completed over 1 objects/67.0 B.                                       
Copying file://../../../large_states_test.csv [Content-Type=text/csv]...
/ [1 files][   23.0 B/   23.0 B]                                                
Operation completed over 1 objects/23.0 B.                                       
Copying file://../../../mid_states_test.csv [Content-Type=text/csv]...
/ 