In [None]:
import pandas as pd
import numpy as np
from sqlalchemy import create_engine
from pysandag.database import get_connection_string

# Build the SQLAlchemy engine used by every database read/write below.
# The path is a raw string: '\d' and '\c' are not valid escape sequences, so
# the non-raw literal only worked by accident and triggers a warning on
# newer Python versions. The resulting string value is unchanged.
# NOTE(review): presumably 'mssql_db' names a section of config.yml that
# get_connection_string turns into a connection URL -- confirm in pysandag.
db_connection_string = get_connection_string(r'..\data\config.yml', 'mssql_db')
mssql_engine = create_engine(db_connection_string)

In [None]:
# Load every scheduled-development parcel row whose capacity_3 is positive.
#
# The ORDER BY matters: later cells take the first row per site_id and attach
# seeded random years to sites *by position*. Without an explicit ordering the
# database is free to return rows in any order, so the same seed could map a
# different year to a given site from one run to the next.
sched_dev_parcel_sql = '''
SELECT [site_id]
      ,[parcel_id]
      ,[capacity_3]
      ,[sfu_effective_adj]
      ,[mfu_effective_adj]
      ,[mhu_effective_adj]
      ,[notes]
      ,[editor]
  FROM [urbansim].[urbansim].[scheduled_development_parcel]
  WHERE capacity_3 > 0
  ORDER BY [site_id], [parcel_id]
'''

sched_dev_df = pd.read_sql(sched_dev_parcel_sql, mssql_engine)

In [None]:
# One row per site: keep the first parcel row encountered for each site_id.
# The explicit copy gives later cells an independent frame to add columns to.
yr_for_each_site = sched_dev_df.drop_duplicates(
    subset=['site_id'],
    keep='first',
).copy()

# Number of distinct sites (one row each).
yr_for_each_site.shape[0]

In [None]:
# Distinct site ids, in order of first appearance in sched_dev_df.
sites = sched_dev_df['site_id'].unique()

# Should equal the row count of the one-row-per-site frame.
len(sites)

In [None]:
# Seed NumPy's global generator so the same draws come out on every run
# (for a given number of sites).
np.random.seed(7)

# One integer per site, drawn from 1..20 inclusive (the upper bound 21 is
# exclusive).
random_num = np.random.randint(low=1, high=21, size=len(sites))

In [None]:
 d = {1: 2017, 2: 2018, 3: 2019, 4: 2020, 5: 2021, 6: 2022, 7: 2023, 8: 2024,\
      9: 2025, 10: 2017, 11: 2018, 12: 2019, 13: 2020, 14: 2021, 15: 2022,\
      16: 2023, 17: 2018, 18: 2019, 19: 2020, 20: 2021}
random_yr = [d.get(x) for x in random_num]
# random_yr

In [None]:
# Attach the drawn years positionally (one per site row), then reduce the
# frame to the site -> year mapping.
yr_for_each_site['yr'] = random_yr
yr_for_each_site = yr_for_each_site.loc[:, ['site_id', 'yr']].copy()

# Sanity check: how many sites landed in each year.
pd.DataFrame(
    {'sites_by_year': yr_for_each_site.groupby(['yr'])['site_id'].size()}
).reset_index()

In [None]:
# Give every parcel row the year assigned to its site. The left join keeps
# all parcel rows from sched_dev_df.
sched_dev_w_year = sched_dev_df.merge(
    yr_for_each_site,
    on='site_id',
    how='left',
)

# Sanity check: total capacity_3 scheduled in each year.
pd.DataFrame(
    {'units_by_year': sched_dev_w_year.groupby(['yr'])['capacity_3'].sum()}
).reset_index()

In [None]:
# Stamp every row with sched_version_id = 1 (adds/overwrites the column in place).
sched_dev_w_year['sched_version_id'] = 1

In [None]:
# Keep only the columns that get written to the database, in output order.
sched_dev_for_db = sched_dev_w_year.loc[
    :,
    ['sched_version_id', 'parcel_id', 'yr', 'site_id', 'capacity_3'],
].copy()

In [None]:
# Row count of the frame about to be written.
sched_dev_for_db.shape[0]

In [None]:
# sched_dev_for_db.head()

In [None]:
# Total capacity_3 across all rows about to be written.
sched_dev_for_db['capacity_3'].sum()

In [None]:
# Write the frame to urbansim.scheduled_development_do_not_use.
# if_exists='replace' drops any existing table of that name before inserting;
# the DataFrame index is not written.
sched_dev_for_db.to_sql(
    name='scheduled_development_do_not_use',
    con=mssql_engine,
    schema='urbansim',
    index=False,
    if_exists='replace',
)