In [1]:
import pandas as pd
import geopandas as gpd
import shapely
import os
import json

# Locate the local data files (raw inputs and output directory)


In [2]:
# Resolve the input and output locations relative to the directory the
# notebook is run from.
working_dird = os.getcwd()
raw_data_folder = os.path.join(working_dird, 'data', 'raw')
output_dir = os.path.join(working_dird, 'data', 'output')

# The export cells write straight into output_dir and fail when the folder is
# missing, so create it up front (a no-op when it already exists).
os.makedirs(output_dir, exist_ok=True)

# Full path of every entry found in the raw-data folder.
# NOTE(review): os.listdir returns entries in arbitrary order, while a later
# cell picks frames by list position -- confirm the order is stable on every
# machine, or select the files by name instead.
files = [os.path.join(raw_data_folder, x) for x in os.listdir(raw_data_folder)]

In [3]:
# Read every raw file as CSV; the resulting list keeps the order of `files`.
ct_dfs = list(map(pd.read_csv, files))

In [4]:
# Bind the loaded frames to descriptive names by their position in ct_dfs.
# Position 3 is not used.
(monthly_avg_price,
 monthly_reg_type,
 monthly_prod_sale,
 monthly_tax,
 approved_towns,
 shops) = [ct_dfs[position] for position in (0, 1, 2, 4, 5, 6)]

##### UNIFY PRODUCT TYPES


In [5]:
# Distinct product-type labels of both tables, as returned by unique().
product_types_1 = list(monthly_reg_type['Product Type'].unique())
product_types_2 = list(monthly_prod_sale['Product Type'].unique())

# Positional translation table: the i-th label of product_types_1 is replaced
# by the product_types_2 label found at the i-th position listed here.
product_types_2_dict = [product_types_2[position]
                        for position in (-1, 2, 0, -2, -1, -1, 1, -1, -1)]

# zip stops at the shorter sequence, so only paired labels receive a mapping.
new_product_type_dict = dict(zip(product_types_1, product_types_2_dict))

# Rewrite the labels in place; a label without a mapping raises KeyError.
monthly_reg_type['Product Type'] = monthly_reg_type['Product Type'].apply(
    lambda label: new_product_type_dict[label])

##### LOCATIONS


In [6]:
# Parse each shop's WKT location a single time and reuse the parsed points for
# both coordinates (previously every string was parsed once per coordinate).
shop_points = shops['Location'].apply(lambda wkt: shapely.from_wkt(wkt))
shops['latitude'] = shop_points.apply(lambda point: point.y)
shops['longitude'] = shop_points.apply(lambda point: point.x)

# Title-case the city names (str.title()).
shops['City'] = shops['City'].str.title()

In [7]:
# GADM 4.1 boundary file for the USA (level 2, judging by the file name),
# fetched over the network each time this cell runs.
GADM_USA_LEVEL2_URL = "https://geodata.ucdavis.edu/gadm/gadm4.1/json/gadm41_USA_2.json"
usa_counties = gpd.read_file(GADM_USA_LEVEL2_URL)

In [8]:
# Keep the Connecticut rows only, reduce them to four columns and give those
# columns shorter names. reset_index() keeps the previous row labels as an
# extra `index` column.
is_connecticut = usa_counties['NAME_1'] == 'Connecticut'
conntt = (
    usa_counties.loc[is_connecticut, ['GID_1', 'NAME_1', 'NAME_2', 'geometry']]
    .rename(columns={'GID_1': 'id', 'NAME_1': 'state', 'NAME_2': 'county'})
    .reset_index()
)

In [9]:
def getIntersectionId(point: shapely.Point, bnds: list[shapely.MultiPolygon]):
    """Return the position of the first boundary in ``bnds`` containing ``point``.

    Boundaries are tested in order through their ``contains`` method. When
    none of them contains the point, the returned value equals ``len(bnds)``,
    which is not a valid position in ``bnds``.
    """
    for position, boundary in enumerate(bnds):
        if boundary.contains(point):
            return position
    # No boundary matched: signal it with the one-past-the-end position.
    return len(bnds)

In [10]:
# County polygons and their names, aligned by position.
bnds = list(conntt['geometry'])
counties = list(conntt['county'])

# getIntersectionId returns len(bnds) when no polygon contains the point.
# Appending None gives that position a value, so a shop lying outside every
# polygon gets a missing county instead of raising IndexError.
county_lookup = counties + [None]
shops['counties'] = shops['Location'].apply(
    lambda x: county_lookup[getIntersectionId(shapely.from_wkt(x), bnds)])

In [12]:
# Reduce the shops table to the columns listed here, in this order.
shop_export_columns = ['Type', 'Business', 'Street', 'Zipcode',
                       'City', 'counties', 'latitude', 'longitude']
shops = shops[shop_export_columns]

In [13]:
# Replace the column labels positionally with lowercase names; `counties`
# becomes `county`.
shop_column_names = ['type', 'business', 'street', 'zipcode',
                     'city', 'county', 'latitude', 'longitude']
shops.columns = shop_column_names

In [14]:
def _add_month_year(frame, label_column):
    """Add 'month' and 'year' columns derived from a space-separated label.

    The first space-separated token of each value in ``label_column`` becomes
    'month' and the second becomes 'year'; both stay strings. ``frame`` is
    modified in place.
    """
    frame['month'] = frame[label_column].apply(
        lambda label: label.split(' ')[0])
    frame['year'] = frame[label_column].apply(
        lambda label: label.split(' ')[1])


# Same split for all three monthly tables; only the source column differs.
_add_month_year(monthly_prod_sale, 'Month Ending')
_add_month_year(monthly_avg_price, 'Month Ending')
_add_month_year(monthly_reg_type, 'Month')

In [15]:
# Reduce the tax table to the period columns and the three tax columns.
tax_source_columns = ['Month', 'Calendar Year', 'Fiscal Year',
                      'Plant Material Tax', 'Edible Products Tax',
                      'Other Cannabis Tax']
monthly_tax = monthly_tax[tax_source_columns]

In [16]:
# Replace the column labels positionally with snake_case names.
# NOTE(review): 'other_cannabis__tax' has a double underscore -- possibly a
# typo, but it ends up as a key in the exported JSON, so confirm with the
# consumers of that file before renaming it.
tax_column_names = ['month', 'year', 'fiscal_year',
                    'plant_material_tax', 'edible_products_tax',
                    'other_cannabis__tax']
monthly_tax.columns = tax_column_names

In [17]:
# Write each table to output_dir as JSON with orient='records'.
json_exports = (
    ('ct_monthly_tax.json', monthly_tax),
    ('ct_monthly_avg_price.json', monthly_avg_price),
    ('ct_monthly_prod_sale.json', monthly_prod_sale),
    ('ct_monthly_reg_type.json', monthly_reg_type),
    ('ct_shops.json', shops),
)
for file_name, frame in json_exports:
    frame.to_json(os.path.join(output_dir, file_name), orient='records')

In [18]:
# Save the Connecticut county table, geometry included, as GeoJSON.
counties_geojson_path = os.path.join(output_dir, 'ct_counties.geojson')
conntt.to_file(counties_geojson_path, driver='GeoJSON')