# **Drug Trafficking (2008 - 2011) Case Study**
<img src="./images/sof_insignia.png" width="350px">

In [25]:
import pandas as pd 
import numpy as np 
import json
import pymysql 
import sqlite3 
import os 
import boto3 
import warnings 
from sqlalchemy import create_engine
import gc 
import seaborn 
import matplotlib.pyplot as plt 

from typing import List, Dict 

# Suppress every Python warning from here on so cell outputs stay free of warning text.
# NOTE(review): this also hides deprecation and pandas/SQL driver warnings — confirm that is intended.
warnings.filterwarnings('ignore')
# Draw all matplotlib figures with the built-in 'ggplot' style sheet.
plt.style.use('ggplot')

# **Helper Function**
## **To MySQL** 

In [3]:
def to_mysql(backend_engine: 'PyMsql', dir_path: str, if_exists: str = 'append') -> None:
    """
    Description
    -----------
        - Helper function to load every `.csv` file of a CDE dataset directory into a SQL
          database (one table per file, named after the file) for further query

    Parameter
    ---------
        - backend_engine: connection object handed to `DataFrame.to_sql` (the notebook passes a SQLAlchemy engine)
        - dir_path: a valid CDE path (directory that contains the `.csv` files)
        - if_exists: forwarded to `DataFrame.to_sql` ('fail', 'replace' or 'append').
          The default 'append' keeps the historical behaviour; pass 'replace' to make
          a re-run overwrite the tables instead of duplicating their rows

    Return
    ------
        - None 

    Raise
    -----
        - FileNotFoundError: `dir_path` does not exist
        - ConnectionError: a builtin `ConnectionError` was raised while loading
    """

    if not os.path.exists(dir_path):
        raise FileNotFoundError(f"[ ERROR ] Unable to find the following CDE dataset path {dir_path}")

    try:

        # Sorted so that tables are always created in the same order.
        for csv_file in sorted(os.listdir(dir_path)):
            csv_path: str = os.path.join(dir_path, csv_file)
            tbl_name, extension = os.path.splitext(csv_file)

            # Only regular files whose extension is exactly ".csv" are loaded; a plain
            # substring test would also pick up names such as "csv_notes.txt".
            if extension != ".csv" or not os.path.isfile(csv_path):
                continue

            pd.read_csv(csv_path).to_sql(tbl_name.strip(), con=backend_engine, if_exists=if_exists)

    # NOTE(review): only the builtin ConnectionError is translated here; SQLAlchemy reports
    # database failures with its own exception classes, which propagate unchanged — confirm
    # whether those should be wrapped as well.
    except ConnectionError as e:

        raise ConnectionError("[ ERROR ] The program has encountered the following error message. Please check your SQL connection !!!.") from e

## **MySQL Cursor** 

In [4]:
# Connection settings are read from the environment so that no credential is stored
# in the notebook. MYSQL_PASSWORD is required (a missing value raises KeyError);
# MYSQL_USER and MYSQL_HOST fall back to the previous local defaults.
con: 'MySQL' = pymysql.connect(
    user=os.environ.get("MYSQL_USER", "root"),
    passwd=os.environ["MYSQL_PASSWORD"],
    host=os.environ.get("MYSQL_HOST", "127.0.0.1"),
)
cursor = con.cursor() 

# `sql_arg` is the connection-URL prefix stored in AWS Secrets Manager; later cells
# append a database name to it before calling `create_engine`.
secrets: 'SecretsManager' = boto3.client("secretsmanager")
sql_arg: str = json.loads(secrets.get_secret_value(SecretId="afghan_project").get("SecretString")).get("sql_args")

# Fail here rather than letting a missing entry turn into URLs such as "NoneTX_2008".
if not sql_arg:
    raise KeyError("[ ERROR ] Secret 'afghan_project' does not contain a 'sql_args' entry.")

## **Populate TX DB with Crime Incidents between `2008 - 2011`**

In [50]:
# Load each yearly Texas extract (/tmp/TX/<year>/) into its own database TX_<year>.
# After the loop `engine` is bound to the TX_2011 database, as it was before.
for tx_year in ("2008", "2009", "2010", "2011"):
    engine: 'pymysql' = create_engine(f"{sql_arg}TX_{tx_year}")
    to_mysql(backend_engine=engine, dir_path=f"/tmp/TX/{tx_year}/")

In [11]:
# NOTE(review): this cell repeats the TX_2011 load performed by the previous cell.
# `to_mysql` writes with if_exists='append', so running both cells presumably inserts
# the 2011 rows twice — confirm whether this cell should be kept.
engine: 'pymysql' = create_engine(f"{sql_arg}TX_2011")
to_mysql(dir_path="/tmp/TX/2011/", backend_engine=engine)

## **Populate Arizona DB with Crime Incidents between `2008 - 2011`**

In [9]:
# Load each yearly Arizona extract (/tmp/AZ/<year>/) into its own database AZ_<year>.
# After the loop `engine` is bound to the AZ_2011 database, as it was before.
for az_year in ("2008", "2009", "2010", "2011"):
    engine: 'pymysql' = create_engine(f"{sql_arg}AZ_{az_year}")
    to_mysql(backend_engine=engine, dir_path=f"/tmp/AZ/{az_year}/")

## **Index Table for All the Crimes in AZ and TX between `2008 - 2011`**
<img src="images/heroin_routes.png" width="1100px">

In [26]:
# Map every per-state, per-year database (named like "TX_2008" / "AZ_2011") to the
# SQLAlchemy URL used to query it. The anchored pattern keeps unrelated schemas whose
# names merely contain "AZ" or "TX" out of the index.
database_names = pd.read_sql("SHOW DATABASES", con=con)['Database']
cde_events: Dict[str, str] = {
    db_name: "%s%s" % (sql_arg, db_name)
    for db_name in database_names[database_names.str.match(r"^(?:AZ|TX)_\d{4}$")]
}

# **References** 
1. https://www.justice.gov/archive/ndic/pubs44/44849/44849p.pdf