In [None]:
from pprint import pprint as pp

import numpy as np
import pandas as pd
from sqlalchemy import (
    Column,
    Float,
    ForeignKey,
    Integer,
    MetaData,
    String,
    Table,
    create_engine,
    text,
)

In [None]:
# NOAA daily-summary flag lookup tables, one DataFrame per flag family.
# Each record is (flag id, human-readable description); the literal string
# 'None' is the id used for "no flag present".

# Measurement flags: how the observed value was obtained or derived.
noaa_measurement_ids = pd.DataFrame(
    [
        ('None', 'no measurement information applicable'),
        ('B', 'precipitation total formed from two 12-hour totals'),
        ('D', 'precipitation total formed from four six-hour totals'),
        ('H', 'represents highest or lowest hourly temperature (TMAX or TMIN) or the average of hourly values (TAVG)'),
        ('K', 'converted from knots'),
        ('L', 'temperature appears to be lagged with respect to reported hour of observation'),
        ('O', 'converted from oktas'),
        ('P', 'identified as “missing presumed zero” in DSI 3200 and 3206'),
        ('T', 'trace of precipitation, snowfall, or snow depth'),
        ('W', 'converted from 16-point WBAN code (for wind direction)'),
    ],
    columns=['measurement_id', 'description'],
)

# Data-quality flags: which quality-assurance check (if any) the value failed.
noaa_data_quality_ids = pd.DataFrame(
    [
        ('None', 'did not fail any quality assurance check'),
        ('D', 'failed duplicate check'),
        ('G', 'failed gap check'),
        ('I', 'failed internal consistency check'),
        ('K', 'failed streak/frequent-value check'),
        ('L', 'failed check on length of multiday period'),
        ('M', 'failed mega consistency check'),
        ('N', 'failed naught check'),
        ('O', 'failed climatological outlier check'),
        ('R', 'failed lagged range check'),
        ('S', 'failed spatial consistency check'),
        ('T', 'failed temporal consistency check'),
        ('W', 'temperature too warm for snow'),
        ('X', 'failed bounds check'),
        ('Z', 'flagged as a result of an official Datzilla Investigation'),
    ],
    columns=['data_quality_id', 'description'],
)

# Source flags: where the observation came from. Ids are case-sensitive
# ('A' and 'a' are different sources).
noaa_source_ids = pd.DataFrame(
    [
        ('None', 'No source (i.e., data value missing)'),
        ('0', 'U.S. Cooperative Summary of the Day (NCDC DSI-3200)'),
        ('6', 'CDMP Cooperative Summary of the Day (NCDC DSI-3206)'),
        ('7', 'U.S. Cooperative Summary of the Day - Transmitted via WxCoder3 (NCDC SI-3207)'),
        ('A', 'U.S. Automated Surface Observing System (ASOS) real-time data (since January 1, 2006)'),
        ('a', 'Australian data from the Australian Bureau of Meteorology'),
        ('B', 'U.S. ASOS data for October 2000-December 2005 (NCDC DSI-3211)'),
        ('b', 'Belarus update'),
        ('C', 'Environment Canada'),
        ('E', 'European Climate Assessment and Dataset (Klein Tank et al., 2002)'),
        ('F', 'U.S. Fort data'),
        ('G', 'Official Global Climate Observing System (GCOS) or other government-supplied data'),
        ('H', 'High Plains Regional Climate Center real-time data'),
        ('I', 'International collection (non U.S. data received through personal contacts)'),
        ('K', 'U.S. Cooperative Summary of the Day data digitized from paper observer forms (from 2011 to present)'),
        ('M', 'Monthly METAR Extract (additional ASOS data)'),
        ('N', 'Community Collaborative Rain, Hail,and Snow (CoCoRaHS)'),
        ('Q', 'Data from several African countries that had been “quarantined”, that is, withheld from public release until permission was granted from the respective meteorological services'),
        ('R', 'NCEI Reference Network Database (Climate Reference Network and Regional Climate Reference Network)'),
        ('r', 'All-Russian Research Institute of Hydro-meteorological Information-World Data Center'),
        ('S', 'Global Summary of the Day (NCDC DSI-9618)NOTE: “S” values are derived from hourly synoptic reports exchanged on the Global Telecommunications System (GTS). Daily values derived in this fashion may differ significantly from “true” daily data, particularly for precipitation (i.e., use with caution).'),
        ('s', 'China Meteorological Administration/National Meteorological Information Center/Climatic Data Center (http://cdc.cma.gov.cn)'),
        ('T', "SNOwpack TELemtry (SNOTEL) data obtained from the U.S. Department of Agriculture's Natural Resources Conservation Service"),
        ('U', 'Remote Automatic Weather Station (RAWS) data obtained from the Western Regional Climate Center'),
        ('u', 'Ukraine update'),
        ('W', "WBAN/ASOS Summary of the Day from NCDC's Integrated Surface Data (ISD)."),
        ('X', 'U.S. First-Order Summary of the Day (NCDC DSI-3210)'),
        ('Z', 'Datzilla official additions or replacements'),
        ('z', 'Uzbekistan update'),
    ],
    columns=['source_id', 'description'],
)


In [None]:

class DatabaseInterface:
    """Small convenience wrapper around the SQLAlchemy engines for one database.

    Constructing an instance connects to the database server, creates
    ``db_name`` if it does not exist yet, and then builds an engine bound to
    that database.

    Attributes:
        con: server-level engine (no database selected); only used to issue
            the ``CREATE DATABASE`` statement.
        db_engine: engine bound to ``db_name``; used for all inserts.
    """

    def __init__(self,
                 db_name,
                 user,
                 password,
                 host='localhost',
                 port=3306,
                 driver='mysql+pymysql',
                 echo=True):
        """Create the database if needed and open an engine on it.

        Args:
            db_name: name of the database to create/connect to. It is
                interpolated verbatim into the SQL statement and the URL.
            user: database user name.
            password: password for ``user``. It is placed into the URL as-is,
                so characters such as ``@`` or ``/`` must already be
                URL-escaped by the caller.
            host: database server host name.
            port: database server port, used for both engines.
            driver: SQLAlchemy ``dialect+driver`` URL prefix.
            echo: when true, ``db_engine`` logs every emitted SQL statement.
        """
        # Both engines must target the same server, so the port is part of
        # the shared URL prefix rather than only of the database URL.
        server_url = f'{driver}://{user}:{password}@{host}:{port}'

        self.con = create_engine(server_url)
        # Engine.execute() no longer exists in SQLAlchemy 2.0; an explicit
        # connection plus text() works on both 1.x and 2.x.
        with self.con.begin() as connection:
            connection.execute(text(f"CREATE DATABASE IF NOT EXISTS {db_name}"))

        self.db_engine = create_engine(f"{server_url}/{db_name}", echo=echo)

    def insert_data(self, df: pd.DataFrame, table_name: str, if_exists: str = 'append'):
        """Write ``df`` into ``table_name`` through ``db_engine``.

        Args:
            df: rows to write; the DataFrame index is not written.
            table_name: destination table.
            if_exists: forwarded to ``DataFrame.to_sql``
                (``'append'``, ``'replace'`` or ``'fail'``).
        """
        df.to_sql(table_name, self.db_engine, if_exists=if_exists, index=False)

    def close_connection(self):
        """Release the pooled connections held by both engines."""
        self.db_engine.dispose()
        self.con.dispose()
    

In [None]:
    
# NOTE(review): `local_mysql_user` / `local_mysql_password` are not defined in
# any cell visible here; they must already exist in the kernel before this
# cell runs (confirm where they are supposed to come from).
dbi = DatabaseInterface(db_name='crop_yield_prediction',
                        user=local_mysql_user,
                        password=local_mysql_password)
meta = MetaData()

# NOAA flag ids are case-sensitive ('A' vs 'a', 'S' vs 's', ...). MySQL's
# default collations compare case-insensitively, which would make those ids
# collide in the primary key, so the id columns use a binary collation.
FLAG_ID_COLLATION = 'utf8mb4_bin'


def make_flag_table(table_name, id_column, metadata):
    """Declare one NOAA flag lookup table on ``metadata``.

    Args:
        table_name: name of the table to declare.
        id_column: name of the primary-key column holding the flag id.
        metadata: ``MetaData`` collection the table is registered on.

    Returns:
        The declared ``Table`` with columns ``id_column`` (primary key),
        ``description`` and ``units``.
    """
    return Table(
        table_name, metadata,
        Column(id_column, String(4, collation=FLAG_ID_COLLATION), primary_key=True),
        Column('description', String(500)),
        Column('units', String(50)),
    )


noaa_measurement_ids_table = make_flag_table('noaa_measurements', 'measurement_id', meta)
noaa_quality_ids_table = make_flag_table('noaa_data_quality', 'data_quality_id', meta)
noaa_source_ids_table = make_flag_table('noaa_sources', 'source_id', meta)

# Rebuild the three tables from scratch so the cell can be re-run safely.
# drop_all() only drops tables that exist, matching DROP TABLE IF EXISTS, and
# avoids Engine.execute(), which was removed in SQLAlchemy 2.0.
meta.drop_all(dbi.db_engine)
meta.create_all(dbi.db_engine)

# The frames carry no 'units' column, so that column is left NULL on insert.
for table_name, flag_frame in (
    ('noaa_measurements', noaa_measurement_ids),
    ('noaa_data_quality', noaa_data_quality_ids),
    ('noaa_sources', noaa_source_ids),
):
    print(f"Inserting {table_name}")
    dbi.insert_data(flag_frame, table_name)