In [1]:
### Import Packages

# File manipulation

import io # In-memory buffers (parse downloaded bytes without a second request)
import os # For working with Operating System
import requests # Accessing URLS

# Database 

import psycopg2

# Analysis

import arcpy
import pandas as pd

# Working directory of the notebook

cwd = os.getcwd() # This is a global variable for where the notebook is (must change if running in arcpro)

# Communal file geodatabase: <cwd>/../../data/QAQC.gdb
# Build the folder and geodatabase paths once and reuse them below.

data_dir = os.path.join(cwd, '..', '..', 'data')
gdb_path = os.path.join(data_dir, 'QAQC.gdb')

# Only create the geodatabase when it is not already on disk

gdb_missing = not os.path.exists(gdb_path)

if gdb_missing:

    arcpy.management.CreateFileGDB(data_dir, 'QAQC')

# Point arcpy at the geodatabase and allow existing layers to be overwritten

arcpy.env.workspace = gdb_path

arcpy.env.overwriteOutput = True

In [3]:
# Get Data

msp_url= 'https://www.ncei.noaa.gov/data/normals-hourly/1991-2020/access/USW00014922.csv'

# Download the CSV exactly once. The timeout stops the cell from hanging forever,
# and raise_for_status() turns an HTTP error into an exception instead of letting
# an error page be parsed as data.
response = requests.get(msp_url, timeout=60)
response.raise_for_status()

# Parse the bytes that were just downloaded (no second request to the server),
# keeping only the station, location, time and wind columns.
msp_wind= pd.read_csv(io.BytesIO(response.content), usecols=
               ['STATION', 'LATITUDE', 'LONGITUDE', 'DATE', 'month', 'day', 'hour', 'HLY-WIND-AVGSPD', 'HLY-WIND-VCTDIR'])

# Rename columns: upper-case the date-part columns and replace hyphens with underscores.
# Note the direction column becomes 'HLY_WIND_VCDIR' (without the 'T'); the rest of
# the notebook refers to it by that name.
msp_wind = msp_wind.rename(columns={'month': 'MONTH', 'hour':'HOUR', 'day':'DAY', 'HLY-WIND-AVGSPD':'HLY_WIND_AVGSPD', 'HLY-WIND-VCTDIR':'HLY_WIND_VCDIR'})
msp_wind

Unnamed: 0,STATION,LATITUDE,LONGITUDE,DATE,MONTH,DAY,HOUR,HLY_WIND_AVGSPD,HLY_WIND_VCDIR
0,USW00014922,44.8831,-93.2289,01-01T00:00:00,1,1,0,8.4,293.0
1,USW00014922,44.8831,-93.2289,01-01T01:00:00,1,1,1,8.5,288.0
2,USW00014922,44.8831,-93.2289,01-01T02:00:00,1,1,2,8.5,296.0
3,USW00014922,44.8831,-93.2289,01-01T03:00:00,1,1,3,8.4,298.0
4,USW00014922,44.8831,-93.2289,01-01T04:00:00,1,1,4,8.4,296.0
...,...,...,...,...,...,...,...,...,...
8755,USW00014922,44.8831,-93.2289,12-31T19:00:00,12,31,19,9.3,283.0
8756,USW00014922,44.8831,-93.2289,12-31T20:00:00,12,31,20,9.0,284.0
8757,USW00014922,44.8831,-93.2289,12-31T21:00:00,12,31,21,9.0,293.0
8758,USW00014922,44.8831,-93.2289,12-31T22:00:00,12,31,22,8.7,291.0


In [4]:
# Range check for wind speed: values must lie between 0 and 100 (inclusive)
def check_range(value):
    """Return 0 when ``value`` is within [0, 100], otherwise 1.

    A value that fails the comparison for any reason (for example NaN)
    is reported as 1.
    """
    return 0 if 0 <= value <= 100 else 1
    
# Run the range check on every hourly average wind speed and store the flag
msp_wind['ERROR_WINDSPD'] = msp_wind['HLY_WIND_AVGSPD'].map(check_range)

          STATION  LATITUDE  LONGITUDE            DATE  MONTH  DAY  HOUR  \
0     USW00014922   44.8831   -93.2289  01-01T00:00:00      1    1     0   
1     USW00014922   44.8831   -93.2289  01-01T01:00:00      1    1     1   
2     USW00014922   44.8831   -93.2289  01-01T02:00:00      1    1     2   
3     USW00014922   44.8831   -93.2289  01-01T03:00:00      1    1     3   
4     USW00014922   44.8831   -93.2289  01-01T04:00:00      1    1     4   
...           ...       ...        ...             ...    ...  ...   ...   
8755  USW00014922   44.8831   -93.2289  12-31T19:00:00     12   31    19   
8756  USW00014922   44.8831   -93.2289  12-31T20:00:00     12   31    20   
8757  USW00014922   44.8831   -93.2289  12-31T21:00:00     12   31    21   
8758  USW00014922   44.8831   -93.2289  12-31T22:00:00     12   31    22   
8759  USW00014922   44.8831   -93.2289  12-31T23:00:00     12   31    23   

      HLY_WIND_AVGSPD  HLY_WIND_VCDIR  ERROR_WINDSPD  
0                 8.4           

In [5]:
# Range check for wind direction: values must lie between 0 and 360 (inclusive)
def check_range(value):
    """Return 0 when ``value`` is within [0, 360], otherwise 1.

    A value that fails the comparison for any reason (for example NaN)
    is reported as 1.
    """
    return 0 if 0 <= value <= 360 else 1

# Run the range check on every hourly wind direction and store the flag
msp_wind['ERROR_WINDVCTR'] = msp_wind['HLY_WIND_VCDIR'].map(check_range)

          STATION  LATITUDE  LONGITUDE            DATE  MONTH  DAY  HOUR  \
0     USW00014922   44.8831   -93.2289  01-01T00:00:00      1    1     0   
1     USW00014922   44.8831   -93.2289  01-01T01:00:00      1    1     1   
2     USW00014922   44.8831   -93.2289  01-01T02:00:00      1    1     2   
3     USW00014922   44.8831   -93.2289  01-01T03:00:00      1    1     3   
4     USW00014922   44.8831   -93.2289  01-01T04:00:00      1    1     4   
...           ...       ...        ...             ...    ...  ...   ...   
8755  USW00014922   44.8831   -93.2289  12-31T19:00:00     12   31    19   
8756  USW00014922   44.8831   -93.2289  12-31T20:00:00     12   31    20   
8757  USW00014922   44.8831   -93.2289  12-31T21:00:00     12   31    21   
8758  USW00014922   44.8831   -93.2289  12-31T22:00:00     12   31    22   
8759  USW00014922   44.8831   -93.2289  12-31T23:00:00     12   31    23   

      HLY_WIND_AVGSPD  HLY_WIND_VCDIR  ERROR_WINDSPD  ERROR_WINDVCTR  
0               

In [6]:
# Bin a wind speed into an intensity class (used to build a new column)
def check_range(value):
    """Map a wind speed to an intensity class.

    Returns 1 for [0, 10], 2 for (10, 20], 3 for (20, 30], 4 for (30, 100],
    and 0 for anything outside [0, 100] (including NaN).
    """
    # Reject everything outside the overall range first
    if not 0 <= value <= 100:
        return 0

    # The first upper bound that the value does not exceed decides the class
    for intensity, upper_bound in enumerate((10, 20, 30, 100), start=1):
        if value <= upper_bound:
            return intensity

    return 0

# Classify every hourly average wind speed into its intensity class
msp_wind['WIND_INTENSITY'] = msp_wind['HLY_WIND_AVGSPD'].map(check_range)

          STATION  LATITUDE  LONGITUDE            DATE  MONTH  DAY  HOUR  \
0     USW00014922   44.8831   -93.2289  01-01T00:00:00      1    1     0   
1     USW00014922   44.8831   -93.2289  01-01T01:00:00      1    1     1   
2     USW00014922   44.8831   -93.2289  01-01T02:00:00      1    1     2   
3     USW00014922   44.8831   -93.2289  01-01T03:00:00      1    1     3   
4     USW00014922   44.8831   -93.2289  01-01T04:00:00      1    1     4   
...           ...       ...        ...             ...    ...  ...   ...   
8755  USW00014922   44.8831   -93.2289  12-31T19:00:00     12   31    19   
8756  USW00014922   44.8831   -93.2289  12-31T20:00:00     12   31    20   
8757  USW00014922   44.8831   -93.2289  12-31T21:00:00     12   31    21   
8758  USW00014922   44.8831   -93.2289  12-31T22:00:00     12   31    22   
8759  USW00014922   44.8831   -93.2289  12-31T23:00:00     12   31    23   

      HLY_WIND_AVGSPD  HLY_WIND_VCDIR  ERROR_WINDSPD  ERROR_WINDVCTR  \
0              

In [7]:
# Bin a wind direction into one of eight 45-degree sectors (used to build a new column)

def check_range(value):
    """Map a wind direction to a sector number.

    Returns 1 for [0, 45], 2 for (45, 90], ... up to 8 for (315, 360],
    and 0 for anything outside [0, 360] (including NaN).
    """
    # Reject everything outside the overall range first
    if not 0 <= value <= 360:
        return 0

    # Upper sector bounds are 45, 90, ..., 360; the first one that the value
    # does not exceed decides the sector
    for sector, upper_bound in enumerate(range(45, 361, 45), start=1):
        if value <= upper_bound:
            return sector

    return 0

# Classify every hourly wind direction into its sector number
msp_wind['WIND_VCT_CATEGORY'] = msp_wind['HLY_WIND_VCDIR'].map(check_range)

In [8]:
# Build a WKT point string per row, longitude first: 'POINT (<lon> <lat>)'
msp_wind['WKT'] = (
    'POINT ('
    + msp_wind['LONGITUDE'].astype(str).str.cat(msp_wind['LATITUDE'].astype(str), sep=' ')
    + ')'
)

Unnamed: 0,STATION,LATITUDE,LONGITUDE,DATE,MONTH,DAY,HOUR,HLY_WIND_AVGSPD,HLY_WIND_VCDIR,ERROR_WINDSPD,ERROR_WINDVCTR,WIND_INTENSITY,WIND_VCT_CATEGORY,WKT
0,USW00014922,44.8831,-93.2289,01-01T00:00:00,1,1,0,8.4,293.0,0,0,1,7,POINT (-93.2289 44.8831)
1,USW00014922,44.8831,-93.2289,01-01T01:00:00,1,1,1,8.5,288.0,0,0,1,7,POINT (-93.2289 44.8831)
2,USW00014922,44.8831,-93.2289,01-01T02:00:00,1,1,2,8.5,296.0,0,0,1,7,POINT (-93.2289 44.8831)
3,USW00014922,44.8831,-93.2289,01-01T03:00:00,1,1,3,8.4,298.0,0,0,1,7,POINT (-93.2289 44.8831)
4,USW00014922,44.8831,-93.2289,01-01T04:00:00,1,1,4,8.4,296.0,0,0,1,7,POINT (-93.2289 44.8831)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8755,USW00014922,44.8831,-93.2289,12-31T19:00:00,12,31,19,9.3,283.0,0,0,1,7,POINT (-93.2289 44.8831)
8756,USW00014922,44.8831,-93.2289,12-31T20:00:00,12,31,20,9.0,284.0,0,0,1,7,POINT (-93.2289 44.8831)
8757,USW00014922,44.8831,-93.2289,12-31T21:00:00,12,31,21,9.0,293.0,0,0,1,7,POINT (-93.2289 44.8831)
8758,USW00014922,44.8831,-93.2289,12-31T22:00:00,12,31,22,8.7,291.0,0,0,1,7,POINT (-93.2289 44.8831)


In [18]:
# Parse the year-less 'MM-DDTHH:MM:SS' strings in 'DATE' into timestamps, then
# set the year of every value to 2020 (a placeholder, since these are normals).

msp_wind['DATE'] = (
    pd.to_datetime(msp_wind['DATE'], format='%m-%dT%H:%M:%S')
    .map(lambda stamp: stamp.replace(year=2020))
)

msp_wind

## Upload to local and remote Databases

In [21]:
### Local GDB

# Initialize Feature Class

table_name = 'WIND_HISTORIC'
geom_type = 'POINT'
out_coordinate_system = arcpy.SpatialReference(26915) # NAD83 / UTM zone 15N (feature class CRS)

# Spatial reference of the WKT text (longitude/latitude, WGS 84).
# Built once here rather than once per inserted row.
wkt_coordinate_system = arcpy.SpatialReference(4326)

arcpy.management.CreateFeatureclass(arcpy.env.workspace, table_name, geom_type, 
                                    spatial_reference = out_coordinate_system)

# Initialize Fields
# cols_for_gdb and dtypes_for_gdb are parallel lists: one field type per column name

cols_for_gdb = ['STATION', 'DATE', 'HLY_WIND_AVGSPD', 'HLY_WIND_VCDIR', 
                'ERROR_WINDSPD', 'ERROR_WINDVCTR', 'WIND_INTENSITY',
                'WIND_VCT_CATEGORY']

dtypes_for_gdb = ['TEXT', 'DATE', 'FLOAT', 'FLOAT',
                  'SHORT', 'SHORT', 'SHORT', 'SHORT']

field_desc = list(zip(cols_for_gdb, dtypes_for_gdb))

arcpy.management.AddFields(table_name, field_desc)

# Insert into table
# Each inserted row is the attribute values (in cols_for_gdb order) followed by the geometry

with arcpy.da.InsertCursor(table_name, cols_for_gdb + ['SHAPE@']) as cursor:

    for i, df_row in msp_wind.iterrows():

        # Get point: parse the lon/lat WKT and project it into the feature class CRS

        pt = arcpy.FromWKT(df_row['WKT'], wkt_coordinate_system).projectAs(out_coordinate_system)

        # Attribute count follows cols_for_gdb, so adding or removing a column
        # above needs no change here

        row = list(df_row[cols_for_gdb]) + [pt]

        cursor.insertRow(row)

In [19]:
# Connect to the database

# Get credentials
# The first line of the file is split on ', ' and matched, in order, to:
# dbname, user, password, port, host

cred_pth = os.path.join(os.getcwd(), '..', '..', 'database', 'db_credentials.txt')

with open(cred_pth, 'r') as f:
    
    creds = f.readlines()[0].rstrip('\n').split(', ')
    
# Connect to PostGIS Database

pg_connection_dict = dict(zip(['dbname', 'user', 'password', 'port', 'host'], creds))

# Statement and column order are fixed, so define them once outside the loop.
# All values are passed as parameters (never formatted into the SQL text).

insert_sql = '''
    INSERT INTO WIND_HISTORIC (STATION, DATE, HLY_WIND_AVGSPD, HLY_WIND_VCDIR, ERROR_WINDSPD, ERROR_WINDVCTR, WIND_INTENSITY, WIND_VCT_CATEGORY, geometry) 
    VALUES (%s, %s, %s, %s, %s, %s, %s, %s, ST_SetSRID(ST_GeomFromText(%s), 4326)::geometry) 
    '''

insert_cols = ['STATION', 'DATE', 'HLY_WIND_AVGSPD', 'HLY_WIND_VCDIR',
               'ERROR_WINDSPD', 'ERROR_WINDVCTR', 'WIND_INTENSITY',
               'WIND_VCT_CATEGORY', 'WKT']

conn = psycopg2.connect(**pg_connection_dict)

try:

    # `with conn` runs the whole load as one transaction: it commits when the
    # block finishes and rolls back if any insert raises, so the table is never
    # left partially loaded. `with conn.cursor()` closes the cursor either way.
    with conn:

        with conn.cursor() as cur:

            print('connection successful')

            # iterate over the dataframe and insert each row into the database using a SQL INSERT statement
            for index, row in msp_wind.iterrows():
                cur.execute(insert_sql, tuple(row[col] for col in insert_cols))

finally:

    # `with conn` does not close the connection, so always close it here,
    # even when the load failed
    conn.close()

print('changes committed')

connection successful
