# 2024-017 LUDU 2023

<b>Background:</b>  
The GIS team performs an annual land use and housing inventory update to maintain a robust and accurate catalog of existing conditions for each year. These annual snapshots are the base-year inputs to SANDAG’s Regional Demographic, Economic, and Land Use Models.

<b>QC Request:</b>  
Conduct attribute checks and trend checks to verify the data's accuracy.

<b>Notes:</b>  
* _X = previous
* _Y = current

<b>Author:</b> Dante Lee  
<b>Date Created:</b> 12/2/2022  

<b>Date Modified:</b> 2/27/2024     
<b>Modified by:</b> Dante Lee

## Data Prep

In [None]:
import pandas as pd
import sqlalchemy as sa

In [None]:
# Load the PREVIOUS LUDU table (GeoDepot.gis.LUDU2022) into a DataFrame.
# Every column gets an _X suffix so previous-year fields stay distinguishable
# once this frame is joined with the current-year frame.

server, database = 'sql2014b8', 'GeoDepot'
driver = 'ODBC Driver 17 for SQL Server'
# Windows-authenticated (trusted) pyodbc connection string
url = f"mssql+pyodbc://{server}/{database}?trusted_connection=yes&driver={driver}"
engine = sa.create_engine(url)

query = """
        SELECT [LCKey],[parcelID],[subParcel],[lu],[du],[MGRA]
          FROM [GeoDepot].[gis].[LUDU2022]
        """

ludu_X = pd.read_sql(query, con=engine)
ludu_X = ludu_X.add_suffix('_X')
# Optional local export of the previous-year snapshot:
#ludu_X.to_csv('ludu_X.csv')
ludu_X

In [None]:
# Load the CURRENT LUDU draft table (WS.gis.LUDU2023_DRAFT_20240223) into a DataFrame.
# Every column gets a _Y suffix so current-year fields stay distinguishable
# once this frame is joined with the previous-year frame.

server, database = 'sql2014b8', 'WS'
driver = 'ODBC Driver 17 for SQL Server'
# Windows-authenticated (trusted) pyodbc connection string
url = f"mssql+pyodbc://{server}/{database}?trusted_connection=yes&driver={driver}"
engine = sa.create_engine(url)

query = """
        SELECT [LCKey],[parcelID],[subParcel],[lu],[du],[MGRA]
          FROM [WS].[gis].[LUDU2023_DRAFT_20240223]
        """

ludu_Y = pd.read_sql(query, con=engine)
ludu_Y = ludu_Y.add_suffix('_Y')
# Optional local export of the current-year snapshot:
#ludu_Y.to_csv('ludu_Y.csv')
ludu_Y

## Attribute Checks

### ⚠️ Check value ranges

<b>QC Summary:</b>  
Value ranges are reasonable

In [None]:
# Print the minimum and maximum of each attribute in the current LUDU.
# Note: ParcelID comes from the County. Negative values indicate that the parcel is covered by water.
range_cols = ['LCKey_Y', 'parcelID_Y', 'subParcel_Y', 'lu_Y', 'du_Y', 'MGRA_Y']
value_ranges = ludu_Y[range_cols].describe().loc[['min', 'max']]
print(value_ranges)

In [None]:
# Show current-LUDU records whose DU is negative (out-of-range values)

ludu_Y.loc[ludu_Y['du_Y'].lt(0)]

### ✅ Check values

<b>QC Summary:</b>  
Pass - No null values (except apn8). LCKey is unique

In [None]:
# Per-column flag: does the column contain any null value?
null_flags = ludu_Y.isna().any()
# Per-column count of distinct values
unique_counts = ludu_Y.nunique()

print('NULL VALUES ----------')
print(null_flags)

print('\nUNIQUE VALUES ----------')
print(unique_counts)

### ⚠️ No DU on select land uses

<b>QC Summary:</b>  
DU found on special cases. Most are consistent with ludu2022    

Investigate/confirm DU on: 1409, 4101, 4113

In [None]:
# Land use codes that are checked for dwelling units (expected to carry none)
nodu = [1400, 1409, 1500, 1501, 1502, 9101, 4101, 4102, 4103, 4104, 4110, 
        4111, 4112, 4113, 4114, 4115, 4116, 4117, 4118, 4119, 4120, 6101, 
        7600, 7601, 7603, 7604, 7605, 7606, 7607, 7609, 9201, 9202]

# Previous LUDU - total DU per selected land use
ludu_X_nodu = (
    ludu_X.loc[ludu_X['lu_X'].isin(nodu)]
    .groupby('lu_X')['du_X']
    .sum()
    .reset_index()
)

# Current LUDU - total DU per selected land use
ludu_Y_nodu = (
    ludu_Y.loc[ludu_Y['lu_Y'].isin(nodu)]
    .groupby('lu_Y')['du_Y']
    .sum()
    .reset_index()
)

# Line up previous/current totals; a land use missing from one side is filled with 0
nodu_df = ludu_X_nodu.merge(
    ludu_Y_nodu, how='outer', left_on='lu_X', right_on='lu_Y'
).fillna(0)

# Keep only land uses that carry DU in either year
nodu_df[nodu_df[['du_Y', 'du_X']].gt(0).any(axis=1)]


In [None]:
# List the current-LUDU records on the flagged land uses that carry DU (du > 0)
nodu_Y = [1409, 4101, 4113]

flagged_lu = ludu_Y['lu_Y'].isin(nodu_Y)
has_du = ludu_Y['du_Y'].gt(0)
ludu_Y.loc[flagged_lu & has_du]

## Trend Checks

In [None]:
# Outer-join previous (_X) and current (_Y) LUDU on LCKey so records present in
# only one year are retained; their missing-side fields are filled with 0.
ludu_XY = ludu_X.merge(
    ludu_Y, how='outer', left_on='LCKey_X', right_on='LCKey_Y'
).fillna(0)
ludu_XY

In [None]:
# Calculate the per-LCKey DU difference between the current (_Y) and previous (_X) LUDU

# Take an explicit copy of the column subset: adding a column to a selection of
# ludu_XY would otherwise raise pandas' SettingWithCopyWarning.
du_diff = ludu_XY[['LCKey_Y', 'lu_Y', 'du_Y', 'LCKey_X', 'lu_X', 'du_X']].copy()
# Positive = DU gained in the current LUDU, negative = DU lost
du_diff['du_diff'] = du_diff['du_Y'] - du_diff['du_X']

### Dramatic changes in DU at the LCKey level (diff < -20 or > 20)

<b>QC Summary:</b>  


In [None]:
# Records whose DU changed by more than 20 in either direction, largest gain first

large_du_change = du_diff['du_diff'].abs() > 20
du_diff[large_du_change].sort_values(by='du_diff', ascending=False)

In [None]:
# DU changed by more than 20 in either direction while the land use stayed the same

big_swing = du_diff['du_diff'].abs() > 20
same_lu = du_diff['lu_X'] == du_diff['lu_Y']
du_diff_samelu = du_diff[big_swing & same_lu].sort_values(by='du_diff', ascending=False)
du_diff_samelu

In [None]:
# DU changed by more than 20 in either direction and the land use changed as well

big_swing = du_diff['du_diff'].abs() > 20
changed_lu = du_diff['lu_X'] != du_diff['lu_Y']
du_diff_difflu = du_diff[big_swing & changed_lu].sort_values(by='du_diff', ascending=False)
# Optional local export for follow-up review:
#du_diff_difflu.to_csv('du_diff_difflu_adu.csv', index=False)
du_diff_difflu

### LU consistency - number of LU changes

<b>QC Summary:</b>  
Information only - LU consistency report.

In [None]:
# Records whose land use differs between previous (_X) and current (_Y), highest LCKey_Y first

lu_changed = du_diff['lu_X'].ne(du_diff['lu_Y'])
du_diff.loc[lu_changed].sort_values(by='LCKey_Y', ascending=False)

### MGRA15 Consistency Report

<b>QC Summary:</b>  

In [None]:
# MGRA consistency: find MGRA values that appear in only one of the two LUDU
# tables (_X = previous, _Y = current).
# Labels are year-neutral so they do not go stale when the notebook is reused
# for the next annual update.
luduX_mgra = ludu_X['MGRA_X'].unique()
luduY_mgra = ludu_Y['MGRA_Y'].unique()

# MGRAs present in the previous LUDU but absent from the current one
luduX_mgra_unique = set(luduX_mgra).difference(luduY_mgra)
print(f'In previous LUDU (_X) but not in current (_Y): {len(luduX_mgra_unique)}')

# MGRAs present in the current LUDU but absent from the previous one
luduY_mgra_unique = set(luduY_mgra).difference(luduX_mgra)
print(f'In current LUDU (_Y) but not in previous (_X): {len(luduY_mgra_unique)}')

luduX_mgra_unique