In [3]:
# Open an ODBC connection to the ReportDB database on the "reportmanager"
# SQL Server host (port 1542) using a trusted connection.
import pyodbc

connection_string = (
    'DRIVER={SQL Server};'
    'SERVER=reportmanager,1542;'
    'DATABASE=ReportDB;'
    'Trusted_Connection=yes;'
)
conn = pyodbc.connect(connection_string)

In [2]:
# Load the Russian River POD list and reduce it to the distinct application numbers
import pandas as pd

rr_pod_file_path = r'C:\Users\palemi\Water Boards\Supply and Demand Assessment - Documents\Program Watersheds\1. Watershed Folders\Test Watersheds\Test_Russian\Data\GIS_Preprocessing\TR_GIS_Preprocessing_2025-03-17.xlsx'
pod_sheet = pd.read_excel(rr_pod_file_path, sheet_name='Final_POD_List')

# Keep only APPLICATION_NUMBER, rename it to app_number (the key the later
# merge uses), and drop repeated application numbers.
rr_pods = (
    pod_sheet[['APPLICATION_NUMBER']]
    .rename(columns={'APPLICATION_NUMBER': 'app_number'})
    .drop_duplicates()
)

In [6]:
import pandas as pd

# Two pulls from the ewrims_flat_file table, each exposing Application_Number
# under the alias app_number so it can be joined to rr_pods:
#   query1 - distinct (app_number, Effective_Date) pairs only
#   query2 - every column of the table plus the app_number alias
query1 = """
SELECT DISTINCT
    Application_Number AS app_number,
    Effective_Date
FROM ReportDB.FLAT_FILE.ewrims_flat_file
"""

query2 = """
SELECT DISTINCT *, Application_Number AS app_number
FROM ReportDB.FLAT_FILE.ewrims_flat_file
"""

# Run both queries over the open connection and load the results as DataFrames
ewrims_flat_file = pd.read_sql(query1, conn)
ewrims_flat_file_all = pd.read_sql(query2, conn)

# Print the first rows of each result
for preview_frame in (ewrims_flat_file, ewrims_flat_file_all):
    print(preview_frame.head())

# Inner join on app_number: keep only flat-file rows whose application number
# is present in rr_pods
rr_pods_flat_file = rr_pods.merge(ewrims_flat_file, how='inner', on='app_number')
rr_pods_flat_file_all = rr_pods.merge(ewrims_flat_file_all, how='inner', on='app_number')

print(f"Number of matched water rights: {len(rr_pods_flat_file)}")
rr_pods_flat_file.head(20)

  ewrims_flat_file = pd.read_sql(query1, conn)
  ewrims_flat_file_all = pd.read_sql(query2, conn)


  app_number Effective_Date
0   18-09-02     07/11/2012
1   24-02-02     05/23/2008
2   28-36-01           None
3      40-00           None
4   40-03-08           None
  WR_WATER_RIGHT_ID APPLICATION_NUMBER CERTIFICATE_ID PERMIT_ID LICENSE_ID  \
0                 1            T032025           None      None       None   
1                10            A000052         000025    000013     000025   
2               100            A000679         000871    000299     000871   
3              1000            A005269         001173    002727     001173   
4             10000            A024457         011352    016952     011352   

   WATER_RIGHT_TYPE WATER_RIGHT_STATUS APPLICATION_NUMBER_PARTY PWSS_ID  \
0  Temporary Permit          Cancelled                  T032025    None   
1     Appropriative           Licensed                  A000052    None   
2     Appropriative           Licensed                  A000679    None   
3     Appropriative           Licensed                  A005269

Unnamed: 0,app_number,Effective_Date
0,A001029,01/15/1920
1,A001205,06/03/1920
2,A001983,08/26/1920
3,A002723,01/24/1927
4,A002928,07/14/1922
5,A003421,05/16/1923
6,A003565,08/03/1923
7,A003601,08/20/1923
8,A003633,04/18/1928
9,A004307,03/30/1928


In [7]:
# Tag every row of rr_pods_flat_file as eligible ('Y') or not ('N') for each
# reporting year, then build a per-year table of eligible-row counts.

# Ensure Effective_Date is a datetime column; unparseable values become NaT
effective_dates = pd.to_datetime(rr_pods_flat_file['Effective_Date'], errors='coerce')
rr_pods_flat_file['Effective_Date'] = effective_dates

# Define years for eligibility tagging (single source of truth for this cell)
years = list(range(2017, 2025))

# A row is eligible for a year when its Effective_Date is on or before
# 31 December of the previous year. Comparing NaT with a timestamp yields
# False, so rows without a usable date are tagged 'N' with no extra null check.
for year in years:
    cutoff = pd.to_datetime(f'{year - 1}-12-31')
    is_eligible = effective_dates <= cutoff
    rr_pods_flat_file[f'Eligible_{year}'] = is_eligible.map({True: 'Y', False: 'N'})

# Preview relevant columns. Printed explicitly because a bare expression in the
# middle of a cell is evaluated and then discarded; only the last one is shown.
columns_to_show = ['app_number', 'Effective_Date'] + [f'Eligible_{y}' for y in years]
print(rr_pods_flat_file[columns_to_show].head(10))

# Count eligible rows by year and collect them into a summary table.
# NOTE(review): this counts rows, not distinct app_number values. If an
# application number can appear with more than one Effective_Date, these
# counts will exceed the number of unique rights -- TODO confirm.
eligibility_counts = [
    {
        'Year': year,
        'Eligible_Count': int((rr_pods_flat_file[f'Eligible_{year}'] == 'Y').sum()),
    }
    for year in years
]

eligibility_summary_df = pd.DataFrame(eligibility_counts)
eligibility_summary_df

Unnamed: 0,Year,Eligible_Count
0,2017,2006
1,2018,2096
2,2019,2133
3,2020,2155
4,2021,2162
5,2022,2168
6,2023,2181
7,2024,2194


In [5]:
# Fetch five rows of the water-use report table to inspect its schema
test_query = "SELECT TOP 5 * FROM ReportDB.FLAT_FILE.ewrims_water_use_report"
test_df = pd.read_sql(test_query, conn)

# Show every column name as a plain Python list
print(list(test_df.columns))

['WATER_RIGHT_ID', 'APPL_ID', 'YEAR', 'MONTH', 'AMOUNT', 'DIVERSION_TYPE']


  test_df = pd.read_sql(test_query, conn)


In [6]:
# Pull water-use report rows (year 2017 onward) for the Russian River rights only

# Distinct, non-null application numbers from the matched rights, as a list
app_numbers = rr_pods_flat_file['app_number'].dropna().unique().tolist()

# The same application numbers as a one-column DataFrame
app_numbers_df = pd.DataFrame({'app_number': app_numbers})


cursor = conn.cursor()

# Start from a clean temp table: drop any leftover #AppNumbers, then recreate it
cursor.execute("IF OBJECT_ID('tempdb..#AppNumbers') IS NOT NULL DROP TABLE #AppNumbers")
cursor.execute("CREATE TABLE #AppNumbers (app_number NVARCHAR(50))")

# Insert one row per application number. executemany rejects an empty
# parameter list, so it is skipped when there is nothing to insert.
if app_numbers:
    cursor.executemany(
        "INSERT INTO #AppNumbers (app_number) VALUES (?)",
        [(app,) for app in app_numbers],
    )

conn.commit()

# Join the report table to the temp table so that only rows for the selected
# application numbers are returned
query = """
SELECT Distinct
 R.APPL_ID As Application_Number, 
 R.Year
FROM ReportDB.FLAT_FILE.ewrims_water_use_report R
INNER JOIN #AppNumbers A ON R.APPL_ID = A.app_number
WHERE YEAR>= 2017
"""

report_df = pd.read_sql(query, conn)
print(f"Retrieved {len(report_df)} rows from filtered report table.")


  report_df = pd.read_sql(query, conn)


Retrieved 15403 rows from filtered report table.


In [19]:
# CALCULATING REPORTING PERCENTAGES
# For each reporting year, compare the rights tagged eligible in
# rr_pods_flat_file with the rights present in report_df for that year, then
# export the per-year counts and reporting percentage as a CSV.

import os

# Convert the Year column to an integer type once, before the loop. The
# conversion does not depend on the loop variable, so repeating it on every
# iteration only rewrote the same column.
report_df['Year'] = report_df['Year'].astype(int)

# Initialize an empty list to contain the results
reporting_summary = []

# year = 2024 # Use this for a single iteration of the loop
for year in range(2017, 2025):
    eligible_col = f'Eligible_{year}'

    # Step 1: Get all eligible rights for this year (unique application numbers)
    eligible_df = rr_pods_flat_file[rr_pods_flat_file[eligible_col] == 'Y']
    eligible_apps = set(eligible_df['app_number'])

    # Step 2: Get all reported rights for this year from the report table
    reported_df_year = report_df[report_df['Year'] == year]
    reported_apps = set(reported_df_year['Application_Number'])

    # Step 3: Intersect the 2 datasets to find the eligible rights that submitted reports
    matched_reports = eligible_apps & reported_apps

    # Step 4: Count the reports and calculate the percentage
    # (reported as 0 when no right is eligible, to avoid dividing by zero)
    eligible_count = len(eligible_apps)
    reported_count = len(matched_reports)
    percentage = round(reported_count / eligible_count * 100, 2) if eligible_count > 0 else 0

    # Step 5: Store the results in the reporting_summary list
    reporting_summary.append({
        'Year': year,
        'Eligible_Count': eligible_count,
        'Reported_Count': reported_count,
        'Reporting_Percentage': percentage
    })

# Convert the reporting_summary list into a DataFrame
reporting_summary_df = pd.DataFrame(reporting_summary)

# Export reporting_summary_df as a csv. The path is relative to the current
# working directory; create the output folder first so the export does not
# fail when it is missing.
output_path = "OutputData/reporting_summary_df_RR.csv"
os.makedirs(os.path.dirname(output_path), exist_ok=True)
reporting_summary_df.to_csv(output_path, index=False)

In [17]:
# Show the kernel's current working directory, i.e. the folder against which
# relative paths such as "OutputData/reporting_summary_df_RR.csv" are resolved.
import os
os.getcwd()


'c:\\Users\\palemi\\Documents\\GitHub\\SDU_DWRAT_Outputs'