# This notebook extracts the unique values of each prediction variable to help build the Streamlit app

In [1]:
import duckdb
import json
import pandas as pd

In [2]:
# Open the database read-only: this cell only runs SELECT queries, and
# read-only mode makes a wrong path fail at connect time instead of silently
# creating an empty database file (which would make every query below fail
# and leave every option list empty).
con = duckdb.connect("../database/prediction_data.duckdb", read_only=True)

# Columns of the `prediction_data` table whose distinct values are collected.
columns_to_extract = [
    "product",
    "sub_product",
    "issue",
    "company",
    "state",
    "submitted_via",
    "consumer_consent_provided",
]

# Maps each column name to the ascending-sorted list of its distinct,
# non-null values (or to an empty list if the query for that column failed).
options_data = {}

try:
    for col in columns_to_extract:
        try:
            # Query unique, non-null values and sort them.
            # `col` comes only from the hard-coded list above, so interpolating
            # it into the SQL text is not an injection risk here.
            query = f"""
            SELECT DISTINCT {col}
            FROM prediction_data
            WHERE {col} IS NOT NULL
            ORDER BY {col} ASC
            """
            df = con.sql(query).df()

            # Convert the single result column to a plain Python list.
            unique_values = df[col].tolist()
            options_data[col] = unique_values

            print(f" - {col}: {len(unique_values)} unique values found.")

        except Exception as e:
            # Deliberate best-effort: report the failure and keep going so the
            # remaining columns are still extracted; the failed column gets an
            # empty list.
            print(f"Error extracting {col}: {e}")
            options_data[col] = []
finally:
    # Always release the database handle, even if the loop is interrupted.
    con.close()

 - product: 14 unique values found.
 - sub_product: 58 unique values found.
 - issue: 87 unique values found.
 - company: 9 unique values found.
 - state: 60 unique values found.
 - submitted_via: 5 unique values found.
 - consumer_consent_provided: 5 unique values found.


In [3]:
# Save the collected options to a JSON file.
output_file = "../src/models/options.json"

# Serialize before opening the file: opening with "w" truncates it, so if
# serialization failed afterwards an existing options.json would be left
# empty or partially written.
options_json = json.dumps(options_data, indent=4)

# Pin the encoding so the file does not depend on the platform default.
with open(output_file, "w", encoding="utf-8") as f:
    f.write(options_json)

print(f"Success! Options saved to {output_file}")

Success! Options saved to ../src/models/options.json
