<a href="https://colab.research.google.com/github/ProjectInvent1A/AIStudioProject/blob/main/Development%20Process/DashInterfaceWork.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
pip install fast-dash

Collecting fast-dash
  Downloading fast_dash-0.2.9-py3-none-any.whl.metadata (6.7 kB)
Collecting Flask<3.0.0,>=2.0.2 (from fast-dash)
  Downloading flask-2.3.3-py3-none-any.whl.metadata (3.6 kB)
Collecting Pillow<10.0.0,>=9.2.0 (from fast-dash)
  Downloading Pillow-9.5.0-cp310-cp310-manylinux_2_28_x86_64.whl.metadata (9.5 kB)
Collecting dash-bootstrap-components<2.0.0,>=1.0.2 (from fast-dash)
  Downloading dash_bootstrap_components-1.6.0-py3-none-any.whl.metadata (5.2 kB)
Collecting dash-iconify<0.2.0,>=0.1.2 (from fast-dash)
  Downloading dash_iconify-0.1.2-py3-none-any.whl.metadata (2.2 kB)
Collecting dash-mantine-components<0.13.0,>=0.12.1 (from fast-dash)
  Downloading dash_mantine_components-0.12.1-py3-none-any.whl.metadata (3.4 kB)
Collecting docstring-parser<0.16,>=0.15 (from fast-dash)
  Downloading docstring_parser-0.15-py3-none-any.whl.metadata (2.4 kB)
Collecting matplotlib<=3.7.1 (from fast-dash)
  Downloading matplotlib-3.7.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014

In [1]:
# Mount Google Drive at /content/drive; the CSV paths read later in this
# notebook all live under "/content/drive/My Drive/...".
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# Dash framework (this cell only imports; nothing is installed here)
import dash

# Import libraries
from dash import callback, Dash, dcc, html, no_update
from dash.dependencies import Input, Output, State
import pandas as pd
import numpy as np
import json
import torch
from sentence_transformers import SentenceTransformer, util
from geopy.geocoders import Nominatim
from geopy.distance import geodesic
import ast
import os
import time

# Initialize the geocoder (turns "city, state" text into coordinates) and the
# sentence-embedding model used to encode the user's topic.
geolocator = Nominatim(user_agent="Community_Matching")
model = SentenceTransformer('paraphrase-MiniLM-L6-v2')

# City table: the callback reads its 'city', 'state' and 'coordinates' columns.
path = "/content/drive/My Drive/BTT Project Invent Team 1A/Zoe's API work/df_coordinates_cleaned.csv"
df_cities = pd.read_csv(path, low_memory = False)
# A bare `df_cities.shape` in the middle of a cell displays nothing, so the
# sanity check is printed explicitly.
print(f"df_cities shape: {df_cities.shape}")

# Organization table: the callback reads its 'name', 'city', 'state', 'Text',
# 'organization_url' and 'embeddings' columns.
path = "/content/drive/My Drive/BTT Project Invent Team 1A/BertEmbeddingCSVs/final_dataset.csv"
final_df = pd.read_csv(path, low_memory = False)
print(f"final_df shape: {final_df.shape}")

# Create the Dash app
app = Dash(__name__)

# Two-letter postal abbreviations of the 50 US states; these populate the
# state dropdown in the form.
US_STATES_ABBR = (
    "AL AK AZ AR CA CO CT DE FL GA HI ID IL IN IA KS KY LA ME "
    "MD MA MI MN MS MO MT NE NV NH NJ NM NY NC ND OH OK OR PA "
    "RI SC SD TN TX UT VT VA WA WV WI WY"
).split()


# Define the app layout
def _prompt_label(text):
    """Return the bold, block-level label shown above one form field."""
    return html.Label(text, style={"font-weight": "bold", "color": "#1B1464", "display": "block"})


def _field_style():
    """Return a fresh copy of the style dict shared by every form field."""
    return {"margin": "0 auto", "display": "block", "margin-bottom": "15px", "width": "50%", "padding": "10px", "color": "#333566"}


def _text_field(component_id, placeholder):
    """Return a free-text input with the shared field styling."""
    return dcc.Input(id=component_id, type="text", placeholder=placeholder, style=_field_style())


def _build_layout():
    """Assemble the page: title block, input form, and results area."""
    # Title block
    title_block = html.Div(
        children=[
            html.H1("Project Invent", style={"text-align": "center", "font-size": "24px"}),
            html.H2("Organization Matching", style={"text-align": "center", "font-size": "16px"}),
        ]
    )

    # Input form: topic, city, state, zip code, submit button, loading indicator
    state_options = [{"label": abbr, "value": abbr} for abbr in US_STATES_ABBR]
    form_block = html.Div(
        children=[
            _prompt_label("What is your topic of interest?"),
            _text_field("topic-input", "Enter a topic"),
            _prompt_label("What city are you in?"),
            _text_field("city-input", "Enter a city"),
            _prompt_label("Select your state:"),
            dcc.Dropdown(
                id="state-dropdown",
                options=state_options,
                placeholder="Select a state",
                style=_field_style(),
            ),
            _prompt_label("What is your zip code?"),
            _text_field("zipcode-input", "Enter a zip code"),
            html.Button("Submit", id="submit-button", n_clicks=0, style={"margin-top": "15px", "color": "#333566"}),
            dcc.Loading(id="loading-bar", children=[html.Div(id="loading-bar-output")], style={"position": "relative", "top": "75px"}),
        ],
        style={"width": "50%", "margin": "0 auto", "text-align": "center", "backgroundColor":"white"},
    )

    # Results area: the callback fills "results-list"
    results_block = html.Div(
        id="results-div",
        children=[
            html.H3("Your top 5 results:", style={"margin-top": "30px", "color": "#1B1464"}),
            html.Div(id="results-list"),
        ],
        style={"text-align": "center", "margin-top": "20px"},
    )

    return html.Div(
        children=[title_block, form_block, results_block],
        style={"font-family": "Arial, sans-serif", "padding": "20px", "backgroundColor":"white"},
    )


app.layout = _build_layout()

# Helper function to find the cities near a given (latitude, longitude) location.
def find_nearby_cities(location, loc_state, cities, threshold=15):
  """Return the (city, state) pairs within `threshold` miles of `location`.

  Args:
    location: (latitude, longitude) pair to measure distances from.
    loc_state: state value; only entries of `cities` whose state equals it
      are considered.
    cities: iterable of (city, state, coordinates) tuples, where coordinates
      is text such as "(lat, lon)".
    threshold: maximum geodesic distance in miles. Defaults to 15; a higher
      threshold makes the search take longer to run.

  Returns:
    A list of (city, state) tuples in the order they appear in `cities`.
    The list is empty when `location` is missing or nothing is in range.
  """
  if not location:
    # Without a reference point no distance can be computed, so stop here
    # instead of passing the empty location on to geodesic().
    print("location not found.")
    return []

  nearby_cities = []

  # Calculate distance to each other city in the same state
  for city, state, coordinates in cities:
    if loc_state != state:
      continue

    try:
      coords = tuple(map(float, coordinates.strip("()").split(',')))
    except (AttributeError, ValueError):
      # Missing (e.g. NaN) or malformed coordinate text: skip this row
      # rather than aborting the whole search.
      continue

    if geodesic(location, coords).miles <= threshold:
      nearby_cities.append((city, state))

  return nearby_cities

def _organization_card(name, city, state, description, website):
    """Build the bordered result card shown for one matched organization."""
    card_children = [
        html.H4(f"{name}"),
        html.P(f"Location: {city}, {state}"),
        html.P(f"About: {description}"),
    ]
    # A missing URL can arrive as None or NaN (NaN is truthy), so only a real,
    # non-empty string is turned into a link.
    if isinstance(website, str) and website.strip():
        href = website if website.startswith(("http://", "https://")) else f"https://{website}"
        card_children.append(
            html.P(["Website: ", html.A(website, href=href, target="_blank")])
        )
    return html.Div(
        children=card_children,
        style={"margin": "10px", "padding": "10px", "border": "1px solid #ddd"},
    )


# Define callback for button click
@app.callback(
    [Output("results-list", "children"),
    Output("topic-input", "value"),
    Output("city-input", "value"),
    Output("state-dropdown", "value"),
    Output("zipcode-input", "value"),
    Output("loading-bar-output", "children")],
    [Input("submit-button", "n_clicks")],
    [State("topic-input", "value"),
    State("city-input", "value"),
    State("state-dropdown", "value"),
    State("zipcode-input", "value")],
)
def update_results(n_clicks, topic, city, state, zipcode):
    """Find up to five organizations near the user that best match `topic`.

    Args:
        n_clicks: click count of the submit button.
        topic: free-text topic, encoded with the sentence-embedding model.
        city: city name typed by the user.
        state: state abbreviation chosen in the dropdown.
        zipcode: zip code text; it must be filled in but is not otherwise used.

    Returns:
        A 6-tuple matching the declared outputs. The first item is the content
        for "results-list" (result cards or a status message); the other five
        are always `no_update`, so the form fields keep their values.
    """
    # Local import: geopy is already a dependency of this notebook.
    from geopy.exc import GeopyError

    unchanged = (no_update,) * 5

    if not (n_clicks and topic and city and state and zipcode):
        return (
            html.P("Please fill in the text boxes and click submit.", style={"color": "gray"}),
            *unchanged,
        )

    # NOTE(review): fixed 2 s pause kept from the original; presumably there to
    # throttle geocoder requests or keep the spinner visible - confirm it is needed.
    time.sleep(2)

    # Geocode user location; a geocoder/service failure is reported the same
    # way as an unknown place instead of surfacing as a callback error.
    try:
        user_location = geolocator.geocode(f"{city}, {state}", country_codes="US")
    except GeopyError:
        user_location = None
    if user_location is None:
        return (
            [html.P("Could not find your location. Please try again.", style={"color": "red"})],
            *unchanged,
        )

    # Cities in the same state within find_nearby_cities' default radius.
    cities_list = list(zip(df_cities['city'], df_cities['state'], df_cities['coordinates']))
    nearby_cities = find_nearby_cities((user_location.latitude, user_location.longitude), state, cities_list)
    # Drop repeated (city, state) pairs, keeping order, so no organization is listed twice.
    nearby_cities = list(dict.fromkeys(nearby_cities))

    # Uppercase copies so checking for equality works. They are kept local so
    # the shared module-level DataFrame is never modified from inside a callback.
    state_upper = final_df['state'].str.upper()
    city_upper = final_df['city'].str.upper()

    embeddings_nearby = []
    names_nearby = []
    cities_nearby = []
    states_nearby = []
    descriptions_nearby = []
    websites_nearby = []

    # For each nearby city, collect the rows located in that city.
    for near_city, near_state in nearby_cities:
        df_nearby = final_df[(city_upper == near_city) & (state_upper == near_state)]
        embeddings_nearby.extend(df_nearby['embeddings'].apply(json.loads).tolist())
        names_nearby.extend(df_nearby['name'].tolist())
        cities_nearby.extend(df_nearby['city'].tolist())
        states_nearby.extend(df_nearby['state'].tolist())
        descriptions_nearby.extend(df_nearby['Text'].tolist())
        websites_nearby.extend(df_nearby['organization_url'].tolist())

    # With nothing to compare against, the similarity step would fail on an
    # empty array, so report it to the user instead.
    if not embeddings_nearby:
        return (
            [html.P("No organizations were found near your location.", style={"color": "gray"})],
            *unchanged,
        )

    # Encode the topic and move it to the CPU so it can be compared with the
    # CPU-side stored embeddings even when the model runs on a GPU.
    input_embedding = model.encode(topic, convert_to_tensor=True).cpu()

    # Compute cosine similarity against every nearby organization
    cosine_scores = util.pytorch_cos_sim(input_embedding, np.array(embeddings_nearby, dtype=np.float32))
    scores = cosine_scores.cpu().numpy()[0]

    # Indices of the (up to) five highest scores, best first
    most_similar = np.argsort(scores)[::-1][:5]

    results = [
        _organization_card(
            names_nearby[idx],
            cities_nearby[idx],
            states_nearby[idx],
            descriptions_nearby[idx],
            websites_nearby[idx],
        )
        for idx in most_similar
    ]

    # Return the formatted results for the Dash app
    return (results, *unchanged)

# Start the Dash server in "external" mode on port 8050, but only once per
# process: the DASH_APP_RUNNING environment variable marks a server that was
# already launched, so re-running this cell does not start a second one.
# NOTE(review): `run_server` is the older spelling of `app.run` - confirm it is
# still available in the installed Dash version.
if os.environ.get("DASH_APP_RUNNING") is None:
    os.environ["DASH_APP_RUNNING"] = "1"
    app.run_server(mode="external", port=8050)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/229 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/122 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/3.73k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/629 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/314 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

<IPython.core.display.Javascript object>