In [1]:
import numpy as np

In [2]:
print(np.__version__)

1.24.3


In [3]:
import numpy
import spacy

print(f"Numpy version: {numpy.__version__}")
nlp = spacy.load("en_core_web_sm")
print("Spacy model loaded successfully!")


Numpy version: 1.24.3
Spacy model loaded successfully!


In [6]:
# Example user input
user_input = "I want a red car that is fast and costs less than 100,000$"

# Process the text
doc = nlp(user_input)

# Visualize results
print("Tokens and their Parts of Speech:")
for token in doc:
    print(f"{token.text}: {token.pos_} ({token.dep_})")

print("\nNamed Entities Detected:")
for ent in doc.ents:
    print(f"{ent.text}: {ent.label_}")

Tokens and their Parts of Speech:
I: PRON (nsubj)
want: VERB (ROOT)
a: DET (det)
red: ADJ (amod)
car: NOUN (dobj)
that: PRON (nsubj)
is: AUX (relcl)
fast: ADJ (acomp)
and: CCONJ (cc)
costs: VERB (conj)
less: ADJ (amod)
than: ADP (quantmod)
100,000: NUM (dobj)
$: SYM (punct)

Named Entities Detected:
less than 100,000$: MONEY


In [9]:
import re  # For extracting numeric values

def extract_car_preferences(user_input):
    doc = nlp(user_input)

    # Initialize filters
    preferences = {
        "Make": None,
        "Color": None,
        "PriceRange": None,
        "Mileage": None,
        "PassengerCapacity": None,
        "BodyType": None,
    }

    # Extract named entities
    for ent in doc.ents:
        if ent.label_ == "MONEY":
            # Handle price range (e.g., "under $30,000")
            if "under" in user_input:
                preferences["PriceRange"] = f"<= {ent.text.replace('$', '').replace(',', '')}"
            elif "over" in user_input:
                preferences["PriceRange"] = f">= {ent.text.replace('$', '').replace(',', '')}"
        elif ent.label_ == "ORG":
            # Extract Make (e.g., "Toyota")
            preferences["Make"] = ent.text
        elif ent.label_ == "CARDINAL":
            # Handle number of passengers or mileage
            # Use regex to extract numeric values
            match = re.search(r"\d+", ent.text)
            if match:
                number = int(match.group())
                if "seats" in user_input or "passengers" in user_input:
                    preferences["PassengerCapacity"] = number
                elif "miles" in user_input or "mileage" in user_input:
                    preferences["Mileage"] = f"<= {number}"

    # Extract adjectives for color or style
    for token in doc:
        if token.pos_ == "ADJ":
            if token.text in ["red", "blue", "black", "white", "green"]:  # Expand with more colors
                preferences["Color"] = token.text
            elif token.text in ["SUV", "sedan", "truck"]:  # Body type
                preferences["BodyType"] = token.text

    return preferences


In [10]:
preferences = extract_car_preferences("I want a red Toyota SUV under $30,000 with at least 5 seats.")
print(preferences)


{'Make': 'Toyota', 'Color': 'red', 'PriceRange': '<= 30000', 'Mileage': None, 'PassengerCapacity': 5, 'BodyType': None}


In [11]:
preferences = extract_car_preferences("I want a Toyota SUV under $30,000 with at least 5 seats.")
print(preferences)

{'Make': 'Toyota', 'Color': None, 'PriceRange': '<= 30000', 'Mileage': None, 'PassengerCapacity': 5, 'BodyType': None}


In [13]:
preferences = extract_car_preferences("I'm looking for a big car, around 30,000$, with less than 50,000km in mileage, a green car.")
print(preferences)

{'Make': None, 'Color': 'green', 'PriceRange': None, 'Mileage': '<= 50', 'PassengerCapacity': None, 'BodyType': None}


In [14]:

# Create a dictionary of target fields with representative words
context_keywords = {
    "PassengerCapacity": ["seats", "passengers", "people"],
    "Mileage": ["mileage", "miles", "distance"],
    "PriceRange": ["cost", "budget", "price", "under", "over"],
    "Color": ["color", "shade", "red", "blue", "black"],
    "BodyType": ["SUV", "truck", "sedan", "convertible"]
}

def match_field_by_context(word):
    for field, keywords in context_keywords.items():
        for keyword in keywords:
            # Compare semantic similarity
            if nlp(word).similarity(nlp(keyword)) > 0.7:  # Adjust threshold as needed
                return field
    return None

In [15]:
def analyze_input(user_input):
    doc = nlp(user_input)

    # Try semantic matching
    preferences = {}
    for token in doc:
        matched_field = match_field_by_context(token.text)
        if matched_field:
            preferences[matched_field] = token.text

    # Fallback to rules for entities
    for ent in doc.ents:
        if ent.label_ == "MONEY":
            preferences["PriceRange"] = ent.text.replace("$", "").replace(",", "")

    return preferences

In [16]:
analyze_input(user_input)

  if nlp(word).similarity(nlp(keyword)) > 0.7:  # Adjust threshold as needed


{'Color': 'red',
 'Mileage': 'car',
 'PassengerCapacity': 'costs',
 'PriceRange': 'less than 100000'}

In [22]:
nlp("people").similarity(nlp("zebras"))

  nlp("people").similarity(nlp("zebras"))


0.663036879335241

In [25]:
import openai

def get_contextual_filters(user_input):
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": "You are an assistant for car recommendations."},
            {"role": "user", "content": user_input}
        ]
    )
    return response['choices'][0]['message']['content']


In [27]:

# Define feature mappings
FEATURES = {
    "BodyType": ["SUV", "sedan", "convertible", "truck", "hatchback", "luxury", "big", "family", "compact"],
    "Color": ["red", "blue", "black", "white", "green", "yellow", "dark", "bright"],
    "PriceRange": ["cheap", "affordable", "expensive", "high end", "luxury", "under", "over", "less", "more"],
    "Mileage": ["mileage", "miles", "distance"],
    "PassengerCapacity": ["seats", "passengers", "people", "family", "room"],
    "Condition": ["used", "new", "pre-owned", "certified"],
    "Make": ["Ferrari", "Toyota", "Honda", "Ford", "BMW", "Mercedes", "Tesla"],
    "Sustainability": ["electric", "hybrid", "sustainable", "eco-friendly", "green"],
    "Safety": ["safe", "reliable", "sturdy", "secure"],
}


def extract_features(user_input):
    doc = nlp(user_input)
    
    # Initialize extracted features
    extracted = {
        "Make": None,
        "Color": [],
        "PriceRange": None,
        "Mileage": None,
        "BodyType": None,
        "PassengerCapacity": None,
        "Condition": None,
        "Sustainability": None,
        "Safety": None,
    }

    # Extract entities
    for ent in doc.ents:
        if ent.label_ == "MONEY":
            extracted["PriceRange"] = ent.text
        elif ent.label_ == "CARDINAL" or ent.label_ == "QUANTITY":
            if "miles" in user_input:
                extracted["Mileage"] = ent.text
            elif "seats" in user_input or "passengers" in user_input:
                extracted["PassengerCapacity"] = ent.text

    # Match keywords
    for token in doc:
        word = token.text.lower()
        for feature, keywords in FEATURES.items():
            if word in keywords:
                if feature == "Color":
                    extracted[feature].append(word)  # Allow multiple colors
                else:
                    extracted[feature] = word
    
    # Clean up results
    extracted["Color"] = list(set(extracted["Color"]))  # Remove duplicates
    return extracted

In [28]:
inputs = [
    "I'm looking for a big car, red or blue, under 40k $ and lower than 50,000 miles.",
    "Do you have a Ferrari, used.",
    "I want a cheap fast car for less than 90000.",
    "Give me a high end luxury SUV for me and my family.",
    "Suggest a safe car, affordable, sustainable car, in dark colors.",
]

for inp in inputs:
    print(f"Input: {inp}")
    features = extract_features(inp)
    print(f"Extracted Features: {features}")
    print("-" * 50)

Input: I'm looking for a big car, red or blue, under 40k $ and lower than 50,000 miles.
Extracted Features: {'Make': None, 'Color': ['red', 'blue'], 'PriceRange': 'under', 'Mileage': 'miles', 'BodyType': 'big', 'PassengerCapacity': None, 'Condition': None, 'Sustainability': None, 'Safety': None}
--------------------------------------------------
Input: Do you have a Ferrari, used.
Extracted Features: {'Make': None, 'Color': [], 'PriceRange': None, 'Mileage': None, 'BodyType': None, 'PassengerCapacity': None, 'Condition': 'used', 'Sustainability': None, 'Safety': None}
--------------------------------------------------
Input: I want a cheap fast car for less than 90000.
Extracted Features: {'Make': None, 'Color': [], 'PriceRange': 'less', 'Mileage': None, 'BodyType': None, 'PassengerCapacity': None, 'Condition': None, 'Sustainability': None, 'Safety': None}
--------------------------------------------------
Input: Give me a high end luxury SUV for me and my family.
Extracted Features: {

In [None]:
FEATURES = {
    "BodyType": {
        "keywords": ["SUV", "sedan", "big","Small"],
        "implications": {"big": ["SUV", "4x4"], "Small": ["sedan"]}
    }
}

In [None]:

def match_feature_by_similarity(word, feature_category):
    """
    Match a word to a feature category using semantic similarity.
    """
    doc_word = nlp(word)
    best_match = None
    best_score = 0.7  # Threshold for similarity
    for keyword in FEATURES[feature_category]["keywords"]:
        similarity = doc_word.similarity(nlp(keyword))
        if similarity > best_score:
            best_match = keyword
            best_score = similarity
    return best_match

In [None]:
import spacy
import inflect  # For singular/plural normalization
from rapidfuzz import fuzz  # For fuzzy matching

# Load spaCy model
nlp = spacy.load("en_core_web_sm")
p = inflect.engine()  # Inflect engine for singular/plural normalization

# Define the feature for BodyType
FEATURES = {
    "BodyType": {
        "keywords": ["SUV", "sedan", "big", "small"],
        "implications": {"big": ["SUV", "4x4"], "small": ["sedan"]}
    }
}

def normalize_word(word):
    """
    Normalize the word for comparison (lowercase, singular form).
    """
    word = word.lower()
    word = p.singular_noun(word) if p.singular_noun(word) else word  # Convert to singular if plural
    return word

def fuzzy_match(word, keyword_list, threshold=85):
    """
    Fuzzy match a word against a list of keywords.
    """
    for keyword in keyword_list:
        if fuzz.ratio(word, keyword.lower()) >= threshold:
            return keyword
    return None

def match_feature_by_similarity(word, feature_category):
    """
    Match a word to a feature category using semantic similarity and fuzzy matching.
    """
    doc_word = nlp(normalize_word(word))  # Normalize the word
    best_match = None
    best_score = 0.7  # Threshold for spaCy similarity
    for keyword in FEATURES[feature_category]["keywords"]:
        # Normalize keyword
        normalized_keyword = normalize_word(keyword)
        
        # Check similarity using spaCy
        similarity = doc_word.similarity(nlp(normalized_keyword))
        if similarity > best_score:
            best_match = keyword
            best_score = similarity
        
        # Check fuzzy match if similarity fails
        if not best_match:
            fuzzy_matched = fuzzy_match(word, FEATURES[feature_category]["keywords"])
            if fuzzy_matched:
                best_match = fuzzy_matched

    return best_match


def extract_body_type(user_input):
    """
    Extract BodyType feature from user input.
    """
    doc = nlp(user_input)

    # Initialize extracted BodyType and implication flag
    extracted_body_type = []
    implication_applied = False

    for token in doc:
        word = token.text.lower()

        # Apply implications for descriptive words first
        if word in FEATURES["BodyType"]["implications"]:
            extracted_body_type.extend(FEATURES["BodyType"]["implications"][word])
            implication_applied = True

        # Match BodyType keywords only if no implication has been applied
        elif not implication_applied:
            body_type = match_feature_by_similarity(word, "BodyType")
            if body_type:
                extracted_body_type.append(body_type)

    # Remove duplicates and return
    return list(set(extracted_body_type))

# Test the function with inputs
inputs = [
    "I'm looking for a big car.",
    "I want a small vehicle.",
    "Do you have any SUVs?",
    "Suggest a sedans for me.",
    "Is there a big 4x4 available?"
]

for inp in inputs:
    print(f"Input: {inp}")
    body_type = extract_body_type(inp)
    print(f"Extracted BodyType: {body_type}")
    print("-" * 50)

Input: I'm looking for a big car.
Extracted BodyType: ['4x4', 'SUV']
--------------------------------------------------
Input: I want a small vehicle.


  similarity = doc_word.similarity(nlp(normalized_keyword))


Extracted BodyType: ['Small', 'SUV']
--------------------------------------------------
Input: Do you have any SUVs?
Extracted BodyType: ['SUV']
--------------------------------------------------
Input: Suggest a sedans for me.
Extracted BodyType: ['sedan']
--------------------------------------------------
Input: Is there a big 4x4 available?
Extracted BodyType: ['4x4', 'SUV']
--------------------------------------------------


In [45]:
import spacy

# Load spaCy model
nlp = spacy.load("en_core_web_sm")

# Budget-related reference terms for semantic matching
REFERENCE_TERMS = {
    "max": nlp("less than"),
    "min": nlp("more than"),
    "range": nlp("between")
}

def parse_price(price_str):
    """Convert price strings to integers."""
    try:
        return int(price_str.replace("$", "").replace(",", "").replace("k", "000"))
    except ValueError:
        return None

def determine_budget_context(token):
    """
    Determine if a token represents a budget context
    using semantic similarity with predefined references.
    """
    max_similarity = token.similarity(REFERENCE_TERMS["max"])
    min_similarity = token.similarity(REFERENCE_TERMS["min"])
    range_similarity = token.similarity(REFERENCE_TERMS["range"])

    if max_similarity > 0.7:
        return "max"
    elif min_similarity > 0.7:
        return "min"
    elif range_similarity > 0.7:
        return "range"
    return None

def extract_price_range_dynamic(user_input):
    """
    Extract price range from user input using semantic similarity and dynamic parsing.
    """
    doc = nlp(user_input)

    # Initialize price range
    price_range = {"min": None, "max": None}

    # Extract potential prices and their contexts
    prices = []
    for token in doc:
        if token.ent_type_ in {"MONEY", "CARDINAL"} or "k" in token.text.lower():
            price = parse_price(token.text)
            if price:
                prices.append((price, token))

    for price, token in prices:
        # Determine the context of the price
        context = determine_budget_context(token.head)
        if context == "max":
            price_range["max"] = price
        elif context == "min":
            price_range["min"] = price
        elif context == "range" and len(prices) >= 2:
            sorted_prices = sorted([p[0] for p in prices])
            price_range["min"], price_range["max"] = sorted_prices[0], sorted_prices[1]

    # Ensure logical consistency
    if price_range["min"] and price_range["max"] and price_range["min"] > price_range["max"]:
        price_range["min"], price_range["max"] = price_range["max"], price_range["min"]

    return price_range

# Test the function with various inputs
inputs = [
    "I'm looking for a car under $40,000.",
    "I want a vehicle less than 50k.",
    "Show me cars over $20,000.",
    "Do you have any cars between $10,000 and $30,000?",
    "I want a car above 60k.",
    "Cars greater than $15,000.",
    "Find me a car that's over 25k but less than 75k.",
    "I need an SUV cheaper than $35,000.",
    "I want something priced around 50k.",
]

for inp in inputs:
    print(f"Input: {inp}")
    price_range = extract_price_range_dynamic(inp)
    print(f"Extracted Price Range: {price_range}")
    print("-" * 50)


Input: I'm looking for a car under $40,000.
Extracted Price Range: {'min': None, 'max': None}
--------------------------------------------------
Input: I want a vehicle less than 50k.
Extracted Price Range: {'min': None, 'max': None}
--------------------------------------------------
Input: Show me cars over $20,000.
Extracted Price Range: {'min': None, 'max': None}
--------------------------------------------------
Input: Do you have any cars between $10,000 and $30,000?
Extracted Price Range: {'min': 10000, 'max': 30000}
--------------------------------------------------
Input: I want a car above 60k.
Extracted Price Range: {'min': None, 'max': None}
--------------------------------------------------
Input: Cars greater than $15,000.
Extracted Price Range: {'min': None, 'max': None}
--------------------------------------------------
Input: Find me a car that's over 25k but less than 75k.
Extracted Price Range: {'min': None, 'max': None}
-----------------------------------------------

  max_similarity = token.similarity(REFERENCE_TERMS["max"])
  min_similarity = token.similarity(REFERENCE_TERMS["min"])
  range_similarity = token.similarity(REFERENCE_TERMS["range"])


In [50]:
from transformers import pipeline

# Load the pre-trained model for question answering
qa_pipeline = pipeline("question-answering", model="distilbert-base-uncased")

def extract_budget_using_qa(input_text):
    """
    Use a QA model to extract budget-related information dynamically.
    """
    questions = [
        "What is the maximum budget?",
        "What is the minimum budget?",
        "Are there any price ranges mentioned?",
    ]

    # Extract answers for budget-related questions
    extracted_info = {}
    for question in questions:
        answer = qa_pipeline(question=question, context=input_text)
        if answer["score"] > 0.5:  # Confidence threshold
            extracted_info[question] = answer["answer"]

    return extracted_info

# Test inputs
inputs = [
    "I'm looking for a car under $40,000.",
    "Show me cars over $20,000.",
    "Do you have any cars between $10,000 and $30,000?",
    "Find me a cheap car.",
    "I want a car above 60k.",
]

for inp in inputs:
    print(f"Input: {inp}")
    extracted = extract_budget_using_qa(inp)
    print(f"Extracted Info: {extracted}")
    print("-" * 50)


RuntimeError: At least one of TensorFlow 2.0 or PyTorch should be installed. To install TensorFlow 2.0, read the instructions at https://www.tensorflow.org/install/ To install PyTorch, read the instructions at https://pytorch.org/.

In [52]:
!where python


c:\Users\karim\OneDrive\Desktop\CODEJAM14Repo\CodeJam14KKSB\venv\Scripts\python.exe
C:\Users\karim\AppData\Local\Programs\Python\Python311\python.exe


In [53]:
import tensorflow as tf
print(tf.__version__)


  _BoolLike_co = Union[bool, np.bool]


AttributeError: module 'numpy' has no attribute 'bool'.
`np.bool` was a deprecated alias for the builtin `bool`. To avoid this error in existing code, use `bool` by itself. Doing this will not modify any behavior and is safe. If you specifically wanted the numpy scalar type, use `np.bool_` here.
The aliases was originally deprecated in NumPy 1.20; for more details and guidance see the original release note at:
    https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations

In [57]:
from transformers import pipeline, is_tf_available

print(f"TensorFlow Available: {is_tf_available()}")
qa_pipeline = pipeline("question-answering", model="distilbert-base-uncased", framework="tf")
print("Model loaded successfully!")


TensorFlow Available: False


RuntimeError: At least one of TensorFlow 2.0 or PyTorch should be installed. To install TensorFlow 2.0, read the instructions at https://www.tensorflow.org/install/ To install PyTorch, read the instructions at https://pytorch.org/.

In [55]:
import tensorflow as tf
import numpy as np

print(f"TensorFlow version: {tf.__version__}")
print(f"NumPy version: {np.__version__}")


TensorFlow version: 2.18.0
NumPy version: 1.24.3


In [58]:
import tensorflow as tf
print(tf.test.is_built_with_cuda())  # Check if CUDA is built-in (optional for CPU)
print(tf.config.list_physical_devices())  # List available devices (CPU/GPU)


False
[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU')]


In [59]:
from transformers import is_tf_available
print(f"TensorFlow Available in Hugging Face: {is_tf_available()}")


TensorFlow Available in Hugging Face: False


In [None]:
import tensorflow as tf
print(f"Is TensorFlow available: {tf.test.is_built_with_cuda()}")
print(f"Available devices: {tf.config.list_physical_devices()}")


In [60]:
import tensorflow as tf
from transformers import pipeline, is_tf_available

print(f"TensorFlow Available in Hugging Face: {is_tf_available()}")

# Explicitly set the framework
qa_pipeline = pipeline("question-answering", model="distilbert-base-uncased", framework="tf")
print("Model loaded successfully!")


TensorFlow Available in Hugging Face: False


RuntimeError: At least one of TensorFlow 2.0 or PyTorch should be installed. To install TensorFlow 2.0, read the instructions at https://www.tensorflow.org/install/ To install PyTorch, read the instructions at https://pytorch.org/.