In [1]:
# requirements 
%pip install pandas --quiet 
%pip install flask requests --quiet
%pip install ollama --quiet

print("Requirements installed successfully!")


You should consider upgrading via the '/Library/Developer/CommandLineTools/usr/bin/python3 -m pip install --upgrade pip' command.[0m
Note: you may need to restart the kernel to use updated packages.
You should consider upgrading via the '/Library/Developer/CommandLineTools/usr/bin/python3 -m pip install --upgrade pip' command.[0m
Note: you may need to restart the kernel to use updated packages.
You should consider upgrading via the '/Library/Developer/CommandLineTools/usr/bin/python3 -m pip install --upgrade pip' command.[0m
Note: you may need to restart the kernel to use updated packages.
Requirements installed successfully!


In [2]:
# imports
print("Starting imports...")
import pandas as pd
from flask import Flask, request, jsonify
import ollama
import json
import requests
import re

print("Successfully imported!")

Starting imports...
Successfully imported!




In [3]:
# Define the Flask app
app = Flask(__name__)

In [4]:
# Load the dataset into a df
carsDF = pd.read_csv('vehicles.csv')

In [5]:
# # view dataset 
# carsDF.head()

Data Cleaning and Preprocessing 

In [6]:
# Normalize text values to title case for consistency
carsDF['Make'] = carsDF['Make'].str.strip().str.upper()  # Remove extra spaces and convert to uppercase
carsDF['Model'] = carsDF['Model'].str.strip().str.upper()  # Remove extra spaces and convert to uppercase


In [7]:
# # Verify normalization
# print(carsDF[['Make', 'Model']])

In [8]:
# Data Type Correction
# Convert numerical columns
carsDF['Year'] = carsDF['Year'].astype(int)
carsDF['SellingPrice'] = carsDF['SellingPrice'].astype(float)
carsDF['Doors'] = carsDF['Doors'].astype(int)


In [9]:
carsDF['PriceRange'] = pd.cut(carsDF['SellingPrice'], 
                              bins=[0, 10000, 20000, 30000, 50000, 100000],
                              labels=['Under 10K', '10K-20K', '20K-30K', '30K-50K', 'Above 50K'])


In [10]:
def extract_preferences(message):
    """
    Function to extract user preferences from a message.
    Arguments:
    - message: User's message as a string.

    Returns:
    - preferences: A dictionary with extracted preferences.
    """
    preferences = {}

    if "new" in message.lower() or "used" in message.lower():
        preferences['car_type'] = 'New' if 'new' in message.lower() else 'Used'
    
    if 'year' in message.lower():
        year = [int(word) for word in message.split() if word.isdigit() and len(word) == 4]
        if year:
            preferences['year'] = year[0]

    if any(make in message.lower() for make in carsDF['Make'].str.lower().unique()):
        preferences['make'] = next(make.title() for make in carsDF['Make'].str.lower().unique() if make in message.lower())

    # Extract price range (e.g., "under $20,000", "between $20,000 and $30,000")
    price_under = re.search(r'under\s*\$?(\d{1,3}(?:,\d{3})*)', message, re.IGNORECASE)
    price_between = re.search(r'between\s*\$?(\d{1,3}(?:,\d{3})*)\s*and\s*\$?(\d{1,3}(?:,\d{3})*)', message, re.IGNORECASE)
    
    if price_under:
        max_price = int(price_under.group(1).replace(',', ''))
        preferences['max_price'] = max_price
    elif price_between:
        min_price = int(price_between.group(1).replace(',', ''))
        max_price = int(price_between.group(2).replace(',', ''))
        preferences['min_price'] = min_price
        preferences['max_price'] = max_price

    # Extract mileage (e.g., "low mileage" or "<50,000 miles")
    if "low mileage" in message.lower():
        preferences['max_miles'] = 50000  # Assume "low mileage" means under 50,000 miles
    mileage = re.search(r'(\d{1,3}(?:,\d{3})*)\s*miles', message, re.IGNORECASE)
    if mileage:
        preferences['max_miles'] = int(mileage.group(1).replace(',', ''))

    # Extract exterior color preference
    colors = carsDF['Ext_Color_Generic'].dropna().apply(str).str.lower().unique()
    for color in colors:
        if color in message.lower():
            preferences['exterior_color'] = color.title()
            break

    # Extract drivetrain preference (e.g., AWD, FWD, RWD)
    drivetrains = ['AWD', 'FWD', 'RWD', '4WD']
    for drivetrain in drivetrains:
        if drivetrain.lower() in message.lower():
            preferences['drivetrain'] = drivetrain.upper()
            break
    

    return preferences


In [11]:
def get_matching_cars(message):
    """
    Function to extract user preferences from a message and return matching cars from the dataset.
    
    Arguments:
    - message: User's message as a string.
    
    Returns:
    - matching_cars: A DataFrame with cars that match user preferences.
    """
    # Extract preferences using the existing function
    preferences = extract_preferences(message)
    
    # Filter the dataset based on preferences
    filtered_cars = carsDF
    
    # Apply each preference as a filter
    if 'car_type' in preferences:
        if preferences['car_type'] == 'New':
            filtered_cars = filtered_cars[filtered_cars['Type'] == 'New']
        elif preferences['car_type'] == 'Used':
            filtered_cars = filtered_cars[filtered_cars['Type'] == 'Used']
    
    if 'year' in preferences:
        filtered_cars = filtered_cars[filtered_cars['Year'] == preferences['year']]
    
    if 'make' in preferences:
        filtered_cars = filtered_cars[filtered_cars['Make'].str.upper() == preferences['make'].upper()]
    
    if 'min_price' in preferences and 'max_price' in preferences:
        filtered_cars = filtered_cars[
            (filtered_cars['SellingPrice'] >= preferences['min_price']) & 
            (filtered_cars['SellingPrice'] <= preferences['max_price'])
        ]
    elif 'max_price' in preferences:
        filtered_cars = filtered_cars[filtered_cars['SellingPrice'] <= preferences['max_price']]
    
    if 'max_miles' in preferences:
        filtered_cars = filtered_cars[filtered_cars['Miles'] <= preferences['max_miles']]
    
    if 'exterior_color' in preferences:
        filtered_cars = filtered_cars[filtered_cars['Ext_Color_Generic'].str.lower() == preferences['exterior_color'].lower()]
    
    if 'drivetrain' in preferences:
        filtered_cars = filtered_cars[filtered_cars['Drivetrain'].str.upper() == preferences['drivetrain'].upper()]
    
    # Return the filtered dataset with relevant columns
    if not filtered_cars.empty:
        return filtered_cars[['Type', 'Stock', 'VIN', 'Year', 'Make', 'Model', 'Body', 'ModelNumber',
       'Doors', 'ExteriorColor', 'InteriorColor', 'EngineCylinders',
       'EngineDisplacement', 'Transmission', 'Miles', 'SellingPrice', 'MSRP',
       'BookValue', 'Invoice', 'Certified', 'Options', 'Style_Description',
       'Ext_Color_Generic', 'Ext_Color_Code', 'Int_Color_Generic',
       'Int_Color_Code', 'Int_Upholstery', 'Engine_Block_Type',
       'Engine_Aspiration_Type', 'Engine_Description', 'Transmission_Speed',
       'Transmission_Description', 'Drivetrain', 'Fuel_Type', 'CityMPG',
       'HighwayMPG', 'EPAClassification', 'Wheelbase_Code', 'Internet_Price',
       'MarketClass', 'PassengerCapacity', 'ExtColorHexCode',
       'IntColorHexCode', 'EngineDisplacementCubicInches', 'PriceRange']].head(5)  # Returning top 10 matches for brevity
    else:
        return None


In [12]:
print(carsDF.columns)


Index(['Type', 'Stock', 'VIN', 'Year', 'Make', 'Model', 'Body', 'ModelNumber',
       'Doors', 'ExteriorColor', 'InteriorColor', 'EngineCylinders',
       'EngineDisplacement', 'Transmission', 'Miles', 'SellingPrice', 'MSRP',
       'BookValue', 'Invoice', 'Certified', 'Options', 'Style_Description',
       'Ext_Color_Generic', 'Ext_Color_Code', 'Int_Color_Generic',
       'Int_Color_Code', 'Int_Upholstery', 'Engine_Block_Type',
       'Engine_Aspiration_Type', 'Engine_Description', 'Transmission_Speed',
       'Transmission_Description', 'Drivetrain', 'Fuel_Type', 'CityMPG',
       'HighwayMPG', 'EPAClassification', 'Wheelbase_Code', 'Internet_Price',
       'MarketClass', 'PassengerCapacity', 'ExtColorHexCode',
       'IntColorHexCode', 'EngineDisplacementCubicInches', 'PriceRange'],
      dtype='object')


In [13]:
# Sample message to test the get_matching_cars function
#test_message = "I'm looking for a used car from 2019, preferably under $30,000 with low mileage."
test_message = "I'm looking for a 2014 Honda with high mileage."

# Get matching cars
matching_cars = get_matching_cars(test_message)

# Display the result
if matching_cars is not None:
    print("Matching Cars:")
    print(matching_cars)
else:
    print("No cars match the specified preferences.")



Matching Cars:
    Type    Stock                VIN  Year   Make            Model  \
8    New  H254838  5CTG8SJWZC5HKDNLB  2018  HONDA             HR-V   
28   New  T965130  Z8JS24DT5ZT69T5R5  2023  HONDA            CIVIC   
34  Used  H108383  CMW5BPVEUF98K6H8D  2014  HONDA           ACCORD   
35  Used  P541001  2UJ6TY3LFP3222GJP  2024  HONDA  CIVIC HATCHBACK   
39  Used  F884425  VRB2D001UP7F84Z4H  2024  HONDA  CIVIC HATCHBACK   

             Body  ModelNumber  Doors              ExteriorColor  ...  \
8   Sport Utility  HOHR-VH558X      4      Lunar Silver Metallic  ...   
28        4dr Car  HOCI-ML043L      4                 Rallye Red  ...   
34        4dr Car  HOAC-FD381Y      4  Alabaster Silver Metallic  ...   
35      Hatchback  HOCI-XT098D      4       Platinum White Pearl  ...   
39      Hatchback  HOCI-AN230U      4       Platinum White Pearl  ...   

   HighwayMPG     EPAClassification Wheelbase_Code Internet_Price  \
8        31.0  Small Station Wagons          102.8      

Integrating LLM 

In [19]:


# Main Loop to Interact with Llama 2 and Extract Preferences
while True:
    user_input = input("Enter your car preferences (or type 'exit' to quit): ")
    if user_input.lower() == 'exit':
        break

    # Prepare the input in the correct format
    messages = [
        {"role": "user", "content": user_input}
    ]

    # Use Llama 2 to generate a response
    try:
        response = ollama.chat(model="llama2", messages=messages)
        print("\nLlama 2 Response:")
        print(response.content)  # Assuming response has a 'content' attribute containing the response text
    except TypeError as e:
        print(f"An error occurred while calling ollama: {e}")
        response = "Unable to generate response from Llama 2."
    except ValueError as e:
        print(f"An error occurred while processing the response: {e}")
        response = "Unable to generate response from Llama 2."

    # Extract preferences and find matching cars
    preferences = extract_preferences(user_input)
    matching_cars = get_matching_cars(user_input)

    # Print User Preferences
    print("\nExtracted Preferences:")
    print(preferences)

    # Display Matching Cars
    if matching_cars is not None:
        print("\nMatching Cars:")
        print(matching_cars)
    else:
        print("\nNo cars match the specified preferences.\n")
