# 🚗 Vehicle Search Testing

Quick notebook to test fuzzy matching for vehicle marks and models.


In [1]:
# Install dependencies (run once if needed; %pip targets the active kernel's environment)
# %pip install rapidfuzz pandas pyarrow


In [2]:
import pandas as pd
from matcher import VehicleMatcher

# Build the matcher from the vehicle database (parquet file)
matcher = VehicleMatcher(parquet_path='vehicles_db.parquet')

# Quick summary of what was loaded: total rows, then number of unique brands
vehicle_total = len(matcher.df)
print(f"Loaded {vehicle_total} vehicles")
brand_total = len(matcher.unique_marks)
print(f"Unique brands: {brand_total}")
print("\nSample data:")

# Trailing expression: first ten rows of the brand and model columns
preview_columns = [matcher.mark_column, matcher.model_column]
matcher.df[preview_columns].head(10)


Loaded 1976 vehicles
Unique brands: 172

Sample data:


Unnamed: 0,brand,model
0,Toyota,Vitz
1,LADA (–í–ê–ó),2107
2,Daewoo,Nubira
3,Chevrolet,Aveo
4,Chevrolet,Lacetti
5,Kia,Cerato
6,Kia,Sportage
7,Nissan,Wingroad
8,LADA (–í–ê–ó),2115
9,Volkswagen,Transporter


## 🔍 Test Search

Enter your test queries below:


In [11]:
# --- Inputs: change these values, then re-run the cell ---
query_mark, query_model = "–ª–∞–¥–∞", "–≤–µ—Å—Ç–∞ gfk11"  # brand/mark query, model query (typos allowed)
top_k = 5  # number of results to request

# Run the lookup against the loaded matcher
results = matcher.match(query_mark, query_model, top_k=top_k)

print(f"Query: '{query_mark}' + '{query_model}'")
print(f"Found {len(results)} results:\n")

# Two lines per result: numbered "mark model", then the combined score
# followed by the separate mark and model scores.
for rank, hit in enumerate(results, start=1):
    print(f"{rank}. {hit.mark} {hit.model}")
    print(f"   Score: {hit.combined_score:.1f} (mark: {hit.mark_score:.1f}, model: {hit.model_score:.1f})")


Query: '–ª–∞–¥–∞' + '–≤–µ—Å—Ç–∞ gfk11'
Found 5 results:

1. LADA (–í–ê–ó) Vesta
   Score: 90.0 (mark: 90.0, model: 90.0)
2. LADA (–í–ê–ó) 2115
   Score: 72.0 (mark: 90.0, model: 60.0)
3. LADA (–í–ê–ó) 2114
   Score: 72.0 (mark: 90.0, model: 60.0)
4. LADA (–í–ê–ó) 2110
   Score: 72.0 (mark: 90.0, model: 60.0)
5. LADA (–í–ê–ó) 2101
   Score: 72.0 (mark: 90.0, model: 60.0)


## 🧪 Batch Testing

Test multiple queries at once:


In [4]:
# Test queries - add your own!
# Each entry is a (brand query, model query) pair.
test_queries = [
    ("–º–µ—Ä—Å–µ–¥–µ—Å", "–µ –∫–ª–∞—Å—Å"),      # Russian
    ("mersedes", "e class"),       # Transliteration with typo
    ("—Ç–æ–µ—Ç–∞", "–∫–∞–º—Ä–∏"),            # Toyota Camry (typo)
    ("–±–º–≤", "x5"),                 # BMW X5
    ("volkswagn", "passat"),       # Typo
    ("—Ö—ë–Ω–¥–∞–π", "—Å–æ–ª—è—Ä–∏—Å"),         # Hyundai Solaris
    ("–∞—É–¥–∏", "–∞4"),                # Audi A4
    ("—Ñ–æ—Ä–¥", "—Ñ–æ–∫—É—Å"),             # Ford Focus
]

# Number of results printed per query; named so it can be tuned in one place.
BATCH_TOP_K = 3

# The loop names are deliberately distinct from `query_mark`, `query_model`
# and `results`, so running this cell does not overwrite the values set by
# the single-query Test Search cell.
print("=" * 60)
for batch_mark, batch_model in test_queries:
    batch_results = matcher.match(batch_mark, batch_model, top_k=BATCH_TOP_K)

    print(f"\nüîç Query: '{batch_mark}' + '{batch_model}'")
    if batch_results:
        for hit in batch_results:
            print(f"   ‚Üí {hit.mark} {hit.model} (score: {hit.combined_score:.1f})")
    else:
        # An empty (falsy) result is reported explicitly instead of printing nothing
        print("   ‚ùå No matches found")
print("\n" + "=" * 60)



üîç Query: '–º–µ—Ä—Å–µ–¥–µ—Å' + '–µ –∫–ª–∞—Å—Å'
   ‚Üí Mercedes-Benz Maybach S-–ö–ª–∞—Å—Å (score: 81.0)
   ‚Üí Mercedes-Benz M-–ö–ª–∞—Å—Å (score: 78.9)
   ‚Üí Mercedes-Benz B-–ö–ª–∞—Å—Å (score: 78.9)

üîç Query: 'mersedes' + 'e class'
   ‚Üí Mercedes-Benz CLA (score: 85.5)
   ‚Üí Mercedes-Benz CLS (score: 72.5)
   ‚Üí Mercedes-Benz GLA (score: 67.5)

üîç Query: '—Ç–æ–µ—Ç–∞' + '–∫–∞–º—Ä–∏'
   ‚Üí Toyota Cami (score: 80.0)
   ‚Üí Toyota Sprinter Carib (score: 76.0)
   ‚Üí Toyota Carina E (score: 76.0)

üîç Query: '–±–º–≤' + 'x5'
   ‚Üí BMW X5 (score: 100.0)
   ‚Üí BMW X5 M (score: 94.0)
   ‚Üí BMW 5 —Å–µ—Ä–∏–∏ (score: 76.0)

üîç Query: 'volkswagn' + 'passat'
   ‚Üí Volkswagen Passat (score: 97.9)
   ‚Üí Volkswagen Passat CC (score: 91.9)
   ‚Üí Volkswagen Passat (North America) (score: 91.9)

üîç Query: '—Ö—ë–Ω–¥–∞–π' + '—Å–æ–ª—è—Ä–∏—Å'
   ‚Üí Hyundai Solaris (score: 96.0)
   ‚Üí Hyundai Genesis Coupe (score: 68.0)
   ‚Üí Hyundai Pony (score: 67.0)

üîç Query: '–∞—É–¥–∏' + '–∞4'


## 🏷️ Search Brands Only


In [5]:
# Brand-only lookup: ask the matcher for brands similar to a single query
query = "—Ñ–æ–ª—å–∫–≤–∞–≥–µ–Ω"  # Volkswagen with typo
marks = matcher.find_similar_marks(query, top_k=10)

# Each returned item unpacks into a (brand, score) pair; print one per line
print(f"Similar brands to '{query}':")
for brand_name, similarity in marks:
    print(f"  {brand_name}: {similarity:.1f}")


Similar brands to '—Ñ–æ–ª—å–∫–≤–∞–≥–µ–Ω':
  volkswagen: 73.7
  fso: 72.0
  kia: 60.0
  ford: 60.0
  luxgen: 60.0
  geo: 60.0
  eagle: 60.0
  delage: 60.0
  volvo: 54.0
  opel: 51.4


## 🔧 Interactive Search Helper


In [6]:
def search(mark: str, model: str, top_k: int = 5) -> pd.DataFrame:
    """Run a mark/model query through the notebook's ``matcher`` and tabulate it.

    Args:
        mark: Brand/mark query string, passed to ``matcher.match`` unchanged.
        model: Model query string, passed to ``matcher.match`` unchanged.
        top_k: Maximum number of results requested from the matcher.

    Returns:
        A DataFrame with one row per result and the columns ``Brand``,
        ``Model``, ``Score``, ``Mark Score`` and ``Model Score``. The three
        score columns hold strings formatted to one decimal place. When the
        matcher returns no results the frame is empty but still carries
        these columns.
    """
    columns = ['Brand', 'Model', 'Score', 'Mark Score', 'Model Score']

    rows = [
        {
            'Brand': r.mark,
            'Model': r.model,
            'Score': f"{r.combined_score:.1f}",
            'Mark Score': f"{r.mark_score:.1f}",
            'Model Score': f"{r.model_score:.1f}",
        }
        for r in matcher.match(mark, model, top_k=top_k)
    ]

    # Explicit columns keep the schema stable for an empty result list;
    # pd.DataFrame([]) alone would produce a frame with no columns at all.
    return pd.DataFrame(rows, columns=columns)

# Example call; as the cell's last expression, the returned DataFrame is displayed:
search(mark="–º–µ—Ä—Å–µ–¥–µ—Å", model="—Å –∫–ª–∞—Å—Å")


Unnamed: 0,Brand,Model,Score,Mark Score,Model Score
0,Mercedes-Benz,Maybach S-–ö–ª–∞—Å—Å,81.0,90.0,75.0
1,Mercedes-Benz,A-–ö–ª–∞—Å—Å AMG,80.0,90.0,73.3
2,Mercedes-Benz,M-–ö–ª–∞—Å—Å,78.9,90.0,71.4
3,Mercedes-Benz,B-–ö–ª–∞—Å—Å,78.9,90.0,71.4
4,Mercedes-Benz,C-–ö–ª–∞—Å—Å,78.9,90.0,71.4


## 📊 View All Unique Brands in Database


In [7]:
# Numbered listing of every entry in matcher.unique_marks, in sorted order
print(f"All {len(matcher.unique_marks)} unique brands:\n")
brand_names = sorted(matcher.unique_marks)
for position, brand in enumerate(brand_names, start=1):
    # Index is right-aligned to width 3 so the names line up
    print(f"{position:3}. {brand}")


All 172 unique brands:

  1. 
  2. ac
  3. acura
  4. adler
  5. alfa romeo
  6. alpina
  7. amc
  8. ariel
  9. aro
 10. asia
 11. aston martin
 12. auburn
 13. audi
 14. austin
 15. baic
 16. bentley
 17. bmw
 18. borgward
 19. brabus
 20. brilliance
 21. bugatti
 22. buick
 23. byd
 24. cadillac
 25. chana
 26. changan
 27. changfeng
 28. changhe
 29. chery
 30. cheryexeed
 31. chevrolet
 32. chrysler
 33. citroen
 34. dacia
 35. dadi
 36. daewoo
 37. daihatsu
 38. daimler
 39. datsun
 40. delage
 41. derways
 42. dkw
 43. dodge
 44. dongfeng
 45. doninvest
 46. ds
 47. dw hower
 48. eagle
 49. excalibur
 50. faw
 51. ferrari
 52. fiat
 53. fisker
 54. ford
 55. foton
 56. fso
 57. gac
 58. geely
 59. genesis
 60. geo
 61. gmc
 62. great wall
 63. hafei
 64. haima
 65. hanomag
 66. haval
 67. hawtai
 68. heinkel
 69. honda
 70. huanghai
 71. hudson
 72. hummer
 73. hyundai
 74. infiniti
 75. iran khodro
 76. isuzu
 77. iveco
 78. jac
 79. jaguar
 80. jeep
 81. jinbei
 82. jmc
 83. k