In [1]:
import pandas as pd
import ast


In [3]:
# Load the G2 CRM-category product overview export (path is relative to the
# notebook's working directory) and preview the first rows.
df = pd.read_csv("G2 software - CRM Category Product Overviews.csv")
df.head()


Unnamed: 0,url,product_name,rating,description,product_url,seller,ownership,seller_website,headquarters,total_revenue,...,full_pricing_page,badge,what_is_description,main_category,main_subject,Features,region,country_code,software_product_id,overview_provided_by
0,https://www.g2.com/products/efficy-crm/reviews,Efficy CRM,4.5,The Efficy extendable CRM platform (xCRM) orga...,https://www.efficy.com/efficy-crm-features/,Efficy,,https://www.efficy.com/,"Brussels, Belgium",,...,https://www.g2.com/products/efficy-crm/pricing,,,CRM Software,Home>CRM Software>Efficy CRM>Efficy CRM Reviews,"[{""Category"":""Platform"",""features"":[{""descript...",EU,BE,efficy-crm,Laetitia Baret
1,https://www.g2.com/products/salesboss/reviews,Salesboss,5.0,"SalesBoss is an all-in-one sales, marketing, c...",https://www.salesboss.ai/pricing,Salesboss,,https://www.salesboss.ai/,,,...,https://www.g2.com/products/salesboss/pricing,,,CRM Software,Home>CRM Software>Salesboss>Salesboss Reviews,,,,salesboss,Prit Pal
2,https://www.g2.com/products/desktop-sales-offi...,Desktop Sales Office,3.0,Desktop Sales Office 2007 is a collection of i...,https://www.g2.com/products/desktop-sales-offi...,The CRM Guide,,,,,...,,,,CRM Software,Home>CRM Software>Desktop Sales Office>Desktop...,,,,desktop-sales-office,
3,https://www.g2.com/products/atendare-2018-10-2...,Atendare,5.0,Atendare is a complete marketing and sales pla...,https://www.g2.com/products/atendare-2018-10-2...,Inofly,,,,,...,,,,CRM Software,Home>CRM Software>Atendare>Atendare Reviews,,,,atendare-2018-10-22,
4,https://www.g2.com/products/clinchpad/reviews,ClinchPad,4.8,Close leads faster. Clinch more deals. Simpler...,https://www.g2.com/products/clinchpad/reviews,ClinchPad Technologies Pvt Ltd,,,"New Delhi, IN",,...,https://www.g2.com/products/clinchpad/pricing,,,CRM Software,Home>CRM Software>ClinchPad>ClinchPad Reviews,,AS,IN,clinchpad,"Cheenu MadanFounder, ClinchPad"


In [4]:
# Keep only the products that actually list features; copy so that later
# column assignments operate on an independent frame.
has_features = df["Features"].notna()
df = df[has_features].copy()


In [5]:
def extract_features(feature_str):
    """Flatten a raw ``Features`` cell into a list of feature descriptions.

    The cell is expected to hold a list of category objects, each with a
    ``"features"`` list whose items carry a ``"description"``.  The payload is
    parsed as JSON first (so ``null`` / ``true`` / ``false`` are understood)
    and as a Python literal second, which keeps Python-repr strings working.

    Args:
        feature_str: Raw cell value, normally a string.

    Returns:
        list[str]: One entry per feature, in document order.  A feature with a
        missing or null description contributes ``""``.  An empty list is
        returned when the value cannot be parsed or is not a list of
        categories (best-effort: this function never raises on bad data).
    """
    import json  # local import so this cell does not depend on another cell's imports

    try:
        parsed = json.loads(feature_str)
    except (TypeError, ValueError):
        # Not JSON (or not even a string, e.g. NaN): fall back to a Python literal.
        try:
            parsed = ast.literal_eval(feature_str)
        except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
            return []

    if not isinstance(parsed, (list, tuple)):
        return []

    descriptions = []
    for category in parsed:
        if not isinstance(category, dict):
            continue  # skip malformed entries instead of discarding the whole row
        # "features" may be absent or an explicit null.
        for feature in category.get("features") or []:
            if not isinstance(feature, dict):
                continue
            description = feature.get("description")
            descriptions.append("" if description is None else str(description))
    return descriptions


In [6]:
# Turn every raw Features cell into a list of description strings.
df["parsed_features"] = df["Features"].map(extract_features)


In [7]:
# Keep only the columns used for matching and ranking, then preview them.
df_cleaned = df.loc[:, ["product_name", "main_category", "parsed_features", "rating"]]
df_cleaned.head()


Unnamed: 0,product_name,main_category,parsed_features,rating
0,Efficy CRM,CRM Software,[Based on 50 Efficy CRM reviews and verified b...,4.5
6,Zurmo,CRM Software,[Allows administrators to customize to accommo...,4.6
7,YetiForce CRM,CRM Software,[Based on 32 YetiForce CRM reviews and verifie...,4.4
18,FinCRM,CRM Software,[Allows administrators to customize to accomod...,4.3
19,Fireberry,CRM Software,[As reported in 14 Fireberry reviews. Allows a...,4.8


In [8]:
# Persist the cleaned vendor table without the index column.
# NOTE(review): parsed_features holds Python lists, so they are written as
# their string form and will need re-parsing if this CSV is read back.
df_cleaned.to_csv("cleaned_vendor_data.csv", index=False)


In [None]:
## Step 2: Capability Matching & Similarity Scoring


In [10]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity


In [11]:
def combine_features(features):
    """Collapse a vendor's feature descriptions into one text blob.

    Args:
        features: Normally a list (or tuple) of description strings.  Any
            other value (for example a plain string or a missing-value
            marker) is converted with ``str`` instead of being iterated,
            so a string is not split into space-separated characters and a
            non-iterable does not raise.

    Returns:
        str: The descriptions joined by single spaces, or ``str(features)``
        for non-sequence input.
    """
    if isinstance(features, (list, tuple)):
        return " ".join(features)
    return str(features)


In [12]:
def filter_by_category(df, category):
    """Return a copy of the rows whose ``main_category`` equals ``category``.

    The comparison ignores letter case on both sides.
    """
    normalized_categories = df["main_category"].str.lower()
    matches = normalized_categories == category.lower()
    return df.loc[matches].copy()


In [19]:
# Sanity check: does any parsed feature description contain "budgeting"
# (case-insensitive substring match across every vendor's feature list)?
any("budgeting" in f.lower() for features in df_cleaned["parsed_features"] for f in features)


False

In [38]:
import os
os.environ["USE_TF"] = "0"  # disable TensorFlow entirely

from sentence_transformers import SentenceTransformer


In [52]:

import os
import pandas as pd
import numpy as np
import re
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity


# Optional dependency: use sentence-transformers (SBERT) when it can be
# imported; otherwise record that it is missing in `sbert_available`, which
# match_vendors checks before choosing between SBERT and TF-IDF.
try:
    os.environ["USE_TF"] = "0"  # set before the import below
    from sentence_transformers import SentenceTransformer, util
    sbert_available = True
except ImportError:
    print("sentence-transformers not available, using TF-IDF instead.")
    sbert_available = False

# --- Functions ---


def filter_by_category(df, category):
    """Select the vendors in ``category`` (case-insensitive) as a new frame."""
    is_match = df["main_category"].str.lower().eq(category.lower())
    return df[is_match].copy()


def combine_features(features):
    """Join a list of feature descriptions with spaces.

    Anything that is not a ``list`` is returned as its ``str`` form.
    """
    if not isinstance(features, list):
        return str(features)
    return " ".join(features)


def compute_tfidf_similarity(text, query):
    """Return the TF-IDF cosine similarity between ``text`` and ``query``.

    A new vectorizer (English stop words removed) is fitted on just these two
    strings, and the cosine similarity of the two resulting rows is returned.

    Args:
        text: Document text, e.g. a vendor's combined feature descriptions.
        query: Query text, e.g. the requested capabilities joined together.

    Returns:
        The similarity score; ``0.0`` when neither string contributes any
        term to the vocabulary (both empty or made only of stop words).
    """
    vectorizer = TfidfVectorizer(stop_words='english')
    try:
        tfidf_matrix = vectorizer.fit_transform([text, query])
    except ValueError as exc:
        # scikit-learn reports an empty vocabulary as a ValueError; there is
        # nothing to compare in that case.  Any other ValueError is re-raised.
        if "empty vocabulary" not in str(exc):
            raise
        return 0.0
    return cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])[0][0]

# Compute the cosine similarity between two texts using SBERT embeddings
def compute_sbert_similarity(text, query, model):
    """Cosine similarity of the embeddings ``model`` produces for two texts.

    ``model.encode`` is called once with ``[text, query]`` and must return the
    two embedding vectors in that order.
    """
    text_vec, query_vec = model.encode([text, query])
    norm_product = np.linalg.norm(text_vec) * np.linalg.norm(query_vec)
    return np.dot(text_vec, query_vec) / norm_product


def match_vendors(df, category, capabilities, threshold=0.6, similarity_method='sbert'):
    """Score the vendors of one category against the requested capabilities.

    Args:
        df: Vendor table with ``main_category`` and ``parsed_features`` columns.
        category: Category to keep (compared case-insensitively).
        capabilities: Capability phrases; a single string is treated as one
            phrase rather than being split into characters.
        threshold: Minimum similarity score a vendor needs to be returned.
        similarity_method: ``'sbert'`` uses sentence-transformers embeddings
            when that package was imported successfully; every other value,
            or a missing package, uses per-vendor TF-IDF similarity.

    Returns:
        A new DataFrame holding the category's vendors whose score is at or
        above ``threshold``, with added ``feature_text`` and
        ``similarity_score`` columns, sorted by score in descending order.
        When the category has no vendors the frame is empty but still carries
        both columns.
    """
    print(f"\n🔍 Matching for category: {category} | capabilities: {capabilities} | threshold: {threshold} | method: {similarity_method}")
    df_category = filter_by_category(df, category)
    df_category["feature_text"] = df_category["parsed_features"].apply(combine_features)

    if df_category.empty:
        # Nothing to score: return before encoding, because embedding an empty
        # batch and comparing it with the query is not a valid operation.
        df_category["similarity_score"] = pd.Series(dtype="float64")
        return df_category

    if isinstance(capabilities, str):
        capabilities = [capabilities]
    query = " ".join(capabilities)

    use_sbert = similarity_method == 'sbert' and sbert_available
    if similarity_method == 'sbert' and not sbert_available:
        # Make the fallback visible; the banner above still says "sbert".
        print("sentence-transformers not available, using TF-IDF instead.")

    if use_sbert:
        # NOTE(review): the model is loaded on every call; cache it outside
        # this function if match_vendors is called repeatedly.
        model = SentenceTransformer('all-MiniLM-L6-v2')
        vendor_embeddings = model.encode(df_category["feature_text"].tolist(), convert_to_tensor=True)
        query_embedding = model.encode(query, convert_to_tensor=True)
        # Row 0 holds the query's similarity to each vendor, in frame order.
        cos_scores = util.pytorch_cos_sim(query_embedding, vendor_embeddings)[0]
        df_category["similarity_score"] = cos_scores.cpu().numpy()
    else:
        df_category["similarity_score"] = df_category["feature_text"].apply(lambda x: compute_tfidf_similarity(x, query))

    result_df = df_category[df_category["similarity_score"] >= threshold].sort_values(by="similarity_score", ascending=False)
    return result_df

# --- Example run ---


user_category = "CRM Software"
user_capabilities = ["workflow automation", "lead management", "sales tracking"]

results = match_vendors(df_cleaned, user_category, user_capabilities, threshold=0.3)

# Show the ten best matches, or say why there is nothing to show.
if results.empty:
    print("❗ No vendors matched the criteria. Try lowering the threshold or adjusting capabilities.")
else:
    display(results[["product_name", "similarity_score", "rating"]].head(10))



🔍 Matching for category: CRM Software | capabilities: ['workflow automation', 'lead management', 'sales tracking'] | threshold: 0.3 | method: sbert


Unnamed: 0,product_name,similarity_score,rating
53,EspoCRM,0.41822,4.6
33,Insightly CRM,0.398574,4.2
23,Solid Performers CRM,0.395829,4.9
43,Kommo,0.383417,3.7
7,YetiForce CRM,0.359125,4.4
56,Prospect CRM,0.359125,4.6
18,FinCRM,0.356429,4.3
36,AllClients,0.355547,4.6
40,Pipeliner CRM,0.35499,4.6
46,Keap,0.350583,4.2


In [None]:
# --- Step 3: Vendor Ranking

In [53]:


def rank_vendors(results_df, similarity_weight=0.6, rating_weight=0.4, max_rating=5):
    """Rank matched vendors by a weighted blend of similarity and rating.

    The input frame is left untouched; all new columns are added to a copy.

    Args:
        results_df: DataFrame with ``similarity_score`` and ``rating`` columns.
        similarity_weight: Weight applied to ``similarity_score``.
        rating_weight: Weight applied to the normalized rating.
        max_rating: Top of the rating scale, used to normalize ``rating``.

    Returns:
        A new DataFrame with ``normalized_rating`` and ``final_score`` columns,
        sorted by ``final_score`` in descending order.  An empty input is
        returned unchanged after printing a notice.
    """
    if results_df.empty:
        print("❗ No vendors to rank.")
        return results_df

    # Work on a copy so the caller's frame is not modified as a side effect.
    ranked = results_df.copy()

    # Bring ratings onto a 0-1 scale before blending with the similarity score.
    ranked["normalized_rating"] = ranked["rating"] / max_rating

    # Weighted blend of the two signals.
    ranked["final_score"] = (ranked["similarity_score"] * similarity_weight) + (ranked["normalized_rating"] * rating_weight)

    # Best vendors first.
    return ranked.sort_values(by="final_score", ascending=False)


In [54]:
# Rank the vendors returned by capability matching.
ranked_results = rank_vendors(results)

# Show the top 10 vendors together with their scores.
ranked_results.head(10)[["product_name", "similarity_score", "rating", "final_score"]]


Unnamed: 0,product_name,similarity_score,rating,final_score
23,Solid Performers CRM,0.395829,4.9,0.629497
53,EspoCRM,0.41822,4.6,0.618932
56,Prospect CRM,0.359125,4.6,0.583475
36,AllClients,0.355547,4.6,0.581328
40,Pipeliner CRM,0.35499,4.6,0.580994
19,Fireberry,0.321005,4.8,0.576603
33,Insightly CRM,0.398574,4.2,0.575145
7,YetiForce CRM,0.359125,4.4,0.567475
25,Freshsales,0.342223,4.5,0.565334
6,Zurmo,0.327939,4.6,0.564763


In [None]:
# --- Step 4: Flask App for Vendor Qualification

In [None]:
# This part is implemented outside the notebook, in VS Code.