In [4]:
import pandas as pd
import numpy as np
import re
from sklearn.feature_extraction.text import TfidfVectorizer
import nltk
from nltk.stem.porter import PorterStemmer

In [5]:
df=pd.read_csv('usable_dataset1.csv')
df.head()

Unnamed: 0,Tool Name,Category,Features,Pricing,Reviews,Target Audience
0,Deepcrawl,"SEO, keyword research, competitive analysis",A usability testing platform that helps you ge...,Starts at $499/month,"4.9/5 stars (5,000+reviews)",Businesses of all sizes
1,SearchAtlas,"SEO, keyword research, competitive analysis",An SEO tool that helps you understand your web...,Starts at $299/month,"4.9/5 stars (1,000+reviews)",Businesses of all sizes
2,SpyFu,"SEO, keyword research, competitive analysis",An SEO tool that helps you research your compe...,Starts at $39/month,"4.8/5 stars (10,000+reviews)",Businesses of all sizes
3,Answer The Public,"SEO, keyword research, competitive analysis",A free SEO tool that helps you find questions ...,Free,"4.9/5 stars (100,000+reviews)",Businesses of all sizes
4,SEO Site Checkup,"SEO, keyword research, competitive analysis",A website crawler that helps you identify SEO ...,Starts at $19.99/month,4.4/5 stars (1000+ reviews),Businesses of all sizes


In [6]:
df.isnull().sum()

Tool Name          0
Category           0
Features           0
Pricing            0
Reviews            0
Target Audience    0
dtype: int64

In [7]:
df['Reviews'][38]


'4/5 stars (500+ reviews)'

In [8]:
df['Reviews'][37]

'3.9/5 stars (10,000+ reviews)'

In [9]:
def extract_price(price_str):
    match = re.search(r'[\d.]+', price_str)
    if match:
        return float(match.group())
    else:
        return None

In [10]:
# Apply the function to the "Price" column
df['Pricing'] = df['Pricing'].apply(extract_price)

# Remove non-numeric characters from the "Rating" column and convert to numeric
df['Reviews'] = df['Reviews'].str.extract(r'(\d+(?:\.\d+)?)').astype(float)

In [11]:
df['Pricing'].fillna('free', inplace=True)

In [12]:
df.head()

Unnamed: 0,Tool Name,Category,Features,Pricing,Reviews,Target Audience
0,Deepcrawl,"SEO, keyword research, competitive analysis",A usability testing platform that helps you ge...,499.0,4.9,Businesses of all sizes
1,SearchAtlas,"SEO, keyword research, competitive analysis",An SEO tool that helps you understand your web...,299.0,4.9,Businesses of all sizes
2,SpyFu,"SEO, keyword research, competitive analysis",An SEO tool that helps you research your compe...,39.0,4.8,Businesses of all sizes
3,Answer The Public,"SEO, keyword research, competitive analysis",A free SEO tool that helps you find questions ...,free,4.9,Businesses of all sizes
4,SEO Site Checkup,"SEO, keyword research, competitive analysis",A website crawler that helps you identify SEO ...,19.99,4.4,Businesses of all sizes


In [13]:
df.isnull().sum()

Tool Name          0
Category           0
Features           0
Pricing            0
Reviews            0
Target Audience    0
dtype: int64

In [14]:
df['Category']=df['Category'].apply(lambda x:[x])


In [15]:
df['Target Audience']=df['Target Audience'].apply(lambda x:[x])

In [16]:
df['Features']=df['Features'].apply(lambda x:x.split(','))

In [17]:
df['Tags']=df['Category']+df['Features']+df['Target Audience']
df.head()

Unnamed: 0,Tool Name,Category,Features,Pricing,Reviews,Target Audience,Tags
0,Deepcrawl,"[SEO, keyword research, competitive analysis]",[A usability testing platform that helps you g...,499.0,4.9,[Businesses of all sizes],"[SEO, keyword research, competitive analysis, ..."
1,SearchAtlas,"[SEO, keyword research, competitive analysis]",[An SEO tool that helps you understand your we...,299.0,4.9,[Businesses of all sizes],"[SEO, keyword research, competitive analysis, ..."
2,SpyFu,"[SEO, keyword research, competitive analysis]",[An SEO tool that helps you research your comp...,39.0,4.8,[Businesses of all sizes],"[SEO, keyword research, competitive analysis, ..."
3,Answer The Public,"[SEO, keyword research, competitive analysis]",[A free SEO tool that helps you find questions...,free,4.9,[Businesses of all sizes],"[SEO, keyword research, competitive analysis, ..."
4,SEO Site Checkup,"[SEO, keyword research, competitive analysis]",[A website crawler that helps you identify SEO...,19.99,4.4,[Businesses of all sizes],"[SEO, keyword research, competitive analysis, ..."


In [18]:
new_df=df[['Tool Name','Tags']]

In [19]:
new_df.head()

Unnamed: 0,Tool Name,Tags
0,Deepcrawl,"[SEO, keyword research, competitive analysis, ..."
1,SearchAtlas,"[SEO, keyword research, competitive analysis, ..."
2,SpyFu,"[SEO, keyword research, competitive analysis, ..."
3,Answer The Public,"[SEO, keyword research, competitive analysis, ..."
4,SEO Site Checkup,"[SEO, keyword research, competitive analysis, ..."


In [20]:
new_df['Tags']=new_df['Tags'].apply(lambda x:" ".join(x))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df['Tags']=new_df['Tags'].apply(lambda x:" ".join(x))


In [21]:
new_df['Tags'].apply(lambda x:x.lower())

0     seo, keyword research, competitive analysis a ...
1     seo, keyword research, competitive analysis an...
2     seo, keyword research, competitive analysis an...
3     seo, keyword research, competitive analysis a ...
4     seo, keyword research, competitive analysis a ...
                            ...                        
95    project management a project management tool t...
96    project management a powerful project manageme...
97    project management a project management tool t...
98    project management a project management tool t...
99    project management a project management tool t...
Name: Tags, Length: 100, dtype: object

In [22]:
new_df.to_csv('Tags_Data.csv',index=False)

In [36]:
tf= TfidfVectorizer(max_features=200,stop_words='english')

In [37]:
vector=tf.fit_transform(new_df['Tags']).toarray()

In [38]:
vector[0]

array([0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.15116748, 0.        ,
       0.        , 0.        , 0.        , 0.30006228, 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.12192763, 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.15671347, 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.     

In [40]:
feature_names = tf.get_feature_names_out()

In [41]:
feature_names

array(['accounts', 'accurately', 'activities', 'advanced', 'advertising',
       'affordable', 'agencies', 'allows', 'analysis', 'analytical',
       'analytics', 'analyze', 'api', 'app', 'asking', 'attract',
       'audience', 'auditing', 'authentication', 'automate', 'automated',
       'automation', 'backlink', 'backlinks', 'based', 'big', 'boards',
       'brand', 'bugs', 'build', 'businesses', 'calls', 'campaigns',
       'capsule', 'charts', 'chat', 'check', 'cloud', 'collaborate',
       'collaboration', 'combines', 'communication', 'competition',
       'competitive', 'competitor', 'competitors', 'contacts', 'content',
       'convert', 'crawler', 'create', 'crm', 'custom', 'customer',
       'customers', 'customizable', 'dashboards', 'data', 'deals',
       'deliverability', 'design', 'develop', 'development', 'difficult',
       'documents', 'ease', 'easy', 'email', 'emails', 'engagement',
       'engine', 'enterprise', 'features', 'focus', 'free', 'freemium',
       'great',

In [53]:
from sklearn.metrics.pairwise import cosine_similarity

In [54]:
similarity=cosine_similarity(vector)

In [55]:
def find_tool(query, num_results=5):
    # Convert the query to lowercase and tokenize it
    query_vector = tf.transform([query.lower()]).toarray()

    # Calculate cosine similarity between the query and all tools
    similarities = cosine_similarity(query_vector, vector)

    # Find the indices of the top N most similar tools
    most_similar_indices = np.argsort(similarities[0])[::-1][:num_results]

    # Get the names of the most similar tools
    most_similar_tool_names = [new_df.iloc[index]['Tool Name'] for index in most_similar_indices]

    return most_similar_tool_names

In [56]:
def find_info(tool_names):
    df_info = pd.read_csv('usable_dataset1.csv')

    # Filter the tool information DataFrame based on the provided tool names
    tool_information = df_info[df_info['Tool Name'].isin(tool_names)]

    # Create a dictionary to map tool names to their information
    tool_info_dict = {row['Tool Name']: row for _, row in tool_information.iterrows()}

    # Create a list of tool information in the order of tool_names
    ordered_tool_info = [tool_info_dict[tool_name] for tool_name in tool_names]

    return ordered_tool_info

In [60]:
user_query = "give me a leads management tool"
top_similar_tools = find_tool(user_query, num_results=5)
tool_info=find_info(top_similar_tools)

In [61]:
top_similar_tools

['FreeAgent CRM',
 'Salesflare',
 'Insightly CRM Lite',
 "A Salesman's CRM",
 'Hive']

In [62]:
tool_info

[Tool Name                                              FreeAgent CRM
 Category                                    CRM tool to manage leads
 Features           A CRM tool that helps you track your leads, op...
 Pricing                                          Starts at $10/month
 Reviews                                 4.5/5 stars (1,500+ reviews)
 Target Audience                              Businesses of all sizes
 Name: 25, dtype: object,
 Tool Name                                                 Salesflare
 Category                                    CRM tool to manage leads
 Features           A CRM tool that helps you track your leads, op...
 Pricing                                     Starts at $49/user/month
 Reviews                                 4.8/5 stars (3,000+ reviews)
 Target Audience                              Businesses of all sizes
 Name: 24, dtype: object,
 Tool Name                                         Insightly CRM Lite
 Category                             

In [None]:
from fastapi import FastAPI
from pydantic import BaseModel
from fastapi.middleware.cors import CORSMiddleware
import webbrowser
import uvicorn
app = FastAPI(docs_url='/cap/docs', redoc_url='/cap/redocs',openapi_url='/cap/openapi.json')
origins = ["*"]
app.add_middleware(
CORSMiddleware,
    # allow_origins=origins,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
class QuestionInput(BaseModel):
    question: str



@app.post("/cap")
async def predict(question_input: QuestionInput):
    try:
        question = question_input.question
        print(question)
        out= find_tool(question)
        result=find_info(out)
        return result
    except Exception as e:
        return {"error": str(e)}
