In [None]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

# Toy datasets: one row per startup / product, each with a free-text description.
# NOTE(review): the first startup description spells the company "Addar" while
# startup_name says "Aadar" — confirm which spelling is intended.
startup_data = pd.DataFrame({
    'startup_name': ['Aadar', 'Vedix', 'Ayuga', 'Bibo', 'panthanjali', 'mama earth'],
    'description': [
        '  Addar is an Ayurveda healthcare company that develops natural, effective products in-house.',
        ' first brand offering personalized ayurvedic haircare and skincare regimen',
        'Internet first brand offering ayurvedic multi-category beauty products',
        'Creates sustainable products with extracts of plants, cold pressed oils, natural plant butter',
        'company that sells ayurvedic medicine and beauty products',
        'Mamaearth is an Indian startup in the beauty care industry',
    ],
})

# Note: 'oil' appears twice in product_name, each time with a different description.
product_data = pd.DataFrame({
    'product_name': ['oil', 'face wash', 'Shampoos', 'serum', 'tooth paste', 'oil'],
    'description': [
        'Ayurvedic oil is made by combining herbal extracts with a carrier oil, such as coconut oil, sesame oil, or almond oil.',
        'Help to moisturize the skin, reduce wrinkles, and improve acne',
        'Ayurvedic shampoo is made with natural ingredients, such as amla, neem, shikakai, and brahmi',
        'Ayurvedic serum is lightweight and absorbs quickly into the skin, without leaving a greasy residue',
        'Toothpaste is a paste or gel used to clean teeth and freshen breath.',
        # Fixed typo: was 'yurvedic', which produced a token that never matched
        # 'ayurvedic' in the other descriptions.
        'Ayurvedic oil can help to improve hair growth, prevent hair loss, and treat dandruff',
    ],
})


# Create TF-IDF vectors for product descriptions.
# The vectorizer is fitted on product text only, so its vocabulary and IDF
# weights come from the product descriptions.
tfidf_vectorizer = TfidfVectorizer()
product_tfidf_matrix = tfidf_vectorizer.fit_transform(product_data['description'])

# Product-vs-product similarity matrix. TfidfVectorizer L2-normalises rows by
# default, so the plain dot product from linear_kernel equals cosine similarity.
cosine_similarities = linear_kernel(product_tfidf_matrix, product_tfidf_matrix)

In [None]:
# Function to recommend startups based on a product name
def recommend_startups(product_name, num_recommendations=5):
    """Recommend the startups whose descriptions best match a product.

    The product's TF-IDF vector is compared against every startup description,
    and the startups are returned in order of decreasing similarity.

    Startup descriptions are transformed with the module-level
    ``tfidf_vectorizer`` (fitted on product descriptions), so only words that
    occur in the product vocabulary contribute to the score.

    Args:
        product_name: Value to look up in ``product_data['product_name']``
            (exact match). If several rows share the name, the first is used.
        num_recommendations: Maximum number of startups to return. Values
            below zero are treated as zero.

    Returns:
        A list of dicts, best match first, each with the keys
        ``'startup_name'``, ``'description'`` and ``'similarity_score'``.

    Raises:
        IndexError: If ``product_name`` is not present in ``product_data``.
    """
    # Resolve the product by row *position*, because the TF-IDF matrix is
    # positional and the DataFrame index labels need not be 0..n-1.
    name_matches = (product_data['product_name'] == product_name).to_numpy()
    match_positions = name_matches.nonzero()[0]
    if len(match_positions) == 0:
        raise IndexError(f"Unknown product name: {product_name!r}")
    product_pos = int(match_positions[0])

    # Slice (rather than integer-index) so the result stays a 2-D 1 x n_terms matrix.
    product_vector = product_tfidf_matrix[product_pos:product_pos + 1]

    # Project the startup descriptions into the same TF-IDF space and score
    # them against the product: one similarity value per startup row.
    startup_tfidf_matrix = tfidf_vectorizer.transform(startup_data['description'])
    startup_scores = linear_kernel(product_vector, startup_tfidf_matrix).ravel()

    # sorted() is stable, so startups with equal scores keep their table order.
    ranked = sorted(enumerate(startup_scores), key=lambda pair: pair[1], reverse=True)
    top_recommendations = ranked[:max(num_recommendations, 0)]

    return [
        {
            'startup_name': startup_data.iloc[idx]['startup_name'],
            'description': startup_data.iloc[idx]['description'],
            'similarity_score': float(score),
        }
        for idx, score in top_recommendations
    ]

In [None]:
# Demo: fetch the top three startup matches for one product and print a
# small text report (header line followed by one line per startup).
product_name = 'oil'
recommendations = recommend_startups(product_name, num_recommendations=3)

report_lines = [f"Recommended startups for {product_name}:"]
report_lines.extend(
    f"Startup: {rec['startup_name']} (Similarity Score: {rec['similarity_score']:.2f})"
    for rec in recommendations
)
print("\n".join(report_lines))

Recommended startups for oil:
Startup: Ayuga (Similarity Score: 0.20)
Startup: mama earth (Similarity Score: 0.17)
Startup: panthanjali (Similarity Score: 0.06)
