# Search Engine Optimization (SEO) with NLP

In [9]:
import spacy
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer

In [10]:
# Load the "en_core_web_sm" spaCy pipeline once; preprocess_query() below
# reuses this module-level `nlp` object for every query.
nlp = spacy.load("en_core_web_sm")

# Sample search queries used as the corpus for the rest of the notebook.
search_queries = [
    "buy organic apple juice",
    "cheap coca cola bottles",
    "best price for whole grain pasta",
    "discount on Heinz ketchup",
    "order 1.5L Coca-Cola online",
    "high fiber whole grain pasta",
    "find affordable apple juice"
]

In [11]:
def preprocess_query(query):
    """Normalize a search query into a space-separated string of lemmas.

    The query is lowercased and run through the module-level spaCy pipeline
    ``nlp``. Stop words and tokens that are not purely alphabetic are
    dropped; the lemmas of the remaining tokens are joined with single spaces.

    Args:
        query: Raw search query string.

    Returns:
        A string of lemmas separated by single spaces (empty if every token
        was filtered out).
    """
    lemmas = []
    for tok in nlp(query.lower()):
        # Skip stop words and anything containing digits/punctuation.
        if tok.is_stop or not tok.is_alpha:
            continue
        lemmas.append(tok.lemma_)
    return " ".join(lemmas)

# Apply the same normalization to every sample query, keeping the original order.
processed_queries = list(map(preprocess_query, search_queries))

def extract_keywords(queries, max_df=0.8, max_features=10):
    """Fit a TF-IDF model on preprocessed queries and return weights and vocabulary.

    Args:
        queries: Iterable of preprocessed query strings, one document each.
        max_df: Forwarded to ``TfidfVectorizer(max_df=...)``. Defaults to 0.8,
            the value this notebook originally hard-coded.
        max_features: Forwarded to ``TfidfVectorizer(max_features=...)``.
            Defaults to 10, the value this notebook originally hard-coded.
            Pass ``None`` to keep the whole vocabulary.

    Returns:
        A ``(tfidf_df, keywords)`` tuple:
        - ``tfidf_df``: DataFrame with one row per query and one column per
          retained keyword, holding the TF-IDF weights.
        - ``keywords``: the vocabulary returned by
          ``get_feature_names_out()``, in the same order as the columns.

    Raises:
        ValueError: Propagated from ``fit_transform`` (e.g. when no terms
            remain after filtering).
    """
    tfidf_vectorizer = TfidfVectorizer(max_df=max_df, max_features=max_features)
    tfidf_matrix = tfidf_vectorizer.fit_transform(queries)
    keywords = tfidf_vectorizer.get_feature_names_out()

    # NOTE: capping the vocabulary drops terms; a query made up only of
    # dropped terms ends up as an all-zero row. Raise max_features (or pass
    # None) if every query must be represented.
    tfidf_df = pd.DataFrame(tfidf_matrix.toarray(), columns=keywords)
    return tfidf_df, keywords

# Fit TF-IDF on the preprocessed queries and show the per-query keyword weights.
tfidf_df, keywords = extract_keywords(processed_queries)

print("\nTF-IDF Matrix (Keyword Importance):", tfidf_df, sep="\n")

def analyze_search_patterns(keywords, queries):
    """Count how many queries contain each keyword as a whole term.

    A keyword is counted at most once per query. Matching is done on
    whitespace-delimited tokens rather than raw substrings, so ``"apple"``
    does not match a query containing ``"pineapple"``. A keyword made of
    several space-separated words matches when those words appear
    consecutively in the query.

    Args:
        keywords: Iterable of keyword strings.
        queries: Iterable of query strings whose terms are separated by
            whitespace.

    Returns:
        Dict mapping each keyword that occurs in at least one query to the
        number of queries containing it. Keys appear in the order they were
        first matched (query by query, then keyword by keyword); keywords
        that never match are omitted.
    """
    keyword_frequency = {}

    # Count keyword frequency in search queries
    for query in queries:
        # Collapse whitespace and pad both ends so that " keyword " can only
        # match on token boundaries, including at the start/end of the query.
        padded_query = " " + " ".join(query.split()) + " "
        for keyword in keywords:
            if f" {keyword} " in padded_query:
                keyword_frequency[keyword] = keyword_frequency.get(keyword, 0) + 1

    return keyword_frequency

# Tally how many preprocessed queries mention each TF-IDF keyword, then report it.
keyword_frequency = analyze_search_patterns(keywords, processed_queries)

print("\nKeyword Frequency (Search Trends):")
for keyword in keyword_frequency:
    freq = keyword_frequency[keyword]
    print(f"{keyword}: {freq} times")

def recommend_products_based_on_keywords(keyword):
    """Return the product names recommended for a keyword.

    The keyword is looked up by exact match in a fixed keyword-to-products
    table.

    Args:
        keyword: Keyword string to look up.

    Returns:
        The list of product names for the keyword, or
        ``["No recommendations available"]`` when the keyword is not in the
        table.
    """
    catalog = {
        "apple": ["Organic Apple Juice", "Cold-Pressed Apple Juice"],
        "pasta": ["Whole Grain Pasta", "High Fiber Pasta"],
        "coca": ["Coca-Cola 1.5L", "Coca-Cola 500ml"],
        "ketchup": ["Heinz Tomato Ketchup", "No-Sugar Heinz Ketchup"],
    }
    if keyword in catalog:
        return catalog[keyword]
    # Unknown keyword: fall back to a single placeholder entry.
    return ["No recommendations available"]



TF-IDF Matrix (Keyword Importance):
   affordable     apple      coca      cola     grain     juice    online   
0    0.000000  0.538281  0.000000  0.000000  0.000000  0.538281  0.000000  \
1    0.000000  0.000000  0.707107  0.707107  0.000000  0.000000  0.000000   
2    0.000000  0.000000  0.000000  0.000000  0.707107  0.000000  0.000000   
3    0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000   
4    0.000000  0.000000  0.451635  0.451635  0.000000  0.000000  0.544082   
5    0.000000  0.000000  0.000000  0.000000  0.707107  0.000000  0.000000   
6    0.648465  0.538281  0.000000  0.000000  0.000000  0.538281  0.000000   

      order   organic     pasta  
0  0.000000  0.648465  0.000000  
1  0.000000  0.000000  0.000000  
2  0.000000  0.000000  0.707107  
3  0.000000  0.000000  0.000000  
4  0.544082  0.000000  0.000000  
5  0.000000  0.000000  0.707107  
6  0.000000  0.000000  0.000000  

Keyword Frequency (Search Trends):
apple: 2 times
juice: 2 times
organic:

In [12]:
# Provide product recommendations based on the most common keyword.
# max() returns the first key with the highest count, so ties go to the
# keyword that was inserted into keyword_frequency first. default=None avoids
# a ValueError when no keyword matched any query.
most_common_keyword = max(keyword_frequency, key=keyword_frequency.get, default=None)

if most_common_keyword is None:
    recommendations = []
    print("\nNo keywords found in the search queries; no recommendations to show.")
else:
    recommendations = recommend_products_based_on_keywords(most_common_keyword)
    print(f"\nMost Common Keyword: {most_common_keyword}")
    print(f"Recommended Products for '{most_common_keyword}': {recommendations}")


Most Common Keyword: apple
Recommended Products for 'apple': ['Organic Apple Juice', 'Cold-Pressed Apple Juice']
