# Using LSA (Latent Semantic Analysis) for SEO (Search Engine Optimization)

In [1]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.decomposition import TruncatedSVD
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
# Sample documents: a toy corpus of five short SEO-related sentences.
# Each string is treated as one document; the list order determines the
# "Document N" numbering printed in the results.
documents = [
    "SEO is crucial for improving website visibility.",
    "Keyword research is an essential aspect of SEO.",
    "Backlinks play a significant role in search engine optimization.",
    "Content quality is important for SEO success.",
    "SEO tools help in analyzing website performance."
]

In [3]:
# Query: free-text search phrase to be compared against every document.
# NOTE: the vectorizer is fitted on `documents` only, so query terms that do
# not literally occur in that vocabulary (no stemming is applied here, e.g.
# "engines" vs. "engine") contribute nothing to the query vector.
query = "Optimizing content for search engines"

In [4]:
# Turn raw text into TF-IDF vectors. English stop words are dropped; the
# vocabulary and IDF weights are learned from the documents alone, and the
# query is then encoded against that same fitted vocabulary.
vectorizer = TfidfVectorizer(stop_words="english")
X = vectorizer.fit_transform(raw_documents=documents)
query_vector = vectorizer.transform(raw_documents=[query])

In [5]:
# Apply Latent Semantic Analysis (LSA): reduce the TF-IDF vectors to a small
# number of latent "topic" dimensions using truncated SVD.
num_topics = 2
# TruncatedSVD uses a randomized solver by default; pinning random_state makes
# the decomposition (and everything derived from it) reproducible across runs.
lsa_model = TruncatedSVD(n_components=num_topics, random_state=42)
# Fit on the document matrix, then project the query with the already-fitted
# model so documents and query live in the same latent space.
lsa_topic_matrix = lsa_model.fit_transform(X)
query_lsa = lsa_model.transform(query_vector)

In [6]:
# Score the query against each document in the reduced LSA space.
# The result has one row (the single query) and one column per document.
similarities = cosine_similarity(X=query_lsa, Y=lsa_topic_matrix)

In [7]:
# Report the query followed by one line per document, in corpus order,
# numbering documents from 1 and showing the score to four decimals.
print("Query:", query)
print("\nDocument Similarities:")
query_scores = similarities[0]  # the only row: scores for the single query
for doc_number, (score, text) in enumerate(zip(query_scores, documents), start=1):
    print(f"Document {doc_number}: Similarity = {score:.4f}, Content: {text}")

Query: Optimizing content for search engines

Document Similarities:
Document 1: Similarity = 0.3830, Content: SEO is crucial for improving website visibility.
Document 2: Similarity = 0.3830, Content: Keyword research is an essential aspect of SEO.
Document 3: Similarity = 0.9238, Content: Backlinks play a significant role in search engine optimization.
Document 4: Similarity = 0.3830, Content: Content quality is important for SEO success.
Document 5: Similarity = 0.3830, Content: SEO tools help in analyzing website performance.
