In [1]:
import math

# section 1
def computeTF(t, d):
    # Count the number of times term t appears in document d
    count_t_d = d.count(t)
    # Calculate the total number of terms in document d
    size_d = len(d.split())
    # Calculate TF
    tf = count_t_d / size_d
    return tf

In [2]:
# section 2
def computeDF(t, C):
    # Count the number of documents containing the term t
    count_t_C = sum(1 for document in C if t in document)
    # Calculate DF
    df = count_t_C
    return df


In [18]:
# section 3
def computeIDF(t, C):
    # Total number of documents
    N = len(C)
    # Number of documents containing the term t
    df_t = computeDF(t, C)
    # Calculate IDF with better smoothing
    idf = math.log(1 + (N / (df_t + 1)))
    return idf


In [19]:
# section 4
def computeTFIDF(t, d, C):
    # Calculate TF
    tf = computeTF(t, d)
    # Calculate IDF
    idf = computeIDF(t, C)
    # Calculate TF-IDF
    tfidf = tf * idf
    return tfidf


In [22]:
#section 5
corpuses = [
    "Wellington is the capital city of New Zealand.",
    "New Zealand is famous for its rugby team, the All Blacks.",
    "Auckland is the largest city in New Zealand.",
    "The Maori culture is an integral part of New Zealand's identity.",
    "New Zealand's landscapes were featured in the Lord of the Rings films."
]

# Query
query = "What is the capital of New Zealand?"

# Preprocess query
query_terms = query.lower().replace("?", "").split()

# Calculate TF-IDF scores for each sentence
sentence_scores = []

for sentence in corpuses:
    score = 0
    for term in query_terms:
        tfidf = computeTFIDF(term, sentence.lower(), corpuses)
        score += tfidf
    sentence_scores.append((sentence, score))

# Sort sentences by score in descending order
ranked_sentences = sorted(sentence_scores, key=lambda x: x[1], reverse=True)

# Print the top 3 sentences
print("Top 3 sentences:")
for sentence, score in ranked_sentences[:3]:
    print(f"Score: {score:.4f} - Sentence: {sentence}")

Top 3 sentences:
Score: 0.8792 - Sentence: Wellington is the capital city of New Zealand.
Score: 0.6212 - Sentence: Auckland is the largest city in New Zealand.
Score: 0.5255 - Sentence: The Maori culture is an integral part of New Zealand's identity.
