In [1]:
import math
import re
from collections import defaultdict, Counter
from nltk.stem import PorterStemmer
import nltk

# Download required NLTK data
nltk.download('punkt', quiet=True)
nltk.download('stopwords', quiet=True)

True

In [2]:
class SimpleIR:
    def __init__(self):
        self.docs = {}
        self.index = defaultdict(dict)
        self.doc_freq = defaultdict(int)
        self.doc_lengths = {}
        self.stopwords = set(nltk.corpus.stopwords.words('english')) - {'plasma', 'lens', 'maternal', 'fetal'}
        self.stemmer = PorterStemmer()

    # Tokenize and preprocess
    def tokenize(self, text):
        words = re.findall(r'[a-z]+', text.lower())
        return [self.stemmer.stem(w) for w in words if w not in self.stopwords and len(w) > 1]

    # Load MED documents
    def load_docs(self, path):
        with open(path, 'r', errors='ignore') as f:
            content = f.read()
        sections = content.split('.I')
        for section in sections[1:]:
            lines = section.strip().split('\n')
            if not lines: continue
            doc_id = lines[0].strip()
            text_lines = [line.strip() for line in lines[1:] if line.strip() and not line.startswith('.')]
            text = ' '.join(text_lines)
            if text:
                self.docs[doc_id] = text
        print(f"Loaded {len(self.docs)} documents")

    # Load MED queries
    def load_queries(self, path):
        queries = {}
        with open(path, 'r', errors='ignore') as f:
            content = f.read()
        sections = content.split('.I')
        for section in sections[1:]:
            lines = section.strip().split('\n')
            if not lines: continue
            qid = lines[0].strip()
            text_lines = [line.strip() for line in lines[1:] if line.strip() and not line.startswith('.')]
            text = ' '.join(text_lines)
            if text:
                queries[qid] = text
        print(f"Loaded {len(queries)} queries")
        return queries

    # Load MED relevance judgments
    def load_qrels(self, path):
        qrels = defaultdict(set)
        with open(path, 'r') as f:
            for line in f:
                parts = line.strip().split()
                if len(parts) >= 2:
                    qid, doc_id = parts[0], parts[1]
                    doc_id_fixed = str(int(doc_id) + 1)
                    qrels[qid].add(doc_id_fixed)
        print(f"Loaded relevance judgments for {len(qrels)} queries")
        return qrels

    # Build inverted index
    def build_index(self):
        for doc_id, text in self.docs.items():
            words = self.tokenize(text)
            freq = Counter(words)
            for word, count in freq.items():
                self.index[word][doc_id] = count
        for word in self.index:
            self.doc_freq[word] = len(self.index[word])
        # Compute document vector lengths
        for doc_id, text in self.docs.items():
            words = self.tokenize(text)
            freq = Counter(words)
            length_sq = 0
            for word, tf in freq.items():
                if word in self.doc_freq:
                    idf = math.log((len(self.docs)+1)/(self.doc_freq[word]+1)) + 1
                    tf_weight = 1 + math.log(tf)
                    length_sq += (tf_weight * idf) ** 2
            self.doc_lengths[doc_id] = math.sqrt(length_sq)
        print(f"Index built with {len(self.index)} unique terms")

    # Search
    def search_with_base(self, query, log_base=10):
        words = self.tokenize(query)
        if not words: return []
        query_freq = Counter(words)
        query_vec = {}
        for word, tf in query_freq.items():
            if word in self.doc_freq:
                idf = math.log((len(self.docs)+1)/(self.doc_freq[word]+1), log_base) + 1
                query_vec[word] = (1 + math.log(tf)) * idf
        query_len = math.sqrt(sum(v**2 for v in query_vec.values()))
        scores = {}
        for word, q_weight in query_vec.items():
            for doc_id, tf in self.index.get(word, {}).items():
                tf_weight = 1 + math.log(tf)
                idf = math.log((len(self.docs)+1)/(self.doc_freq[word]+1), log_base) + 1
                scores[doc_id] = scores.get(doc_id, 0) + (tf_weight * idf) * q_weight
        # Cosine normalization
        for doc_id in scores:
            if self.doc_lengths[doc_id] > 0 and query_len > 0:
                scores[doc_id] /= (self.doc_lengths[doc_id] * query_len)
            else:
                scores[doc_id] = 0
        return sorted(scores.items(), key=lambda x: x[1], reverse=True)

    # Evaluate MAP, MAP@30, precision at recall
    def evaluate(self, queries, qrels, log_bases=[0.1,0.2,0.3,0.5,1.5,2,10], top_k=30):
        recall_levels = [0.0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1.0]
        all_results = {}
        for lb in log_bases:
            prec_at_recall = {r:[] for r in recall_levels}
            map_scores = []
            map30_scores = []
            for qid, query in queries.items():
                relevant_docs = qrels.get(qid, set())
                if not relevant_docs: continue
                results_docs = self.search_with_base(query, lb)[:top_k]
                retrieved_docs = [doc_id for doc_id,_ in results_docs]
                # Compute precision at each recall level
                num_rel = 0
                precisions = []
                for i, doc_id in enumerate(retrieved_docs):
                    if doc_id in relevant_docs:
                        num_rel += 1
                        precisions.append(num_rel / (i+1))
                        # Map precision to recall level
                        recall = num_rel / len(relevant_docs)
                        for r in recall_levels:
                            if recall >= r:
                                prec_at_recall[r].append(num_rel/(i+1))
                # Average Precision
                ap = sum(precisions)/len(relevant_docs) if len(relevant_docs)>0 else 0
                map_scores.append(ap)
                map30_scores.append(sum(precisions)/min(len(relevant_docs), top_k) if len(relevant_docs)>0 else 0)
            # Average over queries
            avg_prec_at_recall = {r: sum(v)/len(v) if len(v)>0 else 0 for r,v in prec_at_recall.items()}
            avg_map = sum(map_scores)/len(map_scores) if len(map_scores)>0 else 0
            avg_map30 = sum(map30_scores)/len(map30_scores) if len(map30_scores)>0 else 0
            all_results[lb] = {'precision_recall': avg_prec_at_recall, 'MAP': avg_map, 'MAP@30': avg_map30}

        # Display results similar to Tables 4-6
        print("\nEvaluation Summary:")
        for lb,res in all_results.items():
            print(f"\nLog base {lb}: MAP={res['MAP']:.4f}, MAP@30={res['MAP@30']:.4f}")
            print("Recall -> Precision:")
            for r in recall_levels:
                print(f"{r:.1f}: {res['precision_recall'][r]:.3f}", end=" | ")
            print()
        # Identify best, worst, standard
        map30_values = {lb:res['MAP@30'] for lb,res in all_results.items()}
        best_lb = max(map30_values, key=map30_values.get)
        worst_lb = min(map30_values, key=map30_values.get)
        standard_lb = 10
        print(f"\nBest log base (MAP@30): {best_lb}, Standard (log10): {standard_lb}, Worst: {worst_lb}")
        return all_results


In [3]:
# Main execution
def main():
    ir = SimpleIR()
    ir.load_docs(r"C:\Snehal\ir\med\MED.ALL")
    queries = ir.load_queries(r"C:\Snehal\ir\med\MED.QRY")
    qrels = ir.load_qrels(r"C:\Snehal\ir\med\MED.REL")

    ir.build_index()
    results = ir.evaluate(queries, qrels, top_k=30)

if __name__ == "__main__":
    main()

Loaded 1033 documents
Loaded 30 queries
Loaded relevance judgments for 30 queries
Index built with 8877 unique terms

Evaluation Summary:

Log base 0.1: MAP=0.0000, MAP@30=0.0000
Recall -> Precision:
0.0: 0.000 | 0.1: 0.000 | 0.2: 0.000 | 0.3: 0.000 | 0.4: 0.000 | 0.5: 0.000 | 0.6: 0.000 | 0.7: 0.000 | 0.8: 0.000 | 0.9: 0.000 | 1.0: 0.000 | 

Log base 0.2: MAP=0.0000, MAP@30=0.0000
Recall -> Precision:
0.0: 0.000 | 0.1: 0.000 | 0.2: 0.000 | 0.3: 0.000 | 0.4: 0.000 | 0.5: 0.000 | 0.6: 0.000 | 0.7: 0.000 | 0.8: 0.000 | 0.9: 0.000 | 1.0: 0.000 | 

Log base 0.3: MAP=0.0000, MAP@30=0.0000
Recall -> Precision:
0.0: 0.000 | 0.1: 0.000 | 0.2: 0.000 | 0.3: 0.000 | 0.4: 0.000 | 0.5: 0.000 | 0.6: 0.000 | 0.7: 0.000 | 0.8: 0.000 | 0.9: 0.000 | 1.0: 0.000 | 

Log base 0.5: MAP=0.0000, MAP@30=0.0000
Recall -> Precision:
0.0: 0.000 | 0.1: 0.000 | 0.2: 0.000 | 0.3: 0.000 | 0.4: 0.000 | 0.5: 0.000 | 0.6: 0.000 | 0.7: 0.000 | 0.8: 0.000 | 0.9: 0.000 | 1.0: 0.000 | 

Log base 1.5: MAP=0.0012, MAP@30=0.00