In [106]:
import re
import collections
import math
import json
import numpy as np
from collections import Counter
from hazm import Normalizer, word_tokenize, Stemmer

In [107]:
def load_docs():
    """Load the news corpus from ``IR_data_news_12k.json`` in the working directory.

    Returns:
        tuple: ``(docs, contents, urls)`` where

        * ``docs`` maps each document id (a ``str`` key of the JSON object) to a
          dict holding only the ``'title'``, ``'content'`` and ``'url'`` fields,
        * ``contents`` lists every document's raw content in file order,
        * ``urls`` lists every document's URL in the same order.

    Raises:
        FileNotFoundError: if the corpus file is missing.
        KeyError: if a record lacks one of the three expected fields.
    """
    docs = {}
    contents = []
    urls = []
    # The corpus is Persian text; decode explicitly as UTF-8 instead of relying
    # on the platform default encoding.
    with open("IR_data_news_12k.json", 'r', encoding='utf-8') as file:
        raw_docs = json.load(file)

    for key, record in raw_docs.items():
        idx = str(key)
        docs[idx] = {'title': record['title'],
                     'content': record['content'],
                     'url': record['url'],
                     }
        contents.append(record['content'])
        # Previously `urls` was returned empty; keep it aligned with `contents`.
        urls.append(record['url'])
    return docs, contents, urls

docs, contents, urls = load_docs()
docs['1']

{'title': 'سجادی :حضور تماشاگران در  لیگ برتر فوتبال تابع نظر فدراسیون  و سازمان لیگ است',
 'content': '\nبه گزارش خبرگزاری فارس، سید حمید سجادی در حاشیه مراسم گرامیداشت روز جوان در جمع خبرنگاران در رابطه با عرضه سهام سرخابی\u200cها در بورس اظهار داشت: منتظر طی روند هستیم و بعدا اطلاع رسانی خواهیم کرد. وی در مورد حضور تماشاگران در مسابقات فوتبال اظهار داشت:\xa0حضور تماشاگران در\xa0 لیگ برتر فوتبال تابع نظر فدراسیون ،سازمان لیگ و ستاد ملی مبارزه با کرونا است. انتهای پیام/\n\n\n',
 'url': 'https://www.farsnews.ir/news/14001224000982/سجادی-حضور-تماشاگران-در-لیگ-برتر-فوتبال-تابع-نظر-فدراسیون-و-سازمان'}

In [130]:
# Path of the abbreviation table read by load_abbreviations().
ABBREVIATIONS_FILE = "abbreviations.txt"

# (regex, replacement) pairs applied in order by spacing_correction().
# Both replacements insert a zero-width non-joiner (U+200C) between the two groups.
SPACING_PATTERNS = [
    # Splits a listed suffix off the end of a word with a ZWNJ and replaces the
    # following whitespace / end-of-string with a single space.
    # NOTE(review): this fires on any word that merely ends in these letters; the
    # test output in this notebook shows 'پیام' being split into 'پی' + ZWNJ + 'ام'.
    # NOTE(review): the alternatives written with Arabic 'ي' run after
    # normalizer.normalize(); the test output shows Arabic 'ي' already rewritten
    # to 'ی' at that point, so they presumably never match - confirm.
    (r"(\S)(ها|هاي|هایی|تر|تري|ترین|گر|گري|ام|ات|اش)(\s|$)", r"\1‌\2 "),
    # Joins a detached verb prefix (می / نمی) to the next word with a ZWNJ.
    (r"\b(می|نمی)\s+(\S)", r"\1‌\2"),
]

# Runs of these punctuation characters are deleted by remove_punct().
# NOTE(review): '/' is not in the class; the indexed tokens shown later in this
# notebook still contain a bare '/' token.
EXTRA_PUNCT_PATTERN = r"[!<>.,؛،:\-–_=+(){}\[\]…\"\'?؟«»٪%]+"
# Shared hazm normalizer instance used by preprocess_single_text().
normalizer = Normalizer()


def load_abbreviations(file_path):
    """Load abbreviations from a text file into a dictionary.

    Each non-blank line must be a Python literal accepted by ``dict.update``
    (normally a dict literal such as ``{'short': 'expansion'}``). Lines that
    cannot be parsed or merged are reported on stdout and skipped.

    Args:
        file_path: Path of the UTF-8 encoded abbreviation file.

    Returns:
        dict: short form -> expansion, later lines overriding earlier ones.
    """
    # Local import: `ast` is not part of the notebook's import cell.
    import ast

    abbreviations = {}
    with open(file_path, 'r', encoding='utf-8') as f:
        for line in f:
            stripped = line.strip()
            if not stripped:
                # Blank lines are not errors; ignore them quietly.
                continue
            try:
                # literal_eval only accepts literals, so a line in the data file
                # can no longer execute arbitrary code the way eval() allowed.
                entry = ast.literal_eval(stripped)
                abbreviations.update(entry)
            except Exception as e:
                print(f"Error parsing line: {stripped} - {e}")
    return abbreviations


def expand_abbreviations(text, abbreviations):
    """Replace every stand-alone abbreviation in ``text`` with its expansion.

    Args:
        text: Input string.
        abbreviations: Mapping of short form -> expansion (both ``str``).

    Returns:
        str: ``text`` with all matching short forms expanded.

    Notes:
        * A short form matches only when it is not glued to a word character on
          either side. Look-arounds are used instead of ``\\b`` because ``\\b``
          after a trailing ``.`` demands a following word character, which made
          dotted forms such as ``"ه.ش."`` impossible to match.
        * Longer short forms are tried first so that a form which is a prefix of
          another does not consume part of the longer one.
        * The expansion is inserted literally; backslashes in it are not
          interpreted as regex group references.
    """
    for short_form in sorted(abbreviations, key=len, reverse=True):
        if not short_form:
            # An empty key would match at every position; ignore it.
            continue
        expansion = abbreviations[short_form]
        pattern = fr"(?<!\w){re.escape(short_form)}(?!\w)"
        text = re.sub(pattern, lambda _match, _rep=expansion: _rep, text)
    return text


def spacing_correction(text):
    """Run every (regex, replacement) pair of SPACING_PATTERNS over ``text``, in order.

    Each substitution operates on the result of the previous one; the final
    string is returned.
    """
    corrected = text
    for regex, replacement in SPACING_PATTERNS:
        corrected = re.compile(regex).sub(replacement, corrected)
    return corrected


def remove_punct(text):
    """Delete every run of characters matched by EXTRA_PUNCT_PATTERN from ``text``.

    The matched punctuation is removed outright (replaced by the empty string),
    so the characters on either side of it become adjacent. No diacritic
    handling happens here.
    """
    return re.compile(EXTRA_PUNCT_PATTERN).sub("", text)


abbreviations = load_abbreviations(ABBREVIATIONS_FILE)

def preprocess_single_text(text, expand_abbr=True, preserve_email_id=True, do_spacing_corr=True,
                           remove_diacritics=True):
    """Normalize one raw string and split it into tokens.

    Stages, in order (each optional one is controlled by its flag):
      1. ``preserve_email_id``  -> ``clean_emails(text)``
      2. ``expand_abbr``        -> ``expand_abbreviations`` with the module-level table
      3. always                 -> ``normalizer.normalize``
      4. ``do_spacing_corr``    -> ``spacing_correction``
      5. ``remove_diacritics``  -> ``remove_punct`` (despite the flag's name, this
         stage strips punctuation)
      6. always                 -> ``word_tokenize``

    NOTE(review): ``clean_emails`` is not defined in the visible cells of this
    notebook; it must already exist in the kernel for the default call to work.

    Returns:
        The token sequence produced by ``word_tokenize``.
    """
    cleaned = clean_emails(text) if preserve_email_id else text

    if expand_abbr:
        cleaned = expand_abbreviations(cleaned, abbreviations)

    cleaned = normalizer.normalize(cleaned)

    if do_spacing_corr:
        cleaned = spacing_correction(cleaned)

    if remove_diacritics:
        cleaned = remove_punct(cleaned)

    return word_tokenize(cleaned)

In [131]:
def test_normalization():
    """Print the tokens produced by preprocess_single_text for a set of sample sentences.

    This is a visual smoke test: nothing is asserted, each sentence and its
    resulting token list are written to stdout.
    """
    test_sentences = [
        "لطفاً با ایمیل example@example.com تماس بگیرید.",
        "پیام از کاربر @user123 در مورد پروژه دریافت شد.",
        "من   می خواهم که نمی روم  به این مکان. همچنین کتاب‌هایم  ترمیم شدند.",
        "كاف و يای عربی به شکل فارسی تبدیل می‌شود. بسم‌الله الرّحمن الرّحيم.",
        "إِنَّ اللهَ غَفورٌ رَحيمٌ.",
        "سلام! این یک جمله است... یا شاید جمله‌ای دیگر؟!",
        "شماره تماس 1234567890 به شماره ۱۲۳۴۵۶۷۸۹۰ تبدیل شود.",
        "در سال‌های گذشته کتاب‌های زیادی خواندم. سال‌ها می‌گذرد و کتاب‌ها تکرار می‌شوند.",
        "بِسْمِ اللَّهِ الرَّحْمَٰنِ الرَّحِيمِ و ﷽.",
        "آیا !؟ و ... را باید حذف کنیم؟",
        "نمی‌شود \"می‌خواهم\" را به دو توکن تبدیل کرد.",
        # The comma after the next literal was missing, which silently glued it
        # to the abbreviation sample below through implicit string concatenation.
        "Contact me at user@example.edu. شماره من ۱۲۳ است.",
        "ه.ش."
    ]
    
    for i, sentence in enumerate(test_sentences, 1):
        print(f"Test {i}:")
        print("Original:", sentence)
        tokens = preprocess_single_text(sentence)
        print("Processed Tokens:", tokens)
        print("-" * 50)

# Run the test function
test_normalization()

Test 1:
Original: لطفاً با ایمیل example@example.com تماس بگیرید.
Processed Tokens: ['لطفا', 'با', 'ایمیل', 'EMAILexample', 'at', 'example', 'dot', 'comEMAIL', 'تماس', 'بگیرید']
--------------------------------------------------
Test 2:
Original: پیام از کاربر @user123 در مورد پروژه دریافت شد.
Processed Tokens: ['پی\u200cام', 'از', 'کاربر', '@user', '۱۲۳', 'در', 'مورد', 'پروژه', 'دریافت', 'شد']
--------------------------------------------------
Test 3:
Original: من   می خواهم که نمی روم  به این مکان. همچنین کتاب‌هایم  ترمیم شدند.
Processed Tokens: ['من', 'می\u200cخواهم', 'که', 'نمی\u200cروم', 'به', 'این', 'مکان', 'همچنین', 'کتاب\u200cهایم', 'ترمیم', 'شدند']
--------------------------------------------------
Test 4:
Original: كاف و يای عربی به شکل فارسی تبدیل می‌شود. بسم‌الله الرّحمن الرّحيم.
Processed Tokens: ['کاف', 'و', 'یای', 'عربی', 'به', 'شکل', 'فارسی', 'تبدیل', 'می\u200cشود', 'بسم\u200cالله', 'الرحمن', 'الرحیم']
--------------------------------------------------
Test 5:
Original:

In [110]:
stemmer = Stemmer()

def stem_tokens(tokens):
    """Return a new list holding ``stemmer.stem(token)`` for every token, in order."""
    return list(map(stemmer.stem, tokens))

def compute_top_k_frequent(tokens, k):
    """Return the ``k`` most frequent tokens as an ordered ``{token: count}`` dict.

    Tokens are ordered by descending count; tokens with equal counts keep the
    order in which they first appear in ``tokens``.
    """
    counts = Counter(tokens)
    # Stable sort of the keys by their count, highest first.
    ranked = sorted(counts, key=counts.get, reverse=True)
    return {token: counts[token] for token in ranked[:k]}

def simple_preprocess(content, top_k_tokens=None):
    """Turn one document (or query) string into a list of stemmed tokens.

    The text goes through ``preprocess_single_text`` (normalize + tokenize) and
    then ``stem_tokens``. When ``top_k_tokens`` is a non-empty container, every
    stem found in it is dropped from the result.

    Args:
        content: Raw text.
        top_k_tokens: Optional container of stems to filter out.

    Returns:
        list: The stemmed (and optionally filtered) tokens.
    """
    stems = stem_tokens(preprocess_single_text(content))

    # Nothing to filter: hand back the stems untouched.
    if not top_k_tokens:
        return stems

    return [stem for stem in stems if stem not in top_k_tokens]

def preprocess_all_docs(docs, top_k=50):
    """Tokenize and stem every document, then strip the corpus-wide top-K stems.

    Args:
        docs: Mapping doc_id -> record with a ``'content'`` string.
        top_k: How many of the most frequent stems to treat as stop words.

    Returns:
        tuple: ``(docs, top_k_tokens_with_counts)``. ``docs`` is the *same*
        mapping that was passed in; each record's ``'content'`` has been
        overwritten in place with its filtered token list. The second element
        is the ``{stem: count}`` dict of removed stems.
    """
    # Pass 1: stem every document (no filtering yet).
    stemmed_by_doc = {
        doc_id: simple_preprocess(record['content'])
        for doc_id, record in docs.items()
    }

    # Corpus-wide frequency ranking over all stems.
    corpus_tokens = [stem for stems in stemmed_by_doc.values() for stem in stems]
    top_k_tokens_with_counts = compute_top_k_frequent(corpus_tokens, top_k)
    frequent_stems = set(top_k_tokens_with_counts.keys())

    # Pass 2: write the filtered token list back into each record.
    for doc_id, stems in stemmed_by_doc.items():
        docs[doc_id]['content'] = [stem for stem in stems if stem not in frequent_stems]

    return docs, top_k_tokens_with_counts

In [111]:
# Reload the raw corpus so preprocessing starts from untouched string content.
docs, contents, _ = load_docs()

# preprocess_all_docs overwrites each record's 'content' in place and returns the
# same mapping, so `pre_processed_docs` and `docs` refer to one object from here on.
pre_processed_docs, top_k_tokens_with_counts = preprocess_all_docs(docs, top_k=20)

# Show the stems that were removed as stop words, with their corpus counts.
print("\nTop-K Frequent Tokens:")
for token, count in top_k_tokens_with_counts.items():
    print(f"Token: {token}, Count: {count}")

# Show the final token lists of the first five documents.
print("\nProcessed Document Tokens:")
for doc_id, doc_data in list(pre_processed_docs.items())[:5]: 
    print(f"Doc {doc_id} final tokens:", doc_data['content'])


Top-K Frequent Tokens:
Token: و, Count: 234908
Token: در, Count: 165135
Token: به, Count: 136045
Token: از, Count: 92977
Token: این, Count: 83094
Token: که, Count: 75480
Token: با, Count: 69233
Token: را, Count: 68677
Token: اس, Count: 48513
Token: برا, Count: 31029
Token: کرد, Count: 26941
Token: آن, Count: 24578
Token: ه, Count: 24415
Token: یک, Count: 22864
Token: کشور, Count: 22345
Token: ت, Count: 22173
Token: ما, Count: 19785
Token: خود, Count: 18861
Token: بر, Count: 18807
Token: شد, Count: 17270

Processed Document Tokens:
Doc 0 final tokens: ['گزار', 'خبرگزار', 'فارس', 'کنفدراسیون', 'فوتبال', 'آسیا', 'AFC', 'نامه', 'رسم', 'فدراسیون', 'فوتبال', 'ایر', 'باشگاه', 'گیت', 'پسند', 'ز', 'قرعه\u200cکش', 'ج', 'باشگاه', 'فوتسال', 'آسیا', 'رسما', 'اعل', 'اساس', '۲۵', 'فروردین\u200cماه', '۱۴۰۱', 'مراس', 'قرعه\u200cکش', 'ج', 'باشگاه', 'فوتسال', 'آسیا', 'مالز', 'برگزار', 'می\u200cشود', 'باشگاه', 'گیت', 'پسند', 'بعنو', 'قهر', 'فوتسال', 'ایر', 'سال', '۱۴۰۰', 'مسابق', 'راه', 'پیدا', 'کرده_اس'

In [112]:
pre_processed_docs['1']

{'title': 'سجادی :حضور تماشاگران در  لیگ برتر فوتبال تابع نظر فدراسیون  و سازمان لیگ است',
 'content': ['گزار',
  'خبرگزار',
  'فارس',
  'سید',
  'حمید',
  'سجاد',
  'حاشیه',
  'مراس',
  'گرامیدا',
  'روز',
  'جو',
  'جمع',
  'خبرنگار',
  'رابطه',
  'عرضه',
  'سه',
  'سرخابی\u200c',
  'بورس',
  'اظهار',
  'دا',
  'منتظر',
  'ط',
  'روند',
  'هست',
  'بعدا',
  'اطلاع\u200cرسان',
  'خواهیم_کرد',
  'مورد',
  'حضور',
  'تماشاگر',
  'مسابق',
  'فوتبال',
  'اظهار',
  'دا',
  'حضور',
  'تماشاگر',
  'لیگ',
  'فوتبال',
  'تابع',
  'نظر',
  'فدراسیون',
  'ساز',
  'لیگ',
  'ستاد',
  'مل',
  'مبارزه',
  'کرونا',
  'انت',
  'پی',
  '/'],
 'url': 'https://www.farsnews.ir/news/14001224000982/سجادی-حضور-تماشاگران-در-لیگ-برتر-فوتبال-تابع-نظر-فدراسیون-و-سازمان'}

In [113]:
def _add_new_token_posting(token_dict, token, doc_id, position):
    """
    Initialize a new posting entry for a token that has not yet been seen.
    """
    token_dict[token] = {
        'frequency': 1,
        'docs': {
            doc_id: {
                'positions': [position],
                'number_of_token': 1
            }
        }
    }

def _update_existing_token_posting(token_dict, token, doc_id, position):
    """
    Update an existing token entry with a new position in an existing or new document.
    """
    token_dict[token]['frequency'] += 1
    
    if doc_id in token_dict[token]['docs']:
        token_dict[token]['docs'][doc_id]['positions'].append(position)
        token_dict[token]['docs'][doc_id]['number_of_token'] += 1
    else:
        token_dict[token]['docs'][doc_id] = {
            'positions': [position],
            'number_of_token': 1
        }

def _build_postings_dict(Docs):
    """Build the positional postings dictionary for a tokenized corpus.

    Args:
        Docs: Mapping doc_id -> record whose ``'content'`` is a token sequence.

    Returns:
        dict: token -> ``{'frequency': ..., 'docs': {doc_id: {'positions': [...],
        'number_of_token': ...}}}``, with positions being 0-based token offsets.
    """
    token_dict = {}
    for doc_id, doc_content in Docs.items():
        for position, token in enumerate(doc_content['content']):
            # Pick the helper that matches whether the token was seen before.
            record = (_add_new_token_posting if token not in token_dict
                      else _update_existing_token_posting)
            record(token_dict, token, doc_id, position)
    return token_dict

def _calculate_tf_idf(token_dict, total_docs):
    """
    Calculate TF-IDF for each token in each document.
    """
    for term, term_data in token_dict.items():
        term_docs = term_data['docs']
        n_t = len(term_docs)  # Number of documents containing this term

        for doc_id, doc_info in term_docs.items():
            tf = doc_info['number_of_token']
            # TF-IDF = log10(N / n_t) * (1 + log10(tf))
            tf_idf_value = (np.log10(total_docs / n_t)) * (1 + np.log10(tf))
            doc_info['tf_idf'] = tf_idf_value

def _build_champions_list_and_docs_vectors(token_dict, champ_len):
    """
    Create the champions list for each term by sorting documents based on number_of_token,
    and build the docs_vectors structure for future usage.
    """
    docs_vectors = {}

    for term, term_data in token_dict.items():
        term_docs = term_data['docs']

        # Sort documents by number_of_token (descending) for the champion list
        sorted_term_docs = sorted(
            term_docs,
            key=lambda d: term_docs[d]['number_of_token'],
            reverse=True
        )

        # Build champion list (take top `champ_len` if needed)
        champions_list = {}
        for doc_id in sorted_term_docs:
            champions_list[doc_id] = {
                'number_of_token': term_docs[doc_id]['number_of_token'],
                'tf_idf': term_docs[doc_id]['tf_idf']
            }

        if champ_len < len(term_docs):
            champions_list = dict(list(champions_list.items())[:champ_len])

        token_dict[term]['champions_list'] = champions_list

        # Populate docs_vectors
        for doc_id, doc_info in term_docs.items():
            if doc_id not in docs_vectors:
                docs_vectors[doc_id] = {}
            docs_vectors[doc_id][term] = {
                'tf_idf': doc_info['tf_idf'],
                'tf': doc_info['number_of_token']
            }

    return docs_vectors

def Postings_List(Docs, champ_len):
    """Build the full index for a tokenized corpus.

    Pipeline:
      1. positional postings via ``_build_postings_dict``;
      2. tf-idf weights via ``_calculate_tf_idf`` with N = ``len(Docs)``;
      3. champions lists and document vectors via
         ``_build_champions_list_and_docs_vectors``.

    Args:
        Docs: Mapping doc_id -> record whose ``'content'`` is a token sequence.
        champ_len: Maximum length of each term's champions list.

    Returns:
        tuple: ``(token_dict, docs_vectors)``.
    """
    token_dict = _build_postings_dict(Docs)
    _calculate_tf_idf(token_dict, len(Docs))
    return token_dict, _build_champions_list_and_docs_vectors(token_dict, champ_len)


In [114]:
dictionary, docs_vectors = Postings_List(pre_processed_docs, 20)

In [115]:
docs_vectors['8535']

{'گزار': {'tf_idf': 0.06822999816007738, 'tf': 1},
 'خبرگزار': {'tf_idf': 0.006888013253462664, 'tf': 1},
 'مراس': {'tf_idf': 1.3082797702727251, 'tf': 1},
 'برگزار': {'tf_idf': 0.7236736350395699, 'tf': 2},
 'می\u200cشود': {'tf_idf': 0.7264740996744733, 'tf': 3},
 'عنو': {'tf_idf': 0.49503246937512, 'tf': 1},
 'مق': {'tf_idf': 1.1250099265899205, 'tf': 1},
 'انت': {'tf_idf': 0.004435995384070603, 'tf': 1},
 'پی': {'tf_idf': 0.003000072495857743, 'tf': 1},
 '/': {'tf_idf': 0.004364086371255635, 'tf': 1},
 'ساز': {'tf_idf': 0.7613260376849612, 'tf': 1},
 'مل': {'tf_idf': 0.486329764964978, 'tf': 1},
 'سو': {'tf_idf': 0.6778518952477012, 'tf': 1},
 'زیر': {'tf_idf': 0.8060803276103631, 'tf': 1},
 'برنامه': {'tf_idf': 0.7438109781030208, 'tf': 1},
 'ورز': {'tf_idf': 1.1212293196304566, 'tf': 1},
 'امور': {'tf_idf': 1.006526552989648, 'tf': 1},
 'رقاب': {'tf_idf': 1.371263662807911, 'tf': 1},
 'نه': {'tf_idf': 0.9870957429704109, 'tf': 1},
 'میزبان': {'tf_idf': 1.4024838899048566, 'tf': 1}

In [116]:
dictionary['فارس']['champions_list']

{'7435': {'number_of_token': 64, 'tf_idf': 0.0059070807944042346},
 '11697': {'number_of_token': 36, 'tf_idf': 0.0053810823065394255},
 '163': {'number_of_token': 35, 'tf_idf': 0.005355328462095761},
 '6404': {'number_of_token': 35, 'tf_idf': 0.005355328462095761},
 '1322': {'number_of_token': 33, 'tf_idf': 0.00530153641698586},
 '1633': {'number_of_token': 31, 'tf_idf': 0.00524438024289291},
 '6755': {'number_of_token': 27, 'tf_idf': 0.00511808306260702},
 '7584': {'number_of_token': 26, 'tf_idf': 0.005083580816288167},
 '821': {'number_of_token': 25, 'tf_idf': 0.005047725200337222},
 '2388': {'number_of_token': 24, 'tf_idf': 0.0050104056913280555},
 '7744': {'number_of_token': 24, 'tf_idf': 0.0050104056913280555},
 '525': {'number_of_token': 19, 'tf_idf': 0.00479683475620343},
 '8680': {'number_of_token': 16, 'tf_idf': 0.004639729076116687},
 '10183': {'number_of_token': 16, 'tf_idf': 0.004639729076116687},
 '10025': {'number_of_token': 15, 'tf_idf': 0.00458072789429455},
 '2831': {'

In [137]:
dictionary['استرا']

{'frequency': 3,
 'docs': {'325': {'positions': [52],
   'number_of_token': 1,
   'tf_idf': 3.7854010249923875},
  '4430': {'positions': [272, 383],
   'number_of_token': 2,
   'tf_idf': 4.924920279132276}},
 'champions_list': {'4430': {'number_of_token': 2,
   'tf_idf': 4.924920279132276},
  '325': {'number_of_token': 1, 'tf_idf': 3.7854010249923875}}}

In [118]:
len(dictionary)

48166

In [119]:
# Persist the index as two JSON files, each holding one half of the terms
# (split by insertion order of `dictionary`).
postings_items = list(dictionary.items())
half_length = len(postings_items) // 2
first_half = dict(postings_items[:half_length])
second_half = dict(postings_items[half_length:])

with open('first_half.json', "w", encoding="utf-8") as first_file:
    json.dump(first_half, first_file, indent=4)

with open('second_half.json', "w", encoding="utf-8") as second_file:
    json.dump(second_half, second_file, indent=4)

In [120]:
def calculate_tf_idf(f_td, N, n_t):
    """Return the log-weighted tf-idf ``(1 + log10(f_td)) * log10(N / n_t)``.

    Args:
        f_td: Raw frequency of the term.
        N: Total number of documents.
        n_t: Number of documents containing the term.
    """
    return (1 + np.log10(f_td)) * np.log10(N / n_t)


def vector_length(vector_dict):
    """Return the Euclidean norm of a document vector.

    ``vector_dict`` maps each term to a dict with a ``'tf_idf'`` entry; the norm
    is taken over those weights.
    """
    squared_weights = (component['tf_idf'] ** 2 for component in vector_dict.values())
    return math.sqrt(sum(squared_weights))

def get_query_tokens(query):
    """Run the query string through ``simple_preprocess`` and return its tokens.

    No top-K filter is passed, so only normalization, tokenization and stemming
    are applied.
    """
    query_tokens = simple_preprocess(query)
    return query_tokens

def get_query_tokens_count(query_tokens):
    """Tally the query tokens.

    Returns a plain dict mapping each token to its number of occurrences, keyed
    in order of first appearance.
    """
    counts = {}
    for token in query_tokens:
        counts[token] = counts.get(token, 0) + 1
    return counts

def compute_query_weight(term, query_tokens_count, dictionary, champion_list, total_number_of_docs):
    """Compute the tf-idf weight of one query term and pick its candidate documents.

    Args:
        term: The query term.
        query_tokens_count: Mapping term -> frequency in the query.
        dictionary: The index; ``dictionary[term]`` holds ``'docs'`` (full
            postings) and ``'champions_list'`` (truncated postings).
        champion_list: When true, candidates come from the champions list,
            otherwise from the full postings.
        total_number_of_docs: Collection size N used for the idf.

    Returns:
        tuple: ``(w_tq, term_docs)``; ``(0, {})`` when the term is not indexed.
    """
    if term not in dictionary:
        return 0, {}

    term_entry = dictionary[term]

    # Candidate documents to score: champions only, or every posting.
    term_docs = term_entry['champions_list'] if champion_list else term_entry['docs']

    # The idf must use the term's real document frequency. Taking the length of
    # the champions list (capped at the champion size) would give every
    # frequent term the same, inflated idf whenever champion lists are enabled.
    document_frequency = len(term_entry['docs'])

    w_tq = calculate_tf_idf(query_tokens_count[term],
                            total_number_of_docs,
                            document_frequency)
    return w_tq, term_docs

def update_doc_scores(term_docs, w_tq, cosine_scores, jaccard_scores):
    """Fold one query term's contribution into both score tables, in place.

    For every document in ``term_docs`` the cosine accumulator gains
    ``tf_idf * w_tq`` and the Jaccard accumulator (matched-term counter) gains
    1. Document ids are converted to ``int`` before being used as keys.
    """
    for doc_key, posting in term_docs.items():
        w_td = posting['tf_idf']
        doc_id = int(doc_key)
        contribution = w_td * w_tq

        if doc_id not in cosine_scores:
            # First query term that hits this document.
            cosine_scores[doc_id] = contribution
            jaccard_scores[doc_id] = 1
            continue

        cosine_scores[doc_id] += contribution
        jaccard_scores[doc_id] += 1

def finalize_cosine_scores(cosine_scores):
    """Normalize each accumulated score by its document's vector length, in place.

    The document vector is looked up in the module-level ``docs_vectors`` using
    the string form of the document id. The query's own length is not divided
    out here.
    """
    for doc_number in cosine_scores:
        doc_norm = vector_length(docs_vectors[str(doc_number)])
        cosine_scores[doc_number] /= doc_norm

def finalize_jaccard_scores(jaccard_scores, query_terms_num):
    """Turn matched-term counts into Jaccard coefficients, in place.

    On entry ``jaccard_scores[doc]`` is the number of distinct query terms found
    in the document (the intersection size). On exit it is
    ``intersection / (|doc terms| + query_terms_num - intersection)``.

    The document side is measured in *distinct* terms: Jaccard is defined on
    sets, and the intersection already counts distinct terms, so using the raw
    token count (with repetitions) would inflate the union and penalize long
    documents.

    Args:
        jaccard_scores: Mapping doc id -> intersection size; overwritten with scores.
        query_terms_num: Size of the query term set supplied by the caller.

    Reads the module-level ``pre_processed_docs`` for each document's tokens.
    """
    for doc_number, intersection in jaccard_scores.items():
        doc_terms = len(set(pre_processed_docs[str(doc_number)]['content']))
        union = doc_terms + query_terms_num - intersection
        # Guard against a degenerate empty union instead of dividing by zero.
        jaccard_scores[doc_number] = intersection / union if union else 0.0

def sort_scores(scores_dict):
    """Return the ``(doc_id, score)`` pairs ordered from highest to lowest score.

    Pairs with equal scores keep their order from ``scores_dict``.
    """
    ranked = list(scores_dict.items())
    ranked.sort(key=lambda pair: pair[1], reverse=True)
    return ranked

def query_scoring(query, total_number_of_docs, dictionary, k, champion_list=False):
    """Score the collection against ``query`` with cosine and Jaccard similarity.

    Args:
        query: Raw query string.
        total_number_of_docs: Collection size N used for idf.
        dictionary: The index built by ``Postings_List``.
        k: Number of top results to return per measure.
        champion_list: Score only champions-list documents when true.

    Returns:
        tuple: ``(top_cosine, top_jaccard)``, each a list of at most ``k``
        ``(doc_id, score)`` pairs sorted by descending score.

    Side effects:
        Prints the query's term-frequency dict.
    """
    # Initialize scores
    cosine_scores = {}
    jaccard_scores = {}

    # Preprocess query
    query_tokens = get_query_tokens(query)
    query_tokens_count = get_query_tokens_count(query_tokens)
    # The Jaccard intersection below is incremented once per *distinct* query
    # term, so the query-side size must be the number of distinct terms too;
    # summing the counts would count a repeated query word several times.
    query_terms_num = len(query_tokens_count)

    print(query_tokens_count)

    # Compute and update scores for each term in the query
    for term in query_tokens_count:
        w_tq, term_docs = compute_query_weight(term,
                                               query_tokens_count,
                                               dictionary,
                                               champion_list,
                                               total_number_of_docs)
        # A zero weight means the term is unknown or carries no information.
        if w_tq != 0:
            update_doc_scores(term_docs, w_tq, cosine_scores, jaccard_scores)

    # Finalize scores
    finalize_cosine_scores(cosine_scores)
    finalize_jaccard_scores(jaccard_scores, query_terms_num)

    # Sort and retrieve top k results
    sorted_doc_cosine = sort_scores(cosine_scores)
    sorted_doc_jaccard = sort_scores(jaccard_scores)

    return sorted_doc_cosine[:k], sorted_doc_jaccard[:k]

In [121]:
def print_results(results):
    """Print a ranked result list and return it as a dict keyed by rank.

    Args:
        results: Iterable of ``(doc_id, score)`` pairs, best first. Pairs whose
            ``doc_id`` is ``None`` are skipped (their rank number is not reused).

    Returns:
        dict: rank -> ``{'docID', 'title', 'url'}``, with title and URL read
        from the module-level ``docs``.
    """
    print("---------- Results ----------")
    summary = {}

    for rank, (doc_id, _) in enumerate(results, start=1):
        if doc_id is not None:
            doc_key = str(doc_id)

            print(f"Rank: {rank} | ID: {doc_id}")
            print(f"Title : {docs[doc_key]['title']}")
            print(f"URL   : {docs[doc_key]['url']}")
            print("-" * 50)

            summary[rank] = {
                'docID': doc_id,
                'title': docs[doc_key]["title"],
                'url'  : docs[doc_key]["url"],
            }

    return summary

def query_search(query, result_numbers=5, champion_list=False):
    """Run a query against the module-level index and print both rankings.

    Args:
        query: Raw query string.
        result_numbers: How many results to show per similarity measure.
        champion_list: Restrict scoring to champions lists when true.

    Returns:
        tuple: ``(cosine_results, jaccard_results)`` as produced by
        ``print_results``; two empty dicts when nothing matched.
    """
    results_cosine, results_jaccard = query_scoring(
        query, len(docs), dictionary, result_numbers, champion_list
    )

    if results_cosine or results_jaccard:
        print("=== Cosine Scores ===")
        r1 = print_results(results_cosine)

        print("\n=== Jaccard Scores ===")
        r2 = print_results(results_jaccard)

        return r1, r2

    print("no results found")
    return {}, {}

In [141]:
r1, r2 = query_search('کریسمس', result_numbers = 5, champion_list = True)

{'کریسمس': 1}
=== Cosine Scores ===
---------- Results ----------
Rank: 1 | ID: 5933
Title : ستاره اسپانیایی؛ هدیه کریسمس گواردیولا به ژاوی+عکس
URL   : https://www.farsnews.ir/news/14001007000739/ستاره-اسپانیایی-هدیه-کریسمس-گواردیولا-به-ژاوی-عکس
--------------------------------------------------
Rank: 2 | ID: 6117
Title : کی‌روش «دیکتاتور» لقب گرفت/اختلاف مرد پرتغالی با مصری‌ها به خاطر کریسمس+عکس
URL   : https://www.farsnews.ir/news/14001005000165/کی‌روش-دیکتاتور-لقب-گرفت-اختلاف-مرد-پرتغالی-با-مصری‌ها-به-خاطر-کریسمس
--------------------------------------------------
Rank: 3 | ID: 6120
Title : کشتار در ورزشگاه فوتبال در آستانه سال جدید
URL   : https://www.farsnews.ir/news/14001005000143/کشتار-در-ورزشگاه-فوتبال-در-آستانه-سال-جدید
--------------------------------------------------
Rank: 4 | ID: 5926
Title : مهاجم خارجی مس رفسنجان،4 کودک را به محل تحصیل برگرداند +عکس
URL   : https://www.farsnews.ir/news/14001007000809/مهاجم-خارجی-مس-رفسنجان4-کودک-را-به-محل-تحصیل-برگرداند-عکس
--------------