In [5]:
# 📦 INSTALLATION CELL - Run this first!
# Installing all required packages for Arabic PDF RAG System

import subprocess
import sys

def install_package(package):
    """Install a package using pip"""
    try:
        subprocess.check_call([sys.executable, "-m", "pip", "install", package])
        print(f"✅ Successfully installed: {package}")
    except subprocess.CalledProcessError as e:
        print(f"❌ Failed to install {package}: {e}")

# List of required packages
packages = [
    "PyMuPDF",           # For PDF processing (correct package, not 'fitz')
    "sentence-transformers",  # For Arabic embeddings
    "faiss-cpu",         # For vector similarity search
    "numpy",             # For numerical operations
    "typing-extensions", # For type hints
]

print("🚀 Installing required packages for Arabic PDF RAG System...")
print("=" * 60)

for package in packages:
    print(f"\n📥 Installing {package}...")
    install_package(package)

print("\n" + "=" * 60)
print("✅ All packages installed successfully!")
print("🔄 Please restart the kernel and run the next cell.")
print("=" * 60)


🚀 Installing required packages for Arabic PDF RAG System...

📥 Installing PyMuPDF...
✅ Successfully installed: PyMuPDF

📥 Installing sentence-transformers...
✅ Successfully installed: sentence-transformers

📥 Installing faiss-cpu...
✅ Successfully installed: faiss-cpu

📥 Installing numpy...
✅ Successfully installed: numpy

📥 Installing typing-extensions...
✅ Successfully installed: typing-extensions

✅ All packages installed successfully!
🔄 Please restart the kernel and run the next cell.


In [None]:
# Streamlined Arabic PDF RAG System
import os
import glob
import fitz  # PyMuPDF for PDF handling
import numpy as np
from sentence_transformers import SentenceTransformer
import faiss
import re
from typing import List, Dict
import warnings
warnings.filterwarnings('ignore')

print("🚀 Arabic PDF RAG System - Loading...")

# Configuration
PDF_FOLDER = r"D:\NLP_S\قوانين"
CHUNK_SIZE = 1000  # Increased chunk size
OVERLAP = 100      # Overlap between chunks
TOP_K = 3          # Number of results to retrieve
EMBEDDING_MODEL = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"

def clean_arabic_text(text: str) -> str:
    """Clean Arabic text from OCR artifacts"""
    if not text:
        return ""
    
    # Basic cleaning
    text = re.sub(r'\s+', ' ', text)  # Multiple spaces to single
    text = re.sub(r'\n\s*\n', '\n', text)  # Multiple newlines to single
    text = re.sub(r'[\.]{3,}', '...', text)  # Multiple dots
    text = re.sub(r'[-]{2,}', '--', text)  # Multiple dashes
    
    # Remove page markers and noise
    text = re.sub(r'Page \d+', '', text)
    text = re.sub(r'صفحة \d+', '', text)
    text = re.sub(r'^[-=_\s]+$', '', text, flags=re.MULTILINE)
    
    # Fix Arabic punctuation spacing
    text = re.sub(r'\s*([،؛؟!])\s*', r'\1 ', text)
    
    # Remove single character noise
    text = re.sub(r'\b[a-zA-Z]\b', '', text)
    
    return text.strip()

def extract_text_from_pdf(pdf_path: str) -> str:
    """Extract text from PDF with OCR for scanned pages"""
    try:
        doc = fitz.open(pdf_path)
        full_text = ""
        
        for page_num in range(len(doc)):
            page = doc.load_page(page_num)
            text = page.get_text()
            
            # If no text (scanned page), try OCR
            if not text.strip():
                try:
                    # Convert page to image and use OCR
                    pix = page.get_pixmap(matrix=fitz.Matrix(2, 2))  # Higher resolution
                    img_data = pix.tobytes("png")
                    
                    # Try different text extraction methods for scanned content
                    text = page.get_text("text") or page.get_text("dict")
                    if isinstance(text, dict):
                        # Extract from dict format
                        extracted = ""
                        for block in text.get("blocks", []):
                            if "lines" in block:
                                for line in block["lines"]:
                                    for span in line["spans"]:
                                        extracted += span.get("text", "") + " "
                                extracted += "\n"
                        text = extracted
                    
                    if not text.strip():
                        text = f"[SCANNED PAGE {page_num+1} - TEXT NOT EXTRACTABLE]"
                except:
                    text = f"[ERROR EXTRACTING PAGE {page_num+1}]"
            
            # Clean and add to full text
            cleaned_text = clean_arabic_text(text)
            full_text += cleaned_text + "\n"
        
        doc.close()
        return clean_arabic_text(full_text)
    
    except Exception as e:
        return f"Error processing {pdf_path}: {e}"

def chunk_text(text: str, chunk_size: int = CHUNK_SIZE, overlap: int = OVERLAP) -> List[str]:
    """Split text into chunks"""
    if not text or len(text.strip()) < 100:
        return []
    
    chunks = []
    start = 0
    
    while start < len(text):
        end = start + chunk_size
        
        if end >= len(text):
            chunk = text[start:].strip()
            if len(chunk) > 100:
                chunks.append(chunk)
            break
        
        chunk = text[start:end]
        
        # Try to end at sentence boundaries
        for ending in ['.', '؟', '!', '؛', '\n']:
            pos = chunk.rfind(ending)
            if pos > chunk_size // 2:
                end = start + pos + 1
                chunk = text[start:end]
                break
        
        chunk = chunk.strip()
        if len(chunk) > 100:
            chunks.append(chunk)
        
        start = end - overlap
        
        if start >= len(text):
            break
    
    return [c for c in chunks if len(c.strip()) > 100]

# Process PDFs
print("📚 Processing PDFs...")
documents = []
pdf_files = glob.glob(os.path.join(PDF_FOLDER, "*.pdf"))

for pdf_path in pdf_files:
    filename = os.path.basename(pdf_path)
    print(f"Processing: {filename}")
    text = extract_text_from_pdf(pdf_path)
    
    if text and len(text.strip()) > 100:
        chunks = chunk_text(text)
        for j, chunk in enumerate(chunks):
            documents.append({
                'content': chunk,
                'source': filename,
                'chunk_id': j,
                'metadata': f"{filename} - Chunk {j+1}"
            })
        print(f"✅ Created {len(chunks)} chunks")
    else:
        print(f"⚠️ No usable text from {filename}")

print(f"Total chunks: {len(documents)}")

# Create embeddings and FAISS index
print("🧮 Creating embeddings...")
model = SentenceTransformer(EMBEDDING_MODEL)

texts = [doc['content'] for doc in documents]
if texts:
    embeddings = model.encode(texts, show_progress_bar=True)
    
    # Create FAISS index for cosine similarity
    dimension = embeddings.shape[1]
    index = faiss.IndexFlatIP(dimension)
    
    # Normalize for cosine similarity
    faiss.normalize_L2(embeddings)
    index.add(embeddings.astype('float32'))
    
    print(f"✅ Created FAISS index with {index.ntotal} vectors")
else:
    print("❌ No documents to index")
    index = None

# Search function
def search_documents(query: str, top_k: int = TOP_K) -> List[Dict]:
    """Search for relevant documents"""
    if index is None or not documents:
        return []
    
    # Encode and search
    query_embedding = model.encode([query])
    faiss.normalize_L2(query_embedding)
    scores, indices = index.search(query_embedding.astype('float32'), top_k)
    
    results = []
    for score, idx in zip(scores[0], indices[0]):
        if idx < len(documents):
            results.append({
                'content': documents[idx]['content'],
                'source': documents[idx]['source'],
                'metadata': documents[idx]['metadata'],
                'similarity_score': float(score)
            })
    
    return results

# Interactive search
print("✅ System ready! Use the search function below:")

def search_and_display(query: str):
    """Search and display results"""
    if not query.strip():
        print("❌ Please enter a question")
        return
    
    results = search_documents(query)
    
    if results:
        print(f"\n📄 Top {len(results)} results for: {query}\n")
        for i, doc in enumerate(results, 1):
            print(f"--- Result {i} ---")
            print(f"📂 Source: {doc['source']}")
            print(f"📊 Score: {doc['similarity_score']:.3f}")
            print(f"📝 Content: {doc['content'][:400]}...")
            print()
    else:
        print("❌ No relevant documents found.")

# Example usage:
# search_and_display("ما هي الضرائب المفروضة على الشركات؟")


ImportError: cannot import name 'TorchTensorParallelPlugin' from 'accelerate.utils' (/home/nu2/miniconda3/envs/saher/lib/python3.10/site-packages/accelerate/utils/__init__.py)

In [4]:
# Streamlined Arabic PDF RAG System
import os
import glob
import fitz  # PyMuPDF for PDF handling
import numpy as np
from sentence_transformers import SentenceTransformer
import faiss
import re
from typing import List, Dict
import warnings
warnings.filterwarnings('ignore')

print("🚀 Arabic PDF RAG System - Loading...")

# Configuration
PDF_FOLDER = r"D:\NLP_S\قوانين"
CHUNK_SIZE = 1000  # Increased chunk size
OVERLAP = 100      # Overlap between chunks
TOP_K = 3          # Number of results to retrieve
EMBEDDING_MODEL = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"

def clean_arabic_text(text: str) -> str:
    """Clean Arabic text from OCR artifacts"""
    if not text:
        return ""
    
    # Basic cleaning
    text = re.sub(r'\s+', ' ', text)  # Multiple spaces to single
    text = re.sub(r'\n\s*\n', '\n', text)  # Multiple newlines to single
    text = re.sub(r'[\.]{3,}', '...', text)  # Multiple dots
    text = re.sub(r'[-]{2,}', '--', text)  # Multiple dashes
    
    # Remove page markers and noise
    text = re.sub(r'Page \d+', '', text)
    text = re.sub(r'صفحة \d+', '', text)
    text = re.sub(r'^[-=_\s]+$', '', text, flags=re.MULTILINE)
    
    # Fix Arabic punctuation spacing
    text = re.sub(r'\s*([،؛؟!])\s*', r'\1 ', text)
    
    # Remove single character noise
    text = re.sub(r'\b[a-zA-Z]\b', '', text)
    
    return text.strip()

def extract_text_from_pdf(pdf_path: str) -> str:
    """Extract text from PDF with OCR for scanned pages"""
    try:
        doc = fitz.open(pdf_path)
        full_text = ""
        
        for page_num in range(len(doc)):
            page = doc.load_page(page_num)
            text = page.get_text()
            
            # If no text (scanned page), try OCR
            if not text.strip():
                try:
                    # Convert page to image and use OCR
                    pix = page.get_pixmap(matrix=fitz.Matrix(2, 2))  # Higher resolution
                    img_data = pix.tobytes("png")
                    
                    # Try different text extraction methods for scanned content
                    text = page.get_text("text") or page.get_text("dict")
                    if isinstance(text, dict):
                        # Extract from dict format
                        extracted = ""
                        for block in text.get("blocks", []):
                            if "lines" in block:
                                for line in block["lines"]:
                                    for span in line["spans"]:
                                        extracted += span.get("text", "") + " "
                                extracted += "\n"
                        text = extracted
                    
                    if not text.strip():
                        text = f"[SCANNED PAGE {page_num+1} - TEXT NOT EXTRACTABLE]"
                except:
                    text = f"[ERROR EXTRACTING PAGE {page_num+1}]"
            
            # Clean and add to full text
            cleaned_text = clean_arabic_text(text)
            full_text += cleaned_text + "\n"
        
        doc.close()
        return clean_arabic_text(full_text)
    
    except Exception as e:
        return f"Error processing {pdf_path}: {e}"

def chunk_text(text: str, chunk_size: int = CHUNK_SIZE, overlap: int = OVERLAP) -> List[str]:
    """Split text into chunks"""
    if not text or len(text.strip()) < 100:
        return []
    
    chunks = []
    start = 0
    
    while start < len(text):
        end = start + chunk_size
        
        if end >= len(text):
            chunk = text[start:].strip()
            if len(chunk) > 100:
                chunks.append(chunk)
            break
        
        chunk = text[start:end]
        
        # Try to end at sentence boundaries
        for ending in ['.', '؟', '!', '؛', '\n']:
            pos = chunk.rfind(ending)
            if pos > chunk_size // 2:
                end = start + pos + 1
                chunk = text[start:end]
                break
        
        chunk = chunk.strip()
        if len(chunk) > 100:
            chunks.append(chunk)
        
        start = end - overlap
        
        if start >= len(text):
            break
    
    return [c for c in chunks if len(c.strip()) > 100]

# Process PDFs
print("📚 Processing PDFs...")
documents = []
pdf_files = glob.glob(os.path.join(PDF_FOLDER, "*.pdf"))

for pdf_path in pdf_files:
    filename = os.path.basename(pdf_path)
    print(f"Processing: {filename}")
    text = extract_text_from_pdf(pdf_path)
    
    if text and len(text.strip()) > 100:
        chunks = chunk_text(text)
        for j, chunk in enumerate(chunks):
            documents.append({
                'content': chunk,
                'source': filename,
                'chunk_id': j,
                'metadata': f"{filename} - Chunk {j+1}"
            })
        print(f"✅ Created {len(chunks)} chunks")
    else:
        print(f"⚠️ No usable text from {filename}")

print(f"Total chunks: {len(documents)}")

# Create embeddings and FAISS index
print("🧮 Creating embeddings...")
model = SentenceTransformer(EMBEDDING_MODEL)

texts = [doc['content'] for doc in documents]
if texts:
    embeddings = model.encode(texts, show_progress_bar=True)
    
    # Create FAISS index for cosine similarity
    dimension = embeddings.shape[1]
    index = faiss.IndexFlatIP(dimension)
    
    # Normalize for cosine similarity
    faiss.normalize_L2(embeddings)
    index.add(embeddings.astype('float32'))
    
    print(f"✅ Created FAISS index with {index.ntotal} vectors")
else:
    print("❌ No documents to index")
    index = None

# Search function
def search_documents(query: str, top_k: int = TOP_K) -> List[Dict]:
    """Search for relevant documents"""
    if index is None or not documents:
        return []
    
    # Encode and search
    query_embedding = model.encode([query])
    faiss.normalize_L2(query_embedding)
    scores, indices = index.search(query_embedding.astype('float32'), top_k)
    
    results = []
    for score, idx in zip(scores[0], indices[0]):
        if idx < len(documents):
            results.append({
                'content': documents[idx]['content'],
                'source': documents[idx]['source'],
                'metadata': documents[idx]['metadata'],
                'similarity_score': float(score)
            })
    
    return results

# Interactive search
print("✅ System ready! Use the search function below:")

def search_and_display(query: str):
    """Search and display results"""
    if not query.strip():
        print("❌ Please enter a question")
        return
    
    results = search_documents(query)
    
    if results:
        print(f"\n📄 Top {len(results)} results for: {query}\n")
        for i, doc in enumerate(results, 1):
            print(f"--- Result {i} ---")
            print(f"📂 Source: {doc['source']}")
            print(f"📊 Score: {doc['similarity_score']:.3f}")
            print(f"📝 Content: {doc['content'][:400]}...")
            print()
    else:
        print("❌ No relevant documents found.")

# Example usage:
# search_and_display("ما هي الضرائب المفروضة على الشركات؟")


ImportError: cannot import name 'TorchTensorParallelPlugin' from 'accelerate.utils' (/home/nu2/miniconda3/envs/saher/lib/python3.10/site-packages/accelerate/utils/__init__.py)

In [2]:
# 🔍 Search your documents here!
# Replace the question below with your own Arabic question

search_and_display("ما هي الضرائب المفروضة على الشركات؟")

# You can also try these examples:
# search_and_display("كيف يتم حساب ضريبة القيمة المضافة؟")
# search_and_display("ما هي العقوبات على التأخير في دفع الضرائب؟")
# search_and_display("ما هي شروط الإعفاء الضريبي؟")



📄 Top 3 results for: ما هي الضرائب المفروضة على الشركات؟

--- Result 1 ---
📂 Source: law_no.30-2023.pdf
📊 Score: 0.709
📝 Content: ﺍﻟﺘﻲ ﺘﺤﺼل ﻋﻠﻴﻬﺎ ﺍﻟﺤﻜﻭﻤﺔ ﻭﻭﺤﺩﺍ ﺕ ﺍﻹﺩﺍﺭﺓ ﺍﻟﻤﺤﻠﻴﺔ ﻭﻏﻴﺭﻫﺎ ﻤﻥ ﺍﻷﺸـﺨﺎﺹ ﺍﻻﻋﺘﺒﺎﺭﻴﺔ ﺍﻟﻌﺎﻤﺔ ﻤﻥ ﻤﺼﺎﺩﺭ ﺨﺎﺭﺝ ﻤﺼﺭ. ﻣﺎﺩﺓ)٦٥ ﻣﻜﺮﺭ ً ﺍ(: ﺘﺨﻀﻊ ﻟﻠﻀﺭﻴﺒﺔ ﺒﺴﻌ ﺭ)٠١٪ ( ﺩﻭﻥ ﺨﺼﻡ ﺃﻴﺔ ﺘﻜﺎﻟﻴﻑ ﺘﻭﺯﻴﻌـﺎﺕ ﺍﻷﺭﺒـﺎﺡ ﺍﻟﺘـﻲ ﺘﺠﺭﻴﻬﺎ ﺸﺭﻜﺎﺕ ﺍﻷﻤﻭﺍل ﺃﻭ ﺸﺭﻜﺎﺕ ﺍﻷﺸﺨﺎﺹ، ﺒﻤﺎ ﻓﻲ ﺫﻟﻙ ﺍﻟﺸﺭﻜﺎﺕ ﺍﻟﻤﻘﺎﻤﺔ ﺒﻨﻅـﺎﻡ ﺍﻟﻤﻨﺎﻁﻕ ﺍﻻﻗﺘﺼﺎﺩﻴﺔ ﺫﺍﺕ ﺍﻟﻁ ﺒﻴﻌﺔ ﺍﻟﺨﺎﺼﺔ ﻟﻠﺸﺨﺹ ﺍﻟﻁ ﺒﻴﻌﻲ ﻏﻴـﺭ ﺍﻟﻤﻘـﻴﻡ ﻭﺍﻟـﺸﺨﺹ ﺍﻻﻋﺘﺒﺎﺭ ﻯ ﺍﻟﻤﻘﻴﻡ ﺃﻭ ﻏﻴﺭ ﺍﻟﻤﻘﻴﻡ ﺒﻤﺎ ﻓﻲ ﺫﻟﻙ ﺃﺭﺒﺎﺡ ﺍﻷﺸﺨﺎﺹ ﺍﻻﻋﺘﺒﺎﺭﻴﺔ ﻏﻴـ...

--- Result 2 ---
📂 Source: law_no.30-2023.pdf
📊 Score: 0.697
📝 Content: ﻤﻥ ﻤﺭﺍﺠﻌﺔ ﻟﻠﺘﺸﺭﻴﻌﺎﺕ ﻭﺍﻹﺠﺭﺍﺀﺍﺕ ﻭﺍﻟﺤﻭﺍﻓﺯ ﺍﻟﻀﺭﻴﺒﻴﺔ ﻭﺍﻟﺠﻤﺭﻜﻴـﺔ، ﻭﻗﻴـﺎﺱ ﺤﺎﻻﺕ ﺍﻻﻤﺘﺜﺎل ﺍﻟﻀﺭﻴﺒﻲ ﻟﻠﻤﺴﺘﺜﻤﺭﻴﻥ. صورة إلكترونية ال يعتد بها عند التداول اﻟﺟرﯾدة اﻟرﺳﻣﯾﺔ– اﻟﻌدد ٤٢) ﺗﺎﺑﻊ ( ﻓﻰ ٥١ ﯾوﻧﯾﺔ ﺳﻧﺔ ٣٢٠٢ ٧١ ٩- ﺍﻟﻘﻴﺎﺱ ﺍﻟﻤﺴﺘﻤﺭ ﻟﻤﺸﺭﻭﻋﺎﺕ ﺍﻹﺩﺍﺭﺓ ﺍﻟﻀﺭﻴﺒﻴﺔ ﻭﻤﺩﻯ ﺍﻋﺘﻤﺎﺩﻫﺎ ﻋﻠﻰ ﺍﻷﺴـﺎﺱ ﺍﻟﻔﻌﻠﻲ ﺍﻟﻌﺎﺩل ﻓﻲ ﺘﺤﺼﻴل ﺠﻤﻴﻊ ﺃﻨﻭﺍﻉ ﺍﻟﻀﺭﺍﺌﺏ ﻭﺍﻟﺠﻤـﺎﺭﻙ، ﻭﺇﺠﺭﺍﺀﺍﺘﻬـﺎ ﻟﺨﻔـﺽ ﺍﻟﻁﻌﻭﻥ ﻭﺍﻟﻤﻨﺎﺯﻋﺎﺕ ﺍﻟﻀﺭﻴﺒﻴﺔ ﺇﻟﻰ ﺃﻗل ﺤﺩ ﻤﻤﻜﻥ . ﻭﻴﺼﺩﺭ ﺒﺘﺸﻜﻴ

In [3]:
# 🤖 Enhanced Arabic PDF RAG System with Ollama Integration
import subprocess
import json
import sys
import re

def clean_ollama_response(response: str) -> str:
    """
    Keep response as-is, including thinking parts
    """
    if not response:
        return "❌ لم يتم الحصول على إجابة من النموذج"
    
    # Keep everything as-is, just basic cleanup
    response = response.strip()
    
    return response

def query_ollama(prompt: str, model: str = "deepseek-r1:7b") -> str:
    """
    Query Ollama model using subprocess with improved response handling
    """
    try:
        # Prepare the command
        cmd = ["ollama", "run", model]
        
        # Start the process
        process = subprocess.Popen(
            cmd,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            encoding='utf-8'
        )
        
        # Send the prompt and get response
        stdout, stderr = process.communicate(input=prompt, timeout=600)  # 5 minute timeout
        
        if process.returncode != 0:
            return f"❌ خطأ في استدعاء Ollama: {stderr}"
        
        # Clean and return response
        cleaned_response = clean_ollama_response(stdout)
        
        return cleaned_response
        
    except subprocess.TimeoutExpired:
        return "❌ انتهت مهلة الاستجابة. يرجى المحاولة مرة أخرى."
    except FileNotFoundError:
        return "❌ Ollama غير موجود. يرجى التأكد من تثبيت Ollama وإضافته إلى PATH."
    except Exception as e:
        return f"❌ خطأ في التواصل مع Ollama: {str(e)}"

def create_rag_prompt(question: str, search_results: List[Dict]) -> str:
    """
    Create a concise prompt for the LLM with retrieved context
    """
    
    # Collect relevant content (take only first 2-3 most relevant results to reduce size)
    context_texts = []
    for i, result in enumerate(search_results[:3], 1):  # Limit to top 3 results
        # Truncate content to reduce size
        content = result['content'][:500] + "..." if len(result['content']) > 500 else result['content']
        context_texts.append(f"المصدر {i}: {content}")
    
    context = "\n---\n".join(context_texts)
    
    prompt = f"""أنت خبير قانوني مصري. أجب على السؤال بناءً على النصوص القانونية المرفقة فقط.

السؤال: {question}

النصوص القانونية:
{context}

التعليمات:
- أجب بالعربية فقط
- استخدم النصوص المرفقة فقط
- اذكر المصدر والمادة القانونية
- كن دقيقاً ومختصراً

الإجابة:"""
    
    return prompt


def enhanced_search_and_answer(query: str):
    """
    Simple search function that returns question, retrieved documents, and model answer
    """
    if not query.strip():
        return {"error": "يرجى إدخال سؤال"}
    
    # Get search results
    results = search_documents(query, top_k=5)
    
    if not results:
        return {"error": "لم يتم العثور على مستندات ذات صلة"}
    
    # Create comprehensive prompt
    rag_prompt = create_rag_prompt(query, results)
    
    # Get LLM response
    llm_response = query_ollama(rag_prompt)
    
    return {
        'question': query,
        'retrieved_documents': results,
        'model_answer': llm_response
    }




In [4]:
# 🚀 Test the Enhanced RAG System with Ollama
# Make sure Ollama is running and deepseek-r1:7b model is available

# Simple test - returns only question, retrieved documents, and model answer
result = enhanced_search_and_answer("ما هي الضرائب المفروضة على الشركات؟")

# Display the clean result
if 'error' in result:
    print(f"خطأ: {result['error']}")
else:
    print(f"السؤال: {result['question']}")
    print(f"\nعدد المستندات المسترجعة: {len(result['retrieved_documents'])}")
    print(f"\nالإجابة: {result['model_answer']}")



السؤال: ما هي الضرائب المفروضة على الشركات؟

عدد المستندات المسترجعة: 5

الإجابة: Thinking...
Okay, so I need to answer the question about what taxes are imposed on companies using the provided legal texts. Let me go through each source one by one.

Starting with Source 1, which is in Arabic and talks about how businesses are subject to a tax rate of up to 65% on their taxable income. It mentions that this applies to partnerships and corporations taxed under the general business tax law without exempting individuals from certain gains, especially those made through ordinary activities. So, the main point here seems to be the high tax rate of 65%.

Moving on to Source 2, it discusses regulations concerning shares, investments, and income tax, as well as excise duties. It mentions that companies must file reports quarterly based on their income. Additionally, there are provisions about exemptions for small investors and certain types of businesses. So, from this source, the taxes include