In [1]:
# ==========================================
# BLOCK 1: SETUP & DOWNLOAD PROJECT
# ==========================================
import os

# 1. Reset directory to /content to avoid errors
os.chdir('/content')

# 2. Install dependencies
!pip install firebase-admin python-docx nltk
!pip install firebase-admin python-docx nltk requests

# 3. Download the project ZIP file
# ה-ID הזה חולץ מהקישור ששלחת קודם
file_id = '11_pyriXMsnnjC141BB47fOsCtfM6gUFr'
output_file = 'project.zip'


print("Downloading project files...")
# שימוש ב-gdown להורדה ישירה
!gdown {file_id} -O {output_file}

# 4. Unzip the files
print("Unzipping...")
!unzip -o {output_file}
print("Done.")

# בדיקה מה ירד (כדי שנדע אם זה בתוך תיקייה או לא)
print("Files in current directory:")
print(os.listdir('.'))

Collecting python-docx
  Downloading python_docx-1.2.0-py3-none-any.whl.metadata (2.0 kB)
Downloading python_docx-1.2.0-py3-none-any.whl (252 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m253.0/253.0 kB[0m [31m5.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: python-docx
Successfully installed python-docx-1.2.0
Downloading project files...
Downloading...
From: https://drive.google.com/uc?id=11_pyriXMsnnjC141BB47fOsCtfM6gUFr
To: /content/project.zip
100% 25.6M/25.6M [00:00<00:00, 139MB/s] 
Unzipping...
Archive:  project.zip
   creating: Cloud_Computing_Project_Shark_Team/
   creating: Cloud_Computing_Project_Shark_Team/.git/
  inflating: Cloud_Computing_Project_Shark_Team/.git/AUTO_MERGE  
  inflating: Cloud_Computing_Project_Shark_Team/.git/COMMIT_EDITMSG  
  inflating: Cloud_Computing_Project_Shark_Team/.git/config  
  inflating: Cloud_Computing_Project_Shark_Team/.git/description  
  inflating: Cloud_Computing_Project_Shark_Team/.git/FETCH_H

In [2]:
import os

# Enter the extracted project folder. The absolute path keeps this cell
# re-runnable: a relative chdir fails once the kernel is already inside it.
os.chdir('/content/Cloud_Computing_Project_Shark_Team')

# Confirm the location (this should list the project's .py and .docx files).
print("Current Directory:", os.getcwd())
print("Files:", os.listdir('.'))

Current Directory: /content/Cloud_Computing_Project_Shark_Team
Files: ['requirements.txt', 'serviceAccountKey.json', '__pycache__', 'config.py', 'test_real_firebase.py', 'articles_data', '.git', 'Shark_Project_Runner.ipynb', 'auth_service.py', 'main.py', '.gitignore', 'gamification_rules.py', 'README.md', 'HW1_SHARK.docx', 'data_manager.py']


In [3]:
%%writefile config.py
import os
import sys
import firebase_admin
from firebase_admin import credentials, firestore

# True when the google.colab module has been loaded, i.e. we are running in Colab.
IN_COLAB = 'google.colab' in sys.modules

# Anchor the project root to the directory containing this file so the
# credential path does not depend on the caller's working directory.
# Falls back to the current directory when __file__ is unavailable
# (e.g. the source is exec'd rather than imported).
if "__file__" in globals():
    PROJECT_ROOT = os.path.dirname(os.path.abspath(__file__))
else:
    PROJECT_ROOT = "."

KEY_FILENAME = "serviceAccountKey.json"
FIREBASE_CRED_PATH = os.path.join(PROJECT_ROOT, KEY_FILENAME)

# Warn early (at import time) if the service-account key is missing.
if not os.path.exists(FIREBASE_CRED_PATH):
    print(f"[WARNING] Key not found at {FIREBASE_CRED_PATH}")

# --- SINGLETON DB CONNECTION ---
# Cached Firestore client; populated on the first get_db() call.
_DB_CLIENT = None

def get_db():
    """Return the shared Firestore client, creating it on first use.

    The client is cached in the module-level ``_DB_CLIENT`` so repeated calls
    return the same object. If the default Firebase app has not been
    initialized yet, it is initialized from the service-account key at
    ``FIREBASE_CRED_PATH``.

    Returns:
        The Firestore client produced by ``firestore.client()``.

    Raises:
        Exception: Whatever ``credentials.Certificate`` or
            ``firebase_admin.initialize_app`` raises (for example when the key
            file is missing or invalid); the error is logged and re-raised.
    """
    global _DB_CLIENT
    if _DB_CLIENT is not None:
        return _DB_CLIENT

    # Use the public API to check for the *default* app, which is the one
    # firestore.client() uses. get_app() raises ValueError when it is absent.
    try:
        firebase_admin.get_app()
        default_app_exists = True
    except ValueError:
        default_app_exists = False

    if not default_app_exists:
        try:
            cred = credentials.Certificate(FIREBASE_CRED_PATH)
            firebase_admin.initialize_app(cred)
            print("[System] Firebase initialized.")
        except Exception as e:
            print(f"[Critical Error] Failed to init Firebase: {e}")
            # Bare raise preserves the original traceback.
            raise

    _DB_CLIENT = firestore.client()
    return _DB_CLIENT

Overwriting config.py


In [4]:
import os

key_path = "serviceAccountKey.json"
articles_path = "articles_data"

# Report whether the Firebase service-account key is present.
if not os.path.exists(key_path):
    print("ERROR: 'serviceAccountKey.json' not found! Check if it was in the ZIP.")
else:
    print("Service Key found.")

# List the articles folder once. A missing path, or a path that is not a
# directory, is treated as empty instead of letting os.listdir raise.
article_files = os.listdir(articles_path) if os.path.isdir(articles_path) else []

if not article_files:
    print("WARNING: 'articles_data' folder is empty or missing.")
else:
    print(f"Articles folder found with {len(article_files)} files.")

Service Key found.
Articles folder found with 5 files.


In [None]:
# Run the project's entry point as a shell command. Per the recorded output of
# this cell, main.py initializes Firebase, seeds the database from the article
# files, and builds the search index.
!python main.py

[System] Firebase initialized.
Downloading NLTK stopwords...
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.

   SHARK TEAM CLOUD SYSTEM - INITIALIZING
[OK] Database Connected
--- Seeding Database with REAL Data ---
Found 5 files. Processing...
Reading: 1-s2.0-S2772899424000417-main.docx...
  return query.where(field_path, op_string, value)
Skipping duplicate: 1-S2.0-S2772899424000417-Main
Reading: s41598-024-52038-y.docx...
Skipping duplicate: S41598-024-52038-Y
Reading: s41598-025-98454-6.docx...
Skipping duplicate: S41598-025-98454-6
Reading: pdis-03-15-0340-fe.docx...
Skipping duplicate: Pdis-03-15-0340-Fe
Reading: fpls_07_01419_pdf.docx...
Skipping duplicate: Fpls 07 01419 Pdf
Building Search Index...
Optimization: Starting index build in batches...
Cleaned 50 old index terms...
Indexing 5 articles...
Total unique 