In [None]:
import pandas as pd
import re
import random
from langchain.docstore.document import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter



In [None]:
# Load the merged career-guidance Q&A CSV with the Python parser engine;
# rows the parser cannot handle are skipped rather than aborting the read.
df = pd.read_csv(
    "final_merged_career_guidance.csv",
    engine='python',
    on_bad_lines='skip',
)
def clean_qa_data(df):
    """Return a normalised, de-duplicated copy of a question/answer DataFrame.

    Questions are stripped, lower-cased, reduced to word characters,
    whitespace and ``? . !``, have whitespace runs collapsed to one space,
    and get a trailing ``?`` when they do not already end in ``?``, ``.`` or
    ``!``. Answers only have whitespace runs collapsed and are stripped.

    Args:
        df: DataFrame with at least ``question`` and ``answer`` columns.
            It is not modified.

    Returns:
        A new DataFrame with rows lacking a question or answer removed and
        one row kept per distinct *normalised* question (first occurrence).

    Raises:
        KeyError: if ``question`` or ``answer`` is missing from ``df``.
    """
    def _normalize_question(text):
        text = str(text).strip().lower()
        # Strip symbols first so the terminal-punctuation check below sees
        # the final text (otherwise 'really?)' would end up as 'really??').
        text = re.sub(r'[^\w\s\?\.\!]', '', text)
        # '\s+' (not '\s') so runs of whitespace collapse to a single space.
        text = re.sub(r'\s+', ' ', text).strip()
        return text if text.endswith(('?', '.', '!')) else text + '?'

    def _normalize_answer(text):
        return re.sub(r'\s+', ' ', str(text)).strip()

    # Rows with a missing question/answer cannot be normalised (the string
    # helpers would raise on NaN), so they are dropped up front. assign()
    # returns a new frame, leaving the caller's DataFrame untouched.
    normalized_df = df.dropna(subset=['question', 'answer']).assign(
        question=lambda frame: frame['question'].map(_normalize_question),
        answer=lambda frame: frame['answer'].map(_normalize_answer),
    )

    # De-duplicate AFTER normalisation so that questions differing only in
    # case, spacing or stray symbols count as the same question.
    initial_count = len(normalized_df)
    cleaned_df = normalized_df.drop_duplicates(subset=['question'])
    print(f"Removed {initial_count -len(cleaned_df)}duplicates questions.")

    print(f"Final cleaned dataset: {len(cleaned_df)} Q&A pairs")
    return cleaned_df

In [None]:


def optimized_augment_qa_data(df, augmentation_factor=2):
    """Augment a Q&A DataFrame with paraphrased question/answer variants.

    Every input row is emitted unchanged (``is_augmented=False``) followed by
    up to ``augmentation_factor`` distinct variants (``is_augmented=True``).
    A variant is produced by applying one randomly ordered
    (question rewrite, answer rewrite) pair; pairs whose output equals the
    original row or an already emitted variant are skipped. Fewer variants
    are produced only when the rewrites cannot yield enough distinct results.

    Args:
        df: DataFrame with ``question`` and ``answer`` string columns.
        augmentation_factor: maximum number of variants per input row.

    Returns:
        DataFrame with columns ``question``, ``answer``, ``is_augmented``
        and ``original_question`` (the un-rewritten question of the row the
        entry was derived from).
    """
    # tqdm only provides the progress bar; degrade to a pass-through wrapper
    # when it is not installed instead of failing with a NameError.
    try:
        from tqdm import tqdm
    except ImportError:
        def tqdm(iterable, **_kwargs):
            return iterable

    print("🚀 Creating question variations (optimized)...")

    # Pre-define variation functions
    question_variations = [
        lambda q: q,
        lambda q: q.replace('what is', 'tell me about'),
        lambda q: q.replace('what does', 'what do'),
        lambda q: q.replace('what are', 'what is'),
        lambda q: f"can you explain {q}",
        lambda q: f"i want to know about {q}",
        lambda q: f"could you tell me about {q}",
        lambda q: f"what can you tell me about {q}",
        lambda q: q.replace('?', ''),
        lambda q: q + " please",
    ]

    answer_variations = [
        lambda a: a,
        lambda a: a[0].lower() + a[1:] if a else a,
        lambda a: "Career Compass: " + a,
    ]

    # Upper bound on the output size (+1 for the original of each row)
    total_rows = len(df) * (augmentation_factor + 1)
    print(f"Will create approximately {total_rows} total rows")

    # Enumerate every (question rewrite, answer rewrite) pair once. Lambdas
    # all share the __name__ '<lambda>', so the pairs themselves - not their
    # names - must be used to tell combinations apart.
    variation_pairs = [
        (q_func, a_func)
        for q_func in question_variations
        for a_func in answer_variations
    ]

    augmented_rows = []

    for _, row in tqdm(df.iterrows(), total=len(df), desc="Augmenting"):
        original_question = row['question']
        original_answer = row['answer']

        # Add original row
        augmented_rows.append({
            'question': original_question,
            'answer': original_answer,
            'is_augmented': False,
            'original_question': original_question
        })

        # Results already emitted for this row; seeded with the original so
        # no-op rewrites are rejected.
        seen_results = {(original_question, original_answer)}
        variations_created = 0

        # Walk the pairs in random order. The walk is finite, so the loop
        # always terminates even when too few distinct variants exist.
        for q_func, a_func in random.sample(variation_pairs, len(variation_pairs)):
            if variations_created >= augmentation_factor:
                break

            candidate = (q_func(original_question), a_func(original_answer))
            if candidate in seen_results:
                continue
            seen_results.add(candidate)

            augmented_rows.append({
                'question': candidate[0],
                'answer': candidate[1],
                'is_augmented': True,
                'original_question': original_question
            })
            variations_created += 1

    # Create DataFrame in one go; explicit columns keep the schema stable
    # even when the input is empty.
    augmented_df = pd.DataFrame(
        augmented_rows,
        columns=['question', 'answer', 'is_augmented', 'original_question'],
    )
    print(f"✅ Created {len(augmented_df)} total entries")

    return augmented_df

def fast_augmentation(df, augmentation_factor=2):
    """Augment questions column-wise, one whole-frame pass per rewrite.

    The first ``augmentation_factor`` of three fixed question rewrites are
    applied to a copy of ``df``; answers are left unchanged. The copies are
    stacked under the original rows and de-duplicated on ``question``
    (first occurrence wins, so a rewrite that did not change a question is
    discarded in favour of the original row).

    Args:
        df: DataFrame with a string ``question`` column.
        augmentation_factor: number of rewrites to apply; values above the
            number of available rewrites add nothing further.

    Returns:
        DataFrame with the columns of ``df`` plus ``is_augmented`` and
        ``original_question`` (the question before rewriting).
    """
    print("⚡ Using ultra-fast augmentation...")

    # Available whole-column rewrites, applied in this order.
    question_variations = [
        lambda q: q.replace('what is', 'tell me about') if 'what is' in q else q,
        lambda q: f"can you explain {q}",
        lambda q: q.replace('?', '') + " please" if q.endswith('?') else q,
    ]

    # Never run more passes than there are rewrites: extra passes would only
    # produce exact copies that the de-duplication below throws away.
    samples_per_row = max(0, min(augmentation_factor, len(question_variations)))

    # Original data
    original_df = df.copy()
    original_df['is_augmented'] = False
    original_df['original_question'] = original_df['question']
    augmented_dfs = [original_df]

    # Create variations in batches
    for rewrite in question_variations[:samples_per_row]:
        variation_df = df.copy()
        variation_df['question'] = df['question'].apply(rewrite)
        variation_df['is_augmented'] = True
        # Take the source question from the untouched input frame, not from
        # the column that was just rewritten.
        variation_df['original_question'] = df['question']
        augmented_dfs.append(variation_df)

    # Combine all
    result_df = pd.concat(augmented_dfs, ignore_index=True)
    result_df = result_df.drop_duplicates(subset=['question'])

    print(f"⚡ Created {len(result_df)} entries")
    return result_df

# Main execution
def main(input_path='final_merged_career_guidance.csv',
         output_path='cleaned_augmented_career_data.csv'):
    """Run the clean -> augment -> save pipeline and print summary statistics.

    Args:
        input_path: CSV file with ``question`` and ``answer`` columns.
        output_path: destination CSV for the cleaned and augmented rows.

    Returns:
        None. The result is written to ``output_path``.
    """
    print("Starting optimized data processing...")

    # Load data
    # NOTE(review): the module-level load of this same file passes
    # on_bad_lines='skip' with the Python engine; this call uses the
    # defaults - confirm which parser settings are intended.
    df = pd.read_csv(input_path)
    print(f"Original data: {len(df)} rows")

    # Clean data
    cleaned_df = clean_qa_data(df)

    # Choose augmentation method based on dataset size
    if len(cleaned_df) > 1000:
        print("Large dataset detected, using fast method...")
        augmented_df = fast_augmentation(cleaned_df, augmentation_factor=2)
    else:
        print("Small dataset, using detailed method...")
        augmented_df = optimized_augment_qa_data(cleaned_df, augmentation_factor=2)

    # Save results
    augmented_df.to_csv(output_path, index=False)
    print("✅ Saved cleaned and augmented data!")

    # Growth is undefined when cleaning left no rows; report 0.0 instead of
    # raising ZeroDivisionError after the file has already been written.
    growth = len(augmented_df) / len(cleaned_df) if len(cleaned_df) else 0.0

    # Show statistics
    print("\n📊 Final Statistics:")
    print(f"Original rows: {len(df)}")
    print(f"After cleaning: {len(cleaned_df)}")
    print(f"After augmentation: {len(augmented_df)}")
    print(f"Augmented rows: {len(augmented_df) - len(cleaned_df)}")
    print(f"Total growth: {growth:.1f}x")

if __name__ == "__main__":
    import time

    # Bracket the pipeline run with wall-clock timestamps.
    start_time = time.time()
    main()
    end_time = time.time()

    elapsed = end_time - start_time
    print(f"\n⏰ Total processing time: {elapsed:.2f} seconds")

Starting optimized data processing...
Original data: 10224 rows
Removed 7246duplicates questions.
Final cleaned dataset: 2978 Q&A pairs
Large dataset detected, using fast method...
⚡ Using ultra-fast augmentation...
⚡ Created 6366 entries
✅ Saved cleaned and augmented data!

📊 Final Statistics:
Original rows: 10224
After cleaning: 2978
After augmentation: 6366
Augmented rows: 3388
Total growth: 2.1x

⏰ Total processing time: 1.52 seconds


In [None]:
!pip uninstall -y weaviate weaviate-client


[0m

In [None]:
!pip install weaviate-client==4.16.9


Collecting weaviate-client==4.16.9
  Downloading weaviate_client-4.16.9-py3-none-any.whl.metadata (3.7 kB)
Collecting validators<1.0.0,>=0.34.0 (from weaviate-client==4.16.9)
  Downloading validators-0.35.0-py3-none-any.whl.metadata (3.9 kB)
Collecting deprecation<3.0.0,>=2.1.0 (from weaviate-client==4.16.9)
  Downloading deprecation-2.1.0-py2.py3-none-any.whl.metadata (4.6 kB)
Downloading weaviate_client-4.16.9-py3-none-any.whl (579 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m579.1/579.1 kB[0m [31m9.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading deprecation-2.1.0-py2.py3-none-any.whl (11 kB)
Downloading validators-0.35.0-py3-none-any.whl (44 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.7/44.7 kB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: validators, deprecation, weaviate-client
Successfully installed deprecation-2.1.0 validators-0.35.0 weaviate-client-4.16.9


In [None]:
!pip install langchain-community

Collecting langchain-community
  Downloading langchain_community-0.3.29-py3-none-any.whl.metadata (2.9 kB)
Collecting requests<3,>=2.32.5 (from langchain-community)
  Downloading requests-2.32.5-py3-none-any.whl.metadata (4.9 kB)
Collecting dataclasses-json<0.7,>=0.6.7 (from langchain-community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7,>=0.6.7->langchain-community)
  Downloading marshmallow-3.26.1-py3-none-any.whl.metadata (7.3 kB)
Collecting typing-inspect<1,>=0.4.0 (from dataclasses-json<0.7,>=0.6.7->langchain-community)
  Downloading typing_inspect-0.9.0-py3-none-any.whl.metadata (1.5 kB)
Collecting mypy-extensions>=0.3.0 (from typing-inspect<1,>=0.4.0->dataclasses-json<0.7,>=0.6.7->langchain-community)
  Downloading mypy_extensions-1.1.0-py3-none-any.whl.metadata (1.1 kB)
Downloading langchain_community-0.3.29-py3-none-any.whl (2.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [None]:
!pip install langchain-weaviate

Collecting langchain-weaviate
  Downloading langchain_weaviate-0.0.5-py3-none-any.whl.metadata (2.7 kB)
Downloading langchain_weaviate-0.0.5-py3-none-any.whl (10 kB)
Installing collected packages: langchain-weaviate
Successfully installed langchain-weaviate-0.0.5


In [None]:
import os
from getpass import getpass

# SECURITY: never store credentials in the notebook itself - they end up in
# version control and in shared copies. Any key that was previously
# hard-coded in this cell must be treated as leaked and rotated in the
# Weaviate Cloud console.
#
# Values already present in the environment (e.g. exported in the shell or
# injected from a secrets manager) are used as-is; otherwise prompt for them.
if not os.environ.get("WEAVIATE_CLOUD_URL"):
    os.environ["WEAVIATE_CLOUD_URL"] = input("Weaviate Cloud cluster URL: ").strip()
if not os.environ.get("WEAVIATE_API_KEY"):
    # getpass keeps the key out of the cell output.
    os.environ["WEAVIATE_API_KEY"] = getpass("Weaviate API key: ")


In [None]:
import os
import pandas as pd
import weaviate
from weaviate.classes.init import Auth
from weaviate.classes.config import Property, DataType
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_weaviate.vectorstores import WeaviateVectorStore
from langchain.docstore.document import Document
from langchain.text_splitter import TokenTextSplitter
from openai import OpenAI


# ✅ The OpenAI API key is taken from the OPENAI_API_KEY environment variable.
# An already-configured key is left untouched (it is no longer overwritten
# with a placeholder); only when none is set is it requested interactively,
# so the key is never written into the notebook.
if not os.environ.get("OPENAI_API_KEY"):
    from getpass import getpass
    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")
llm_client = OpenAI()


class CareerCompassWeaviate:
    """Retrieval-augmented career Q&A assistant backed by Weaviate Cloud.

    Typical use: ``initialize_system(csv_path)`` to connect, ensure the
    collection exists and index the CSV's answers, then ``ask_question(...)``
    any number of times, and finally ``close_connection()``.

    Attributes are ``None`` until ``initialize_system`` succeeds.
    """

    # Name of the Weaviate collection used for both the schema and the index.
    COLLECTION_NAME = "CareerKnowledge"

    def __init__(self):
        self.client = None       # Weaviate client, set by _initialize_weaviate_client
        self.vectorstore = None  # WeaviateVectorStore, set by initialize_system

    def _initialize_weaviate_client(self):
        """Connect to Weaviate Cloud using environment credentials.

        Reads ``WEAVIATE_CLOUD_URL`` and ``WEAVIATE_API_KEY``.

        Returns:
            True when the client is connected and ready, otherwise False
            (errors are printed, not raised).
        """
        try:
            cluster_url = os.getenv("WEAVIATE_CLOUD_URL")
            api_key = os.getenv("WEAVIATE_API_KEY")

            # Fail with an actionable message instead of an opaque client
            # error when the credentials were never provided.
            if not cluster_url or not api_key:
                print("❌ Connection error: WEAVIATE_CLOUD_URL and WEAVIATE_API_KEY must be set")
                return False

            print(f"🔗 Connecting to: {cluster_url}")

            self.client = weaviate.connect_to_weaviate_cloud(
                cluster_url=cluster_url,
                auth_credentials=Auth.api_key(api_key)
            )

            if self.client.is_ready():
                print("✅ Successfully connected to Weaviate Cloud!")
                return True

            print("❌ Failed to connect to Weaviate Cloud")
            # Do not keep (and leak) a connection that is not usable.
            self.client.close()
            self.client = None
            return False

        except Exception as e:
            print(f"❌ Connection error: {e}")
            return False

    def _check_and_create_schema(self):
        """Create the collection if it does not exist yet.

        Returns:
            True when the collection exists afterwards, False on any error
            (errors are printed, not raised).
        """
        try:
            class_name = self.COLLECTION_NAME

            # collections.list_all() returns a mapping keyed by collection
            # name, so iterating it yields plain strings; ask the server
            # directly whether the collection exists instead.
            if not self.client.collections.exists(class_name):
                print("📋 Creating Weaviate schema...")

                self.client.collections.create(
                    name=class_name,
                    properties=[
                        Property(name="question", data_type=DataType.TEXT),
                        Property(name="answer", data_type=DataType.TEXT),
                        Property(name="is_augmented", data_type=DataType.BOOL),
                        Property(name="source", data_type=DataType.TEXT),
                    ]
                )
                print("✅ Schema created successfully!")
            else:
                print("✅ Schema already exists")

            return True

        except Exception as e:
            print(f"❌ Schema creation error: {e}")
            return False


    def initialize_system(self, data_path):
        """Initialize the complete RAG system.

        Connects to Weaviate, ensures the collection exists, loads the CSV at
        ``data_path``, splits every answer into token-based chunks, embeds
        them and uploads them in batches.

        Args:
            data_path: CSV file with ``question`` and ``answer`` columns; an
                optional ``is_augmented`` column is carried into the
                metadata (defaults to False when absent).

        Returns:
            True on success; False when connecting, schema creation or
            loading the CSV fails.
        """
        print("🚀 Initializing Career Compass...")

        if not self._initialize_weaviate_client():
            print("❌ Failed to initialize Weaviate client")
            return False

        if not self._check_and_create_schema():
            print("❌ Failed to create schema")
            return False

        print(f"📂 Loading data from: {data_path}")
        try:
            df = pd.read_csv(data_path)
            print(f"📄 Loaded {len(df)} rows from CSV")
        except Exception as e:
            print(f"❌ Failed to load CSV: {e}")
            return False

        # ✅ Token-based chunking
        print("✂️ Splitting text into token-based chunks...")
        text_splitter = TokenTextSplitter(
            chunk_size=200,    # 200 tokens per chunk
            chunk_overlap=20
        )

        documents = []
        skipped_rows = 0
        for _, row in df.iterrows():
            answer = row["answer"]
            # An empty CSV cell is read as NaN, which the splitter cannot
            # tokenize; skip such rows instead of aborting the whole load.
            if pd.isna(answer):
                skipped_rows += 1
                continue
            answer = str(answer)
            question = "" if pd.isna(row["question"]) else str(row["question"])

            # Honour an 'is_augmented' column when the CSV has one.
            augmented_flag = row.get("is_augmented", False)
            is_augmented = False if pd.isna(augmented_flag) else bool(augmented_flag)

            # NOTE(review): the metadata carries an "answer" key while the
            # vector store below also uses text_key="answer" for the chunk
            # text - confirm which of the two langchain-weaviate ends up
            # storing in (and returning from) that property.
            for chunk in text_splitter.split_text(answer):
                doc = Document(
                    page_content=chunk,
                    metadata={
                        "question": question,
                        "answer": answer,
                        "is_augmented": is_augmented,
                        "source": "career_compass_dataset"
                    }
                )
                documents.append(doc)

        if skipped_rows:
            print(f"⚠️ Skipped {skipped_rows} rows without an answer")
        print(f"📝 Prepared {len(documents)} chunks for embedding")

        # Embeddings
        print("🧠 Initializing embeddings...")
        embedding_model = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2",
            model_kwargs={'device': 'cpu'},
            encode_kwargs={'normalize_embeddings': True}
        )

        # Create vector store
        print("💾 Creating vector store...")
        self.vectorstore = WeaviateVectorStore(
            client=self.client,
            index_name=self.COLLECTION_NAME,
            text_key="answer",
            embedding=embedding_model,
            attributes=["question", "answer", "is_augmented", "source"]
        )

        # Add documents in batches
        # NOTE(review): every call uploads all chunks again; re-running
        # against a populated collection presumably duplicates them.
        print("📤 Adding documents to Weaviate...")
        batch_size = 100
        for i in range(0, len(documents), batch_size):
            batch = documents[i:i + batch_size]
            self.vectorstore.add_documents(batch)
            # Report progress every tenth batch only.
            if (i // batch_size) % 10 == 0:
                print(f"   Added {min(i + batch_size, len(documents))}/{len(documents)} chunks")

        print(f"✅ Added {len(documents)} chunks to Weaviate Cloud")
        print("🎉 Career Compass (RAG) initialized successfully!")
        return True

    def ask_question(self, question, k=5, max_tokens=300):
        """Ask a question and get a synthesized RAG answer.

        Args:
            question: the user's question, passed to retrieval verbatim.
            k: number of chunks to retrieve for the prompt context.
            max_tokens: completion token limit; answers longer than this are
                cut off by the model.

        Returns:
            dict with ``answer`` and ``confidence`` keys, plus
            ``retrieved_chunks`` when an LLM answer was produced. Errors are
            reported in the dict (``confidence == "Error"``), not raised.
        """
        try:
            if not self.vectorstore:
                return {"answer": "System not initialized.", "confidence": "Error"}

            # Step 1: Retrieve the k most similar chunks
            results = self.vectorstore.similarity_search(query=question, k=k)

            if not results:
                return {"answer": "I don't have enough information.", "confidence": "Low"}

            # Step 2: Build context
            context = "\n".join([doc.page_content for doc in results])

            # Step 3: Construct RAG prompt
            # The prompt asks the model to use the context but does not
            # forbid answering from its own knowledge.
            prompt = f"""
            You are Career Compass, a helpful career guidance assistant.

            Use the following context to answer the question:
            {context}

            Question: {question}
            Answer:
            """

            # Step 4: Call LLM
            response = llm_client.chat.completions.create(
                model="gpt-4o-mini",
                messages=[{"role": "user", "content": prompt}],
                temperature=0.3,
                max_tokens=max_tokens
            )

            final_answer = response.choices[0].message.content

            return {
                "answer": final_answer,
                "retrieved_chunks": len(results),
                # Fixed label: it is not derived from retrieval scores.
                "confidence": "High"
            }

        except Exception as e:
            return {
                "answer": f"Error: {str(e)}",
                "confidence": "Error"
            }

    def close_connection(self):
        """Close the Weaviate connection and reset the instance."""
        print("🔌 Closing connection...")
        if self.client:
            self.client.close()
        # Drop references to the closed client so later ask_question calls
        # report "System not initialized." instead of using a dead connection.
        self.client = None
        self.vectorstore = None
        print("✅ Connection closed")


In [None]:
# Build the assistant; nothing is connected until initialize_system runs.
career_system = CareerCompassWeaviate()

# Initialize with your dataset
# NOTE(review): this indexes the raw merged CSV, not the
# 'cleaned_augmented_career_data.csv' written by main() - confirm that is
# intended. The boolean result is ignored here; after a failed
# initialization ask_question returns "System not initialized." instead of
# raising.
career_system.initialize_system("final_merged_career_guidance.csv")

# Ask a question
response = career_system.ask_question("What skills are important for AI engineers?")
print("💡 Answer:", response["answer"])


🚀 Initializing Career Compass...
🔗 Connecting to: wtttsbrqtxscrxxj7bqcqa.c0.asia-southeast1.gcp.weaviate.cloud
✅ Successfully connected to Weaviate Cloud!
📋 Creating Weaviate schema...
✅ Schema created successfully!
📂 Loading data from: final_merged_career_guidance.csv
📄 Loaded 10224 rows from CSV
✂️ Splitting text into token-based chunks...
📝 Prepared 25264 chunks for embedding
🧠 Initializing embeddings...


  embedding_model = HuggingFaceEmbeddings(
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

💾 Creating vector store...
📤 Adding documents to Weaviate...
   Added 100/25264 chunks
   Added 1100/25264 chunks
   Added 2100/25264 chunks
   Added 3100/25264 chunks
   Added 4100/25264 chunks
   Added 5100/25264 chunks
   Added 6100/25264 chunks
   Added 7100/25264 chunks
   Added 8100/25264 chunks
   Added 9100/25264 chunks
   Added 10100/25264 chunks
   Added 11100/25264 chunks
   Added 12100/25264 chunks
   Added 13100/25264 chunks
   Added 14100/25264 chunks
   Added 15100/25264 chunks
   Added 16100/25264 chunks
   Added 17100/25264 chunks
   Added 18100/25264 chunks
   Added 19100/25264 chunks
   Added 20100/25264 chunks
   Added 21100/25264 chunks
   Added 22100/25264 chunks
   Added 23100/25264 chunks
   Added 24100/25264 chunks
   Added 25100/25264 chunks
✅ Added 25264 chunks to Weaviate Cloud
🎉 Career Compass (RAG) initialized successfully!
💡 Answer: Important skills for AI engineers include:

1. **Programming Languages**: Proficiency in languages such as Python and R is e

In [None]:
# Ask about specialisation areas in a law degree and print only the answer
# text (the returned dict also carries "confidence" and "retrieved_chunks").
response = career_system.ask_question("What are the main areas of specialization in a law degree")
print("💡 Answer:", response["answer"])

💡 Answer: A law degree can lead to various areas of specialization, including:

1. **Criminal Law**: Focuses on defending or prosecuting individuals accused of crimes.
2. **Corporate Law**: Involves legal issues related to businesses, including mergers, acquisitions, and compliance.
3. **Family Law**: Deals with legal matters such as divorce, child custody, and adoption.
4. **Intellectual Property Law**: Protects creations of the mind, such as inventions, trademarks, and copyrights.
5. **Environmental Law**: Focuses on regulations and policies related to environmental protection and natural resources.
6. **Labor and Employment Law**: Covers issues related to workplace rights, employee benefits, and labor relations.
7. **Tax Law**: Involves the legal aspects of taxation for individuals and businesses.
8. **International Law**: Addresses legal issues that transcend national borders, including treaties and trade agreements.
9. **Real Estate Law**: Focuses on property transactions, land us

In [None]:
# Ask which skills help a law student succeed.
response = career_system.ask_question("What skills are important to succeed as a law student?")
print("💡 Answer:", response["answer"])

💡 Answer: To succeed as a law student, important skills include:

1. **Strong Organizational Abilities**: Managing coursework, deadlines, and study schedules effectively.
2. **Attention to Detail**: Carefully analyzing legal texts and understanding nuanced legal concepts.
3. **Proficiency in Legal Research Tools**: Utilizing databases and resources for thorough legal research.
4. **Excellent Communication Skills**: Articulating arguments clearly in both written and verbal forms.
5. **Knowledge of Legal Terminology**: Understanding and using legal language accurately.
6. **Ability to Maintain Confidentiality**: Handling sensitive information with discretion and professionalism.
7. **Time Management Skills**: Balancing various assignments, readings, and extracurricular activities efficiently.

Developing these skills will help law students excel in their studies and prepare them for future legal careers.


In [None]:

# Ask a country-specific question (duration of a law degree in Lebanon).
response = career_system.ask_question("How long does it typically take to complete a law degree in lebanon?")
print("💡 Answer:", response["answer"])

💡 Answer: In Lebanon, a law degree typically takes about 4 to 5 years to complete. Students usually pursue a Bachelor of Laws (LL.B.) degree, which is the standard requirement for entering the legal profession. After obtaining the LL.B., graduates may also need to complete additional requirements, such as internships or passing a bar exam, to practice law in Lebanon.


In [None]:


# Ask whether practising architecture requires a licence.
response = career_system.ask_question("Is licensure required to practice architecture professionally?")
print("💡 Answer:", response["answer"])

💡 Answer: Yes, licensure is required to practice architecture professionally in most places, including the United States. To become a licensed architect, one typically needs to complete the following steps:

1. **Education**: Obtain a degree from an accredited architecture program.
2. **Experience**: Complete a required amount of professional experience, often through an internship program.
3. **Examinations**: Pass the Architect Registration Examination (ARE), which tests knowledge and skills necessary for architectural practice.

Once licensed, architects can legally call themselves "architects" and take on projects that require professional oversight. The specific requirements for licensure can vary by state, so it's important to check the regulations in the state where you plan to practice.


In [None]:

# Ask which software tools architecture programs use.
response = career_system.ask_question("What software tools are commonly used in architecture programs?")
print("💡 Answer:", response["answer"])

💡 Answer: In architecture programs, commonly used software tools include:

1. **AutoCAD** - For 2D and 3D drafting and design.
2. **Revit** - For Building Information Modeling (BIM).
3. **SketchUp** - For 3D modeling and visualization.
4. **Rhino** - For advanced 3D modeling and design.
5. **Adobe Creative Suite (Photoshop, Illustrator, InDesign)** - For graphic design and presentation.
6. **Lumion or V-Ray** - For rendering and visualization.
7. **Archicad** - Another BIM software option.
8. **Grasshopper** - For algorithmic design and parametric modeling.
9. **Microsoft Excel** - For data analysis and project management.
10. **Project Management Tools (like Trello or Jira)** - For task management and collaboration.

These tools help architects in designing, modeling, visualizing, and managing their projects effectively.


In [None]:
# Ask about core courses of a business management degree.
response = career_system.ask_question("What courses are essential in a business management degree?")
print("💡 Answer:", response["answer"])

💡 Answer: In a business management degree program, essential courses typically include:

1. **Principles of Management** - Understanding management theories, practices, and functions.
2. **Marketing Management** - Learning about marketing strategies, consumer behavior, and market research.
3. **Financial Management** - Covering financial analysis, budgeting, and investment strategies.
4. **Operations Management** - Focusing on production processes, quality control, and supply chain management.
5. **Human Resource Management** - Exploring recruitment, training, performance management, and employee relations.
6. **Business Law** - Understanding legal principles that govern business operations.
7. **Strategic Management** - Learning about long-term planning, competitive analysis, and organizational strategy.
8. **Economics** - Covering microeconomics and macroeconomics to understand market dynamics.
9. **Accounting** - Basics of financial and managerial accounting for business decision-ma

In [None]:
# Ask an informally phrased, lower-case question (no trailing '?'); the text
# is passed to retrieval exactly as written.
response = career_system.ask_question("what is the best universities in lebanon")
print("💡 Answer:", response["answer"])

💡 Answer: The best universities in Lebanon are generally considered to be:

1. **American University of Beirut (AUB)** - AUB is one of the oldest and most prestigious universities in the Middle East, known for its strong academic programs and research initiatives.

2. **Lebanese American University (LAU)** - LAU offers a wide range of undergraduate and graduate programs and is recognized for its commitment to academic excellence and community service.

3. **University of Saint Joseph (USJ)** - USJ is a private university that offers a variety of programs and is known for its strong emphasis on liberal arts education.

4. **Lebanese University (LU)** - As the only public university in Lebanon, LU offers a diverse range of programs and has multiple campuses across the country.

5. **Notre Dame University - Louaize (NDU)** - NDU is known for its strong programs in engineering, business, and health sciences.

6. **Beirut Arab University (BAU)** - BAU is recognized for its academic programs

In [None]:
# Ask an ungrammatical question to see how the pipeline copes with noisy
# user input; the text is passed to retrieval exactly as written.
response = career_system.ask_question("is computer science is hard in lebanese university")
print("💡 Answer:", response["answer"])

💡 Answer: The difficulty of studying Computer Science at a Lebanese university, or any university for that matter, can vary based on several factors, including the specific institution, the curriculum, the faculty, and the individual student's background and aptitude for the subject.

1. **Curriculum and Faculty**: Some universities may have a more rigorous curriculum and highly qualified faculty, which can make the program challenging. It's important to research the specific university's Computer Science program to understand its reputation and the quality of education provided.

2. **Student Background**: If you have a strong foundation in mathematics and logical reasoning, you may find Computer Science concepts easier to grasp. Conversely, if you struggle with these areas, you might find the coursework more challenging.

3. **Practical Experience**: Computer Science often involves hands-on programming and practical applications. Students who engage in projects, internships, or self-

In [None]:
# Off-topic probe: not a career question. The prompt built in ask_question
# does not restrict the model to the retrieved context, so an answer is
# still produced.
response = career_system.ask_question("What is the recipient  of cake")
print("💡 Answer:", response["answer"])

💡 Answer: The recipient of the cake can vary depending on the context. In the scenario described, the cake is intended for a party where 500 people have RSVP'd, meaning the cake is meant to be shared among guests at the event. If you are baking or purchasing a cake for a specific occasion, the recipient could be:

1. **Party Guests** - In the context of a celebration or gathering.
2. **Family and Friends** - For personal celebrations like birthdays, anniversaries, or holidays.
3. **Colleagues** - For work-related events or celebrations.
4. **Yourself** - If you're baking for personal enjoyment or to practice your baking skills.
5. **Charity or Community Events** - If the cake is intended for a fundraiser or community gathering.

Ultimately, the recipient is determined by the occasion and the intention behind baking or buying the cake.


In [None]:
# Off-topic probe: general-knowledge question unrelated to careers.
response = career_system.ask_question("What is the  most famous food in egypt")
print("💡 Answer:", response["answer"])

💡 Answer: The most famous food in Egypt is often considered to be "koshari." Koshari is a hearty dish made from a mix of rice, lentils, and pasta, topped with a spicy tomato sauce and garnished with fried onions. It is a popular street food and is loved by many Egyptians for its delicious flavor and affordability. Other notable Egyptian dishes include "ful medames" (stewed fava beans), "ta'ameya" (Egyptian falafel made from fava beans), and "molokhia" (a green soup made from jute leaves). Each of these dishes reflects the rich culinary heritage of Egypt.


In [None]:
# Off-topic probe: asks the assistant about another AI product.
response = career_system.ask_question("do you know chatgpt")
print("💡 Answer:", response["answer"])

💡 Answer: Yes, I'm familiar with ChatGPT. It is a conversational AI chatbot developed by OpenAI that utilizes large language models (LLMs) to generate text-based responses. ChatGPT can assist with a variety of tasks, such as writing essays, composing emails, generating code, and answering questions on a wide range of topics. While it can provide valuable support and enhance productivity, it's important to note that it operates by predicting the most likely sequence of words based on the input it receives, rather than truly understanding the content. This means it can be a useful tool, but it has limitations and should be used thoughtfully, especially in contexts that require critical thinking and creativity.


In [None]:
# Non-English probe: Arabic query meaning "who made you".
response = career_system.ask_question("من صنعك")
print("💡 Answer:", response["answer"])

💡 Answer: أنا هنا لمساعدتك في توجيهك في مسيرتك المهنية. إذا كان لديك أي استفسار أو تحتاج إلى نصيحة حول السيرة الذاتية أو تطوير المهارات، فلا تتردد في طرح سؤالك!
