In [1]:
collab = 0

<!-- @format -->

# Ollama Step-Up


In [2]:
from IPython.display import clear_output
!sudo apt-get install -y pciutils
!curl -fsSL https://ollama.com/install.sh | sh # download ollama api
!ollama pull llama3.1:8b
!pip install -U lightrag[ollama]

clear_output()

In [3]:
# Create a Python script to start the Ollama API server in a separate thread

import os
import threading
import subprocess
import requests
import json


def ollama():
    if collab:
        os.environ['OLLAMA_HOST'] = '0.0.0.0:11434'
        os.environ['OLLAMA_ORIGINS'] = '*'
        subprocess.Popen(["ollama", "serve"])
    else:
        os.environ['OLLAMA_HOST'] = '127.0.0.1:11434'
        os.environ['OLLAMA_ORIGINS'] = '*'
        subprocess.Popen(["ollama", "serve"])


ollama_thread = threading.Thread(target=ollama)
ollama_thread.start()
clear_output()

ollama_thread = threading.Thread(target=ollama)
ollama_thread.start()
clear_output()

In [4]:
from lightrag.core.generator import Generator
from lightrag.core.component import Component
from lightrag.core.model_client import ModelClient
from lightrag.components.model_client import OllamaClient, GroqAPIClient

import time


qa_template = r"""<SYS>
You are a helpful assistant.
</SYS>
User: {{input_str}}
You:"""


class SimpleQA(Component):
    def __init__(self, model_client: ModelClient, model_kwargs: dict):
        super().__init__()
        self.generator = Generator(
            model_client=model_client,
            model_kwargs=model_kwargs,
            template=qa_template,
        )

    def call(self, input: dict) -> str:
        return self.generator.call({"input_str": str(input)})

    async def acall(self, input: dict) -> str:
        return await self.generator.acall({"input_str": str(input)})

In [5]:
from lightrag.components.model_client import OllamaClient
from IPython.display import Markdown, display
model = {
    "model_client": OllamaClient(),
    "model_kwargs": {"model": "llama3.1:8b"}
}
qa = SimpleQA(**model)
output = qa("what is 2+2")
display(Markdown(f"**Answer:** {output.data}"))

**Answer:** The answer to 2 + 2 is 4. Would you like help with anything else?

<!-- @format -->

# D


In [6]:
from IPython.display import clear_output
# %pip install nest_asyncio
# %pip install prettytable
# %pip install tqdm
# %pip install -U lightrag[ollama]
# %pip install aiohttp
# %pip install pandas
# %pip install openpyxl
clear_output()

In [7]:
# Example usage
repository_url = "https://github.com/Eemayas/Daraz_Scraper"
# repository_url = "https://github.com/embraceitmobile/animated_tree_view"
# repository_url = "https://github.com/earthPerson-001/Simple-Pendulum-Simulation-Using-OpenGL"

<!-- @format -->

### GitHub MetaData Extraction


In [8]:
import json
from prettytable import PrettyTable
import asyncio
import aiohttp
from typing import Any, Optional, List, Dict
from dataclasses import dataclass
import nest_asyncio

nest_asyncio.apply()


@dataclass
class Contributor:
    name: str
    profile_url: str
    avatar_url: str
    contributions: str


@dataclass
class RepositoryMetadata:
    id: int
    node_id: str
    name: str
    full_name: str
    private: bool
    owner: Dict[str, Any]
    html_url: str
    description: Optional[str]
    fork: bool
    url: str
    forks_url: str
    keys_url: str
    collaborators_url: str
    teams_url: str
    hooks_url: str
    issue_events_url: str
    events_url: str
    assignees_url: str
    branches_url: str
    tags_url: str
    blobs_url: str
    git_tags_url: str
    git_refs_url: str
    trees_url: str
    statuses_url: str
    languages_url: str
    stargazers_url: str
    contributors_url: str
    subscribers_url: str
    subscription_url: str
    commits_url: str
    git_commits_url: str
    comments_url: str
    issue_comment_url: str
    contents_url: str
    compare_url: str
    merges_url: str
    archive_url: str
    downloads_url: str
    issues_url: str
    pulls_url: str
    milestones_url: str
    notifications_url: str
    labels_url: str
    releases_url: str
    deployments_url: str
    created_at: str
    updated_at: str
    pushed_at: str
    git_url: str
    ssh_url: str
    clone_url: str
    svn_url: str
    homepage: Optional[str]
    size: int
    stargazers_count: int
    watchers_count: int
    language: Optional[str]
    has_issues: bool
    has_projects: bool
    has_downloads: bool
    has_wiki: bool
    has_pages: bool
    has_discussions: bool
    forks_count: int
    mirror_url: Optional[str]
    archived: bool
    disabled: bool
    open_issues_count: int
    license: Optional[Dict[str, Any]]
    allow_forking: bool
    is_template: bool
    web_commit_signoff_required: bool
    topics: List[str]
    visibility: str
    forks: int
    open_issues: int
    watchers: int
    default_branch: str
    temp_clone_token: Optional[str]
    network_count: int
    subscribers_count: int
    contributors: List[Contributor]


def _parse_repository_metadata(repo_data: dict, contributors: List[Contributor]) -> RepositoryMetadata:
    owner_info = repo_data.get("owner", {}) or {}
    license_info = repo_data.get("license", {}) or {}

    return RepositoryMetadata(
        id=repo_data.get("id", 0),
        node_id=repo_data.get("node_id", ""),
        name=repo_data.get("name", ""),
        full_name=repo_data.get("full_name", ""),
        private=repo_data.get("private", False),
        owner=owner_info,
        html_url=repo_data.get("html_url", ""),
        description=repo_data.get("description", ""),
        fork=repo_data.get("fork", False),
        url=repo_data.get("url", ""),
        forks_url=repo_data.get("forks_url", ""),
        keys_url=repo_data.get("keys_url", ""),
        collaborators_url=repo_data.get("collaborators_url", ""),
        teams_url=repo_data.get("teams_url", ""),
        hooks_url=repo_data.get("hooks_url", ""),
        issue_events_url=repo_data.get("issue_events_url", ""),
        events_url=repo_data.get("events_url", ""),
        assignees_url=repo_data.get("assignees_url", ""),
        branches_url=repo_data.get("branches_url", ""),
        tags_url=repo_data.get("tags_url", ""),
        blobs_url=repo_data.get("blobs_url", ""),
        git_tags_url=repo_data.get("git_tags_url", ""),
        git_refs_url=repo_data.get("git_refs_url", ""),
        trees_url=repo_data.get("trees_url", ""),
        statuses_url=repo_data.get("statuses_url", ""),
        languages_url=repo_data.get("languages_url", ""),
        stargazers_url=repo_data.get("stargazers_url", ""),
        contributors_url=repo_data.get("contributors_url", ""),
        subscribers_url=repo_data.get("subscribers_url", ""),
        subscription_url=repo_data.get("subscription_url", ""),
        commits_url=repo_data.get("commits_url", ""),
        git_commits_url=repo_data.get("git_commits_url", ""),
        comments_url=repo_data.get("comments_url", ""),
        issue_comment_url=repo_data.get("issue_comment_url", ""),
        contents_url=repo_data.get("contents_url", ""),
        compare_url=repo_data.get("compare_url", ""),
        merges_url=repo_data.get("merges_url", ""),
        archive_url=repo_data.get("archive_url", ""),
        downloads_url=repo_data.get("downloads_url", ""),
        issues_url=repo_data.get("issues_url", ""),
        pulls_url=repo_data.get("pulls_url", ""),
        milestones_url=repo_data.get("milestones_url", ""),
        notifications_url=repo_data.get("notifications_url", ""),
        labels_url=repo_data.get("labels_url", ""),
        releases_url=repo_data.get("releases_url", ""),
        deployments_url=repo_data.get("deployments_url", ""),
        created_at=repo_data.get("created_at", ""),
        updated_at=repo_data.get("updated_at", ""),
        pushed_at=repo_data.get("pushed_at", ""),
        git_url=repo_data.get("git_url", ""),
        ssh_url=repo_data.get("ssh_url", ""),
        clone_url=repo_data.get("clone_url", ""),
        svn_url=repo_data.get("svn_url", ""),
        homepage=repo_data.get("homepage", ""),
        size=repo_data.get("size", 0),
        stargazers_count=repo_data.get("stargazers_count", 0),
        watchers_count=repo_data.get("watchers_count", 0),
        language=repo_data.get("language", ""),
        has_issues=repo_data.get("has_issues", False),
        has_projects=repo_data.get("has_projects", False),
        has_downloads=repo_data.get("has_downloads", False),
        has_wiki=repo_data.get("has_wiki", False),
        has_pages=repo_data.get("has_pages", False),
        has_discussions=repo_data.get("has_discussions", False),
        forks_count=repo_data.get("forks_count", 0),
        mirror_url=repo_data.get("mirror_url", None),
        archived=repo_data.get("archived", False),
        disabled=repo_data.get("disabled", False),
        open_issues_count=repo_data.get("open_issues_count", 0),
        license=license_info,
        allow_forking=repo_data.get("allow_forking", False),
        is_template=repo_data.get("is_template", False),
        web_commit_signoff_required=repo_data.get(
            "web_commit_signoff_required", False),
        topics=repo_data.get("topics", []),
        visibility=repo_data.get("visibility", ""),
        forks=repo_data.get("forks", 0),
        open_issues=repo_data.get("open_issues", 0),
        watchers=repo_data.get("watchers", 0),
        default_branch=repo_data.get("default_branch", ""),
        temp_clone_token=repo_data.get("temp_clone_token", None),
        network_count=repo_data.get("network_count", 0),
        subscribers_count=repo_data.get("subscribers_count", 0),
        contributors=contributors
    )


async def _fetch_repository_metadata(session: aiohttp.ClientSession, url: str) -> dict[str, Any]:
    async with session.get(url) as response:
        response.raise_for_status()
        return await response.json()


async def _fetch_contributors(session: aiohttp.ClientSession, url: str) -> List[Contributor]:
    async with session.get(url) as response:
        response.raise_for_status()
        contributors_data = await response.json()
        return [
            Contributor(
                name=contributor.get("login", ""),
                profile_url=contributor.get("html_url", ""),
                avatar_url=contributor.get("avatar_url", ""),
                contributions=str(contributor.get("contributions", ""))
            )
            for contributor in contributors_data
        ]


async def fetch_git_repository_metadata(session: aiohttp.ClientSession, repository_url: str) -> Optional[RepositoryMetadata]:
    api_url = repository_url.replace(
        "https://github.com/", "https://api.github.com/repos/")

    try:
        metadata = await _fetch_repository_metadata(session, api_url)
        contributors_url = metadata.get("contributors_url", "")
        contributors = await _fetch_contributors(session, contributors_url) if contributors_url else []
        return _parse_repository_metadata(metadata, contributors) if metadata else None
    except aiohttp.ClientError as exc:
        print(f"Client error while fetching repository metadata: {exc}")
        return None


def print_metadata(metadata: RepositoryMetadata):
    table_metadata = PrettyTable()
    table_metadata.field_names = ["Attribute", "Value"]

    for field in metadata.__dataclass_fields__:
        value = getattr(metadata, field)
        if isinstance(value, dict):
            value = json.dumps(value, indent=2)
        elif isinstance(value, list):
            value = ", ".join(str(item) for item in value)
        table_metadata.add_row([field, value])

    print(table_metadata)

    if metadata.contributors:
        contributors_table = PrettyTable()
        contributors_table.field_names = [
            "Contributor Name", "Profile URL", "Avatar URL", "No of Contributions"]
        for contributor in metadata.contributors:
            contributors_table.add_row(
                [contributor.name, contributor.profile_url, contributor.avatar_url, contributor.contributions])
        print(contributors_table)


async def main(repository_url: str):
    async with aiohttp.ClientSession() as session:
        metadata = await fetch_git_repository_metadata(session, repository_url)
        if metadata:
            print_metadata(metadata)
        return metadata

# Example usage
metadata = asyncio.run(main(repository_url))

+-----------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------+
|          Attribute          |                                                                              Value                                                                              |
+-----------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------+
|              id             |                                                                            707705001                                                                            |
|           node_id           |                                                                           R_kgDOKi64qQ                                                                          |
|             name            

<!-- @format -->

### Ignore List


In [11]:
ignore_list = [
    # General
    '.git',            # Git repository metadata
    'node_modules',    # Node.js modules
    '.idea',           # JetBrains IDE project files
    '.vscode',         # Visual Studio Code settings
    '__pycache__',     # Python bytecode cache
    '.DS_Store',       # macOS directory metadata
    '.env',            # Environment variable files
    'venv',            # Python virtual environment
    'build',           # Build output directories
    'dist',            # Distribution directories
    'target',          # Output from Java and Rust builds
    '.pytest_cache',   # Pytest cache files
    '*.log',           # Log files
    '*.tmp',           # Temporary files

    # Python
    '*.pyc',           # Compiled Python files
    '.mypy_cache',     # Mypy type checker cache
    '.tox',            # Tox environment

    # JavaScript/Node.js
    'npm-debug.log',   # NPM debug logs
    'yarn-error.log',  # Yarn error logs
    '.parcel-cache',   # Parcel bundler cache
    'coverage',        # Code coverage reports
    '.next',           # Next.js build directory
    'out',             # Output directory for Next.js

    # Java
    '*.class',         # Compiled Java classes
    '*.jar',           # JAR files
    '*.war',           # WAR files
    '.settings',       # Eclipse settings
    '.classpath',      # Eclipse classpath
    '.project',        # Eclipse project file

    # C/C++
    '*.o',             # Object files
    '*.a',             # Static libraries
    '*.so',            # Shared libraries
    '*.out',           # Executable files
    '*.exe',           # Windows executables
    'CMakeFiles',      # CMake build files
    'CMakeCache.txt',  # CMake cache
    '*.dSYM',          # macOS debug symbols
    '*.pdb',           # Windows debug symbols

    # Rust
    '*.rlib',          # Rust libraries
    'Cargo.lock',      # Cargo lock file

    # Go
    'bin',             # Binary output directory
    'pkg',             # Package output directory
    '*.test',          # Go test binaries
    'vendor',          # Vendor directory (if not used)

    # Ruby
    '.bundle',         # Bundler directory
    'vendor/bundle',   # Bundled gems
    'log',             # Log files
    'tmp',             # Temporary files
    '.gem',            # RubyGems metadata

    # PHP
    'vendor',          # Composer dependencies
    '.phpunit.result.cache',  # PHPUnit result cache

    # Android
    '.gradle',         # Gradle files
    '*.apk',           # Android package
    '*.ap_ ',          # Android resources package
    'local.properties',  # Android SDK settings

    # .NET/C#
    'bin',             # Binary output directory
    'obj',             # Object files directory
    '*.dll',           # DLL files
    '*.user',          # User settings
    'packages',        # NuGet packages

    # LaTeX
    '*.aux',           # Auxiliary files
    '*.toc',           # Table of contents
    '*.out',           # Auxiliary output files
    '*.synctex.gz',    # SyncTeX file
    '*.fls',           # LaTeX build files
    '*.fdb_latexmk',   # LaTeX build files
]

# Specify the file extensions to ignore
# Specify the file extensions to ignore
ignore_extensions = [
    # Image formats
    '.png', '.jpg', '.jpeg', '.gif', '.bmp', '.svg', '.tiff', '.webp', '.heif', '.heic', '.ico', '.raw', '.psd',

    # Audio formats
    '.mp3', '.wav', '.flac', '.aac', '.ogg', '.m4a', '.wma', '.aiff', '.alac', '.pcm',

    # Video formats
    '.mp4', '.avi', '.mkv', '.mov', '.wmv', '.flv', '.webm', '.m4v', '.mpg', '.mpeg', '.3gp', '.ogv', '.rm', '.swf'
]
api_additional_extensions = [
    # Text and document formats
    '.copy', '.local', '.json', '.config', '.md', '.txt', '.log', '.yml', '.yaml', '.xml', '.ini', '.pdf', '.csv', '.tsv',

    # Font formats
    '.woff', '.woff2', '.ttf', '.eot', '.otf',

    # Configuration and map files
    '.config.ts', '.map', '.lock',

    # Styling files
    '.css', '.scss', '.sass', '.less', '.styl', '.pcss', '.postcss'
]
api_ignore_extensions = ignore_extensions + api_additional_extensions
specific_ignores_api = ['.gitignore', '.config.js', '.config.ts']

<!-- @format -->

### Folder Structure


In [12]:
import subprocess
import os
from pathlib import Path
from typing import List, Optional
import asyncio


def print_folder_structure(dir_path: Path, level: int = -1, limit_to_directories: bool = False, length_limit: int = 1000, ignore_list: List[str] = None) -> List[str]:
    """Generate a visual tree structure of the directory contents.

    Args:
        dir_path (Path): The root directory to start the tree from.
        level (int, optional): The depth of recursion. Defaults to -1 (no limit).
        limit_to_directories (bool, optional): If True, only directories are listed. Defaults to False.
        length_limit (int, optional): Limits the number of lines output. Defaults to 1000.
        ignore_list (List[str], optional): A list of directory or file names to ignore. Defaults to None.

    Returns:
        List[str]: A list of strings representing the directory tree structure.
    """
    space = '    '
    branch = '│   '
    tee = '├── '
    last = '└── '
    dir_path = Path(dir_path)  # Ensure dir_path is a Path object
    files = 0
    directories = 0
    output = []

    if ignore_list is None:
        ignore_list = []

    def inner(dir_path: Path, prefix: str = '', level: int = -1):
        nonlocal files, directories
        if level == 0:
            return  # Stop recursion if level is 0
        if limit_to_directories:
            contents = [d for d in dir_path.iterdir() if d.is_dir()
                        and d.name not in ignore_list]
        else:
            contents = [d for d in dir_path.iterdir()
                        if d.name not in ignore_list]
        pointers = [tee] * (len(contents) - 1) + [last]
        for pointer, path in zip(pointers, contents):
            if path.is_dir():
                output.append(prefix + pointer + path.name + "/")
                directories += 1
                extension = branch if pointer == tee else space
                inner(path, prefix=prefix + extension, level=level - 1)
            elif not limit_to_directories:
                output.append(prefix + pointer + path.name)
                files += 1

    # Add the root directory name
    output.append(dir_path.name + "/")
    # Create an iterator from the inner function
    inner(dir_path, level=level)
    # Limit the output by length_limit
    if len(output) > length_limit:
        output = output[:length_limit]
        output.append(f'... length_limit, {length_limit}, reached, counted:')
    # Add the summary of directories and files
    output.append(f'\n{directories} directories' +
                  (f', {files} files' if files else ''))

    return output


async def clone_github_repo(repository_url: str) -> Optional[str]:
    repo_name = repository_url.split('/')[-1]

    if not os.path.exists(repo_name):
        print(f"Cloning repository from {repository_url}...")
        try:
            subprocess.run(['git', 'clone', repository_url], check=True)
            print(f"Repository cloned into {repo_name}/")
            return repo_name
        except subprocess.CalledProcessError as e:
            print(f"Error cloning repository: {e}")
            return None
    else:
        print(
            f"Repository folder '{repo_name}' already exists. Skipping clone.")
        return repo_name

# Clone the repository
repo_name = await clone_github_repo(repository_url=repository_url)

if repo_name:
    # Print the folder structure
    folder_structure = print_folder_structure(
        dir_path=Path(repo_name),
        ignore_list=ignore_list
    )
    folder_structure_str = "\n".join(folder_structure)
    folder_structure_markdown = (
        "# Folder Structure\n" +
        "```sh\n" +
        folder_structure_str + "\n" +
        "```"
    )
    print(folder_structure_markdown)
else:
    print("Repository cloning failed or was skipped.")

Repository folder 'Daraz_Scraper' already exists. Skipping clone.
# Folder Structure
```sh
Daraz_Scraper/
├── .env.local.copy
├── .gitignore
├── app/
│   ├── api/
│   │   └── cron/
│   │       └── route.ts
│   ├── favicon.ico
│   ├── globals.css
│   ├── layout.tsx
│   ├── page.tsx
│   └── products/
│       └── [id]/
│           └── page.tsx
├── components/
│   ├── HeroCarousel.tsx
│   ├── Modal.tsx
│   ├── Navbar.tsx
│   ├── PriceInfoCard.tsx
│   ├── ProductCard.tsx
│   └── Searchbar.tsx
├── lib/
│   ├── action/
│   │   └── index.ts
│   ├── models/
│   │   └── product.model.ts
│   ├── mongoose.ts
│   ├── nodemailer/
│   │   └── index.ts
│   ├── scrapper/
│   │   └── index.ts
│   └── utils.ts
├── next.config.js
├── package-lock.json
├── package.json
├── postcss.config.js
├── public/
│   ├── assets/
│   │   ├── icons/
│   │   │   ├── arrow-down.svg
│   │   │   ├── arrow-right.svg
│   │   │   ├── arrow-up.svg
│   │   │   ├── bag.svg
│   │   │   ├── black-heart.svg
│   │   │   ├── bookmark

<!-- @format -->

### Summary Generation


In [13]:
import os
from pathlib import Path
from typing import List, Dict
from prettytable import PrettyTable
from lightrag.core.generator import Generator
from lightrag.core.component import Component
from lightrag.components.model_client import OllamaClient
from tqdm import tqdm

summary_template = r"""<SYS>
You are a summarization assistant specialized in coding files.
</SYS>
Please summarize the following code:
{{input_str}}
Summary:"""


class SummaryQA(Component):
    def __init__(self, model_client: OllamaClient, model_kwargs: dict):
        super().__init__()
        self.generator = Generator(
            model_client=model_client,
            model_kwargs=model_kwargs,
            template=summary_template,
        )

    def call(self, input: str) -> str:
        return self.generator.call({"input_str": input})

    async def acall(self, input: str) -> str:
        return await self.generator.acall({"input_str": input})


def generate_summary(path: Path, ignore_list: List[str], qa_component: SummaryQA, ignore_extensions: List[str]) -> List[Dict[str, str]]:
    """Generate a summary of files in the given path using the model."""
    summary = []
    files_to_process = []

    for root, dirs, files in os.walk(path):
        # Get relative path for the current directory
        relative_root = os.path.relpath(root, path)

        # Check if the directory should be ignored
        if any(ignored in relative_root.split(os.sep) for ignored in ignore_list):
            continue

        if relative_root == '.':
            summary.append({"file": "Modules", "description": "."})
        else:
            summary.append(
                {"file": relative_root, "description": "Not a File"})

        # List files in the current directory
        for file in files:
            file_path = Path(root) / file

            # Check if the file should be ignored
            if any(ignored in file_path.parts for ignored in ignore_list):
                continue

            # Check if the file has an extension that should be skipped
            if file_path.suffix.lower() in ignore_extensions:
                continue

            files_to_process.append(file_path)

    # Use tqdm to display progress
    pbar = tqdm(files_to_process, unit="file")
    for file_path in pbar:
        # Update the description dynamically
        pbar.set_description(f"Processing files - {file_path}")
        try:
            # Read file content
            with open(file_path, 'r') as f:
                file_content = f.read()

            # Generate summary using the model
            summary_text = qa_component.call(file_content)
            summary.append({"file": file_path, "description": summary_text})
        except Exception as e:
            summary.append(
                {"file": file_path, "description": f"Error processing file: {str(e)}"})

    return summary


# Create the QA component
model = {
    "model_client": OllamaClient(),
    "model_kwargs": {"model": "llama3.1:8b"}
}
qa = SummaryQA(**model)


if repo_name:
    path = Path(repo_name)
    if not path.is_dir():
        print(f"The path {path} is not a directory.")
    summary = generate_summary(path, ignore_list=ignore_list,
                               qa_component=qa, ignore_extensions=ignore_extensions)
    table_summary = PrettyTable()
    table_summary.field_names = ["File", "Description"]

    for item in summary:
        table_summary.add_row([item["file"], item["description"]])

    print(table_summary)
else:
    print("Repository cloning failed or was skipped.")

Processing files - Daraz_Scraper\types\index.ts: 100%|██████████| 28/28 [02:11<00:00,  4.69s/file]              

+--------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------




In [14]:
import pandas as pd
from pathlib import Path
from prettytable import PrettyTable

# Sample function to check if an object has the `data` attribute


def get_description_data(description):
    if hasattr(description, 'data'):
        return description.data
    return description


if summary:
    # Initialize PrettyTable
    table_summary = PrettyTable()
    table_summary.field_names = ["File", "Description"]

    # Create a list to hold data for the DataFrame
    data_for_excel = []

    for item in summary:
        description_data = get_description_data(item["description"])
        table_summary.add_row([item["file"], description_data])
        data_for_excel.append(
            {"File": item["file"], "Description": description_data})

    # Print the PrettyTable
    print(table_summary)

    # Convert the list to a DataFrame
    df_summary = pd.DataFrame(data_for_excel)

    # Define the path and name for the Excel file
    excel_path = 'summary.xlsx'

    # Save DataFrame to an Excel file
    df_summary.to_excel(excel_path, index=False, engine='openpyxl')
    print(f"Summary saved to {excel_path}")

+--------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
|                    File                    |                                                                                                                                             Description                                                                                                                                              |
+--------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

In [15]:
from prettytable import PrettyTable

# Sample function to check if an object has the `data` attribute


def get_description_data(description):
    if hasattr(description, 'data'):
        return description.data
    return description


def is_empty_or_error(description):
    description_str = get_description_data(
        description).strip() if isinstance(description, str) else ""
    return not description_str or "HTTP error 401" in description_str


if summary:
    # Variable to store rows before adding to the table
    blank_error_summary = []

    for item in summary:
        description_data = get_description_data(item["description"])
        if is_empty_or_error(description_data):
            # Save the row data into a variable
            row = [item["file"], description_data]
            blank_error_summary.append(row)

    # Initialize PrettyTable
    retry_table = PrettyTable()
    retry_table.field_names = ["File", "Description"]

    # Add the saved rows to the table
    for row in blank_error_summary:
        retry_table.add_row(row)

    # Print the PrettyTable
    print(retry_table)

else:
    print("NO SUMMARY DATA AVAILABLE")

+------+-------------+
| File | Description |
+------+-------------+
+------+-------------+


In [16]:
def generate_summary_for_file(file_path: Path, qa_component: SummaryQA, existing_summaries: List[Dict[str, str]]) -> List[Dict[str, str]]:
    """Generate or update a summary for a single file using the model."""
    file_name = file_path.name
    updated = False

    # Check if the file summary already exists
    for summary in existing_summaries:
        if summary["file"] == file_name:
            updated = True
            break

    if not updated:
        existing_summaries.append({"file": file_name, "description": ""})

    try:
        # Read file content
        with open(file_path, 'r') as f:
            file_content = f.read()

        # Generate summary using the model
        summary_text = qa_component.call(file_content)

        # Update the summary in the list
        for summary in existing_summaries:
            if summary["file"] == file_name:
                summary["description"] = summary_text
                break
    except Exception as e:
        for summary in existing_summaries:
            if summary["file"] == file_name:
                summary["description"] = f"Error processing file {file_path}: {str(e)}"
                break

    return existing_summaries


if blank_error_summary:
    # Initialize an empty list to store summaries
    summaries = []

    # Prompt the user for a file path
    user_input = input(

        "Please enter the path to the file you want to summarize: ")
    file_path = Path(user_input)

    if file_path.is_file():
        # Generate or update the summary for the specified file
        summaries = generate_summary_for_file(file_path, qa, summaries)

        # Print the summary using PrettyTable
        table_summary = PrettyTable()
        table_summary.field_names = ["File", "Description"]

        for summary in summaries:
            table_summary.add_row([summary["file"], summary["description"]])

        print(table_summary)
    else:
        print(f"The path {file_path} is not a valid file.")

In [17]:
if summary:
    # Combine summaries, ignoring "Not a File" or error messages
    combined_summary = " ".join([
        get_description_data(item['description'])
        for item in summary
        if get_description_data(item['description']) and get_description_data(item['description']) != "Not a File" and get_description_data(item['description']) != "." and not get_description_data(item['description']).startswith("HTTP error 401")
    ])
    print(combined_summary)

It appears you haven't provided any code for me to summarize. Please paste the code, and I'll be happy to assist you in summarizing it. The summary will include an overview of what the code does, key functions or components, and possibly notable algorithms or approaches used within the code. This code is a `.gitignore` file, which lists specific files and directories that should be ignored by Git.

In summary, this `.gitignore` file instructs Git to ignore:

* Dependencies and project setup files (e.g., `/node_modules`, `.pnp`)
* Testing-related files and directories (e.g., `/coverage`)
* Next.js build outputs (e.g., `/.next/`, `/build`)
* Miscellaneous files (e.g., `.DS_Store`, `.pem` files)
* Debug logs and error reports (e.g., `npm-debug.log`, `yarn-error.log`)
* Local environment configuration files (e.g., `.env.local`)
* Vercel-related files (e.g., `.vercel`)
* TypeScript build outputs (e.g., `*.tsbuildinfo`, `next-env.d.ts`)

By ignoring these files, this `.gitignore` file helps 

<!-- @format -->

### Header


<!-- @format -->

#### Project Image


In [18]:
project_icons = {
    "ecommerce": {
        "icon": "https://img.icons8.com/nolan/512/1A6DFF/C822FF/shopping-basket-2.png"
    },
    "banking": {
        "icon": "https://img.icons8.com/nolan/512/1A6DFF/C822FF/bank.png"
    },
    "school": {
        "icon": "https://img.icons8.com/nolan/512/1A6DFF/C822FF/school.png"
    },
    "education": {
        "icon": "https://img.icons8.com/nolan/512/1A6DFF/C822FF/graduation-cap.png"
    },
    "work": {
        "icon": "https://img.icons8.com/nolan/512/1A6DFF/C822FF/briefcase.png"
    },
    "healthcare": {
        "icon": "https://img.icons8.com/nolan/512/1A6DFF/C822FF/hospital-room.png"
    },
    "real_estate": {
        "icon": "https://img.icons8.com/nolan/512/1A6DFF/C822FF/home.png"
    },
    "travel": {
        "icon": "https://img.icons8.com/nolan/512/1A6DFF/C822FF/passport.png"
    },
    "social_media": {
        "icon": "https://img.icons8.com/nolan/512/1A6DFF/C822FF/share.png"
    },
    "fitness": {
        "icon": "https://img.icons8.com/nolan/512/1A6DFF/C822FF/dumbbell.png"
    },
    "news": {
        "icon": "https://img.icons8.com/nolan/512/1A6DFF/C822FF/news.png"
    },
    "entertainment": {
        "icon": "https://img.icons8.com/nolan/512/1A6DFF/C822FF/clapperboard.png"
    },
    "food_delivery": {
        "icon": "https://img.icons8.com/nolan/512/1A6DFF/C822FF/food-delivery.png"
    },
    "finance": {
        "icon": "https://img.icons8.com/nolan/512/1A6DFF/C822FF/money.png"
    },
    "transportation": {
        "icon": "https://img.icons8.com/nolan/512/1A6DFF/C822FF/bus.png"
    },
    "hospitality": {
        "icon": "https://img.icons8.com/nolan/512/1A6DFF/C822FF/hotel.png"
    },
    "music": {
        "icon": "https://img.icons8.com/nolan/512/1A6DFF/C822FF/musical-notes.png"
    },
    "gaming": {
        "icon": "https://img.icons8.com/nolan/512/1A6DFF/C822FF/controller.png"
    },
    "environment": {
        "icon": "https://img.icons8.com/nolan/512/1A6DFF/C822FF/earth-planet.png"
    },
    "nonprofit": {
        "icon": "https://img.icons8.com/nolan/512/1A6DFF/C822FF/charity.png"
    },
    "photography": {
        "icon": "https://img.icons8.com/nolan/512/1A6DFF/C822FF/camera.png"
    }
}


def get_project_icon():
    print("Select the project type:")
    for i, key in enumerate(project_icons.keys(), start=1):
        print(f"{i}. {key.capitalize()}")
    print(f"{len(project_icons) + 1}. Custom")
    print(f"{len(project_icons) + 2}. None")

    choice = input("Enter the number corresponding to the project type: ")

    if choice.isdigit():
        choice = int(choice)

        if 1 <= choice <= len(project_icons):
            selected_type = list(project_icons.keys())[choice - 1]
            icon_url = project_icons[selected_type]['icon']
            return icon_url

        elif choice == len(project_icons) + 1:  # Custom
            custom_link = input("Enter the custom link/path: ")
            return custom_link

        elif choice == len(project_icons) + 2:  # None
            return None

    print("Invalid choice. Please try again.")
    return None


# Get the icon URL or custom path
icon_url = get_project_icon()


if icon_url:
    project_image_markdown = f'''
<p align="center">
    <img src="{icon_url}" width="200" style="border-radius: 20px;" />
</p>
    '''
else:
    project_image_markdown = ""
print(project_image_markdown)
display(Markdown(project_image_markdown))

Select the project type:
1. Ecommerce
2. Banking
3. School
4. Education
5. Work
6. Healthcare
7. Real_estate
8. Travel
9. Social_media
10. Fitness
11. News
12. Entertainment
13. Food_delivery
14. Finance
15. Transportation
16. Hospitality
17. Music
18. Gaming
19. Environment
20. Nonprofit
21. Photography
22. Custom
23. None

<p align="center">
    <img src="https://img.icons8.com/nolan/512/1A6DFF/C822FF/shopping-basket-2.png" width="200" style="border-radius: 20px;" />
</p>
    



<p align="center">
    <img src="https://img.icons8.com/nolan/512/1A6DFF/C822FF/shopping-basket-2.png" width="200" style="border-radius: 20px;" />
</p>
    

<!-- @format -->

#### Project Name


In [19]:
project_name_markdown = f'''
<p align="center">
    <h1 align="center">{metadata.name}</h1>
</p>
    '''
print(project_name_markdown)
display(Markdown(project_name_markdown))


<p align="center">
    <h1 align="center">Daraz_Scraper</h1>
</p>
    



<p align="center">
    <h1 align="center">Daraz_Scraper</h1>
</p>
    

<!-- @format -->

#### GitHub Status Badges


In [20]:
def update_github_badge_urls(github_repo_link, badges):
    # Extract the owner and repository name from the GitHub link
    repo_path = github_repo_link.rstrip('/').replace('https://github.com/', '')

    # Define the badge URLs
    badge_urls = {
        "license": f"https://img.shields.io/github/license/{repo_path}?style=flat&color=0080ff",
        "last-commit": f"https://img.shields.io/github/last-commit/{repo_path}?style=flat&logo=git&logoColor=white&color=0080ff",
        "repo-top-language": f"https://img.shields.io/github/languages/top/{repo_path}?style=flat&color=0080ff",
        "repo-language-count": f"https://img.shields.io/github/languages/count/{repo_path}?style=flat&color=0080ff",
        "build-status": f"https://img.shields.io/github/actions/workflow/status/{repo_path}/build.yml?branch=main&style=flat&color=0080ff",
        "open-issues": f"https://img.shields.io/github/issues/{repo_path}?style=flat&color=0080ff",
        "forks": f"https://img.shields.io/github/forks/{repo_path}?style=flat&color=0080ff",
        "stars": f"https://img.shields.io/github/stars/{repo_path}?style=flat&color=0080ff",
        "pull-requests": f"https://img.shields.io/github/issues-pr/{repo_path}?style=flat&color=0080ff",
        "contributors": f"https://img.shields.io/github/contributors/{repo_path}?style=flat&color=0080ff",
        "commit-activity": f"https://img.shields.io/github/commit-activity/m/{repo_path}?style=flat&color=0080ff",
        "code-size": f"https://img.shields.io/github/languages/code-size/{repo_path}?style=flat&color=0080ff",
        "repo-size": f"https://img.shields.io/github/repo-size/{repo_path}?style=flat&color=0080ff",
        "downloads": f"https://img.shields.io/github/downloads/{repo_path}/total?style=flat&color=0080ff",
        "sponsors": f"https://img.shields.io/github/sponsors/{repo_path}?style=flat&color=0080ff",
        "release-version": f"https://img.shields.io/github/v/release/{repo_path}?style=flat&color=0080ff",
        "coverage": f"https://img.shields.io/codecov/c/github/{repo_path}?style=flat&color=0080ff",
        "code-quality": f"https://img.shields.io/codeclimate/quality/a/{repo_path}?style=flat&color=0080ff",
        "dependencies": f"https://img.shields.io/david/{repo_path}?style=flat&color=0080ff",
        "dev-dependencies": f"https://img.shields.io/david/dev/{repo_path}?style=flat&color=0080ff",
        "security": f"https://img.shields.io/snyk/vulnerabilities/github/{repo_path}?style=flat&color=0080ff",
        "performance": f"https://img.shields.io/website?style=flat&color=0080ff&url=https%3A%2F%2Fexample.com",
        "activity": f"https://img.shields.io/github/commit-activity/y/{repo_path}?style=flat&color=0080ff",
        "documentation": f"https://img.shields.io/docsify/docs?style=flat&color=0080ff",
        "version": f"https://img.shields.io/github/v/tag/{repo_path}?style=flat&color=0080ff"
    }

    # Create the HTML string based on the selected badges
    badges_html = '\n'.join(
        f'  <img src="{badge_urls[badge]}" alt="{badge}">' for badge in badges if badge in badge_urls)

    html_template = f'''
<p align="center">
{badges_html}
</p>
    '''

    return html_template


selected_badges = [
    "license", "last-commit", "repo-top-language", "repo-language-count",
    "build-status", "open-issues", "forks", "stars", "pull-requests",
    "contributors", "commit-activity", "code-size", "repo-size",
    "downloads", "sponsors", "release-version", "coverage", "code-quality",
    "dependencies", "dev-dependencies", "security", "performance",
    "activity", "documentation", "version"
]
github_badge_markdown = update_github_badge_urls(
    repository_url, selected_badges)
print(github_badge_markdown)
display(Markdown(github_badge_markdown))


<p align="center">
  <img src="https://img.shields.io/github/license/Eemayas/Daraz_Scraper?style=flat&color=0080ff" alt="license">
  <img src="https://img.shields.io/github/last-commit/Eemayas/Daraz_Scraper?style=flat&logo=git&logoColor=white&color=0080ff" alt="last-commit">
  <img src="https://img.shields.io/github/languages/top/Eemayas/Daraz_Scraper?style=flat&color=0080ff" alt="repo-top-language">
  <img src="https://img.shields.io/github/languages/count/Eemayas/Daraz_Scraper?style=flat&color=0080ff" alt="repo-language-count">
  <img src="https://img.shields.io/github/actions/workflow/status/Eemayas/Daraz_Scraper/build.yml?branch=main&style=flat&color=0080ff" alt="build-status">
  <img src="https://img.shields.io/github/issues/Eemayas/Daraz_Scraper?style=flat&color=0080ff" alt="open-issues">
  <img src="https://img.shields.io/github/forks/Eemayas/Daraz_Scraper?style=flat&color=0080ff" alt="forks">
  <img src="https://img.shields.io/github/stars/Eemayas/Daraz_Scraper?style=flat&colo


<p align="center">
  <img src="https://img.shields.io/github/license/Eemayas/Daraz_Scraper?style=flat&color=0080ff" alt="license">
  <img src="https://img.shields.io/github/last-commit/Eemayas/Daraz_Scraper?style=flat&logo=git&logoColor=white&color=0080ff" alt="last-commit">
  <img src="https://img.shields.io/github/languages/top/Eemayas/Daraz_Scraper?style=flat&color=0080ff" alt="repo-top-language">
  <img src="https://img.shields.io/github/languages/count/Eemayas/Daraz_Scraper?style=flat&color=0080ff" alt="repo-language-count">
  <img src="https://img.shields.io/github/actions/workflow/status/Eemayas/Daraz_Scraper/build.yml?branch=main&style=flat&color=0080ff" alt="build-status">
  <img src="https://img.shields.io/github/issues/Eemayas/Daraz_Scraper?style=flat&color=0080ff" alt="open-issues">
  <img src="https://img.shields.io/github/forks/Eemayas/Daraz_Scraper?style=flat&color=0080ff" alt="forks">
  <img src="https://img.shields.io/github/stars/Eemayas/Daraz_Scraper?style=flat&color=0080ff" alt="stars">
  <img src="https://img.shields.io/github/issues-pr/Eemayas/Daraz_Scraper?style=flat&color=0080ff" alt="pull-requests">
  <img src="https://img.shields.io/github/contributors/Eemayas/Daraz_Scraper?style=flat&color=0080ff" alt="contributors">
  <img src="https://img.shields.io/github/commit-activity/m/Eemayas/Daraz_Scraper?style=flat&color=0080ff" alt="commit-activity">
  <img src="https://img.shields.io/github/languages/code-size/Eemayas/Daraz_Scraper?style=flat&color=0080ff" alt="code-size">
  <img src="https://img.shields.io/github/repo-size/Eemayas/Daraz_Scraper?style=flat&color=0080ff" alt="repo-size">
  <img src="https://img.shields.io/github/downloads/Eemayas/Daraz_Scraper/total?style=flat&color=0080ff" alt="downloads">
  <img src="https://img.shields.io/github/sponsors/Eemayas/Daraz_Scraper?style=flat&color=0080ff" alt="sponsors">
  <img src="https://img.shields.io/github/v/release/Eemayas/Daraz_Scraper?style=flat&color=0080ff" alt="release-version">
  <img src="https://img.shields.io/codecov/c/github/Eemayas/Daraz_Scraper?style=flat&color=0080ff" alt="coverage">
  <img src="https://img.shields.io/codeclimate/quality/a/Eemayas/Daraz_Scraper?style=flat&color=0080ff" alt="code-quality">
  <img src="https://img.shields.io/david/Eemayas/Daraz_Scraper?style=flat&color=0080ff" alt="dependencies">
  <img src="https://img.shields.io/david/dev/Eemayas/Daraz_Scraper?style=flat&color=0080ff" alt="dev-dependencies">
  <img src="https://img.shields.io/snyk/vulnerabilities/github/Eemayas/Daraz_Scraper?style=flat&color=0080ff" alt="security">
  <img src="https://img.shields.io/website?style=flat&color=0080ff&url=https%3A%2F%2Fexample.com" alt="performance">
  <img src="https://img.shields.io/github/commit-activity/y/Eemayas/Daraz_Scraper?style=flat&color=0080ff" alt="activity">
  <img src="https://img.shields.io/docsify/docs?style=flat&color=0080ff" alt="documentation">
  <img src="https://img.shields.io/github/v/tag/Eemayas/Daraz_Scraper?style=flat&color=0080ff" alt="version">
</p>
    

<!-- @format -->

#### Language and Framework Badges


In [21]:
# Define known file extensions and configurations for different languages and tools
extensions = {
    'Python': ['.py'],
    'JavaScript': ['.js', '.jsx', '.ts', '.tsx'],
    'Java': ['.java', '.class', '.jar', '.xml'],
    'C++': ['.c', '.cpp', '.h', '.hpp', '.cc', '.cxx'],
    'C#': ['.cs', '.csproj'],
    'Ruby': ['.rb', '.gemspec', '.ru'],
    'PHP': ['.php', '.phtml'],
    'Swift': ['.swift'],
    'Go': ['.go'],
    'Rust': ['.rs'],
    'Kotlin': ['.kt', '.kts'],
    'R': ['.R', '.r', '.rmd'],
    'SQL': ['.sql'],
    'HTML5': ['.html', '.htm', '.css'],
    'CSS3': ['.css'],
    'TypeScript': ['.ts', '.tsx'],
    'Scala': ['.scala', '.sbt'],
    'Perl': ['.pl', '.pm'],
    'Objective-C': ['.m', '.h'],
    'Shell': ['.sh', '.bash', '.zsh'],
    'PowerShell': ['.ps1'],
    'Haskell': ['.hs', '.cabal'],
    'Lua': ['.lua'],
    'Erlang': ['.erl', '.hrl'],
    'Groovy': ['.groovy', '.gvy'],
    'VHDL': ['.vhdl', '.vhd']
}
frameworks = {
    'React': ['.jsx', '.tsx'],
    'Angular': ['angular.json'],
    'Vue.js': ['.vue'],
    'Django': ['settings.py', 'urls.py'],
    'Flask': ['.py'],
    'Spring': ['.xml'],
    'Maven': ['pom.xml'],
    'Gradle': ['build.gradle', 'settings.gradle'],
    'Rails': ['Gemfile', 'config.ru'],
    'Laravel': ['composer.json'],
    'Symfony': ['composer.json'],
    'Next.js': ['next.config.js'],
    'Gatsby': ['gatsby-config.js', 'gatsby-node.js'],
    'Svelte': ['.svelte'],
    'Bootstrap': ['bootstrap.min.css', 'bootstrap.min.js'],
    'Jasmine': ['jasmine.json'],
    'Mocha': ['mocha.opts'],
    'Express': ['app.js', 'server.js'],
    'Sails.js': ['config/', 'api/'],
    'ASP.NET': ['.cshtml', '.vbhtml', 'web.config'],
    'Spring Boot': ['application.properties', 'application.yml'],
    'Quasar': ['quasar.conf.js'],
    'Electron': ['main.js', 'package.json']
}
tools = {
    'Webpack': ['webpack.config.js'],
    'Docker': ['Dockerfile'],
    'CI/CD': ['.github/workflows/ci.yml', '.gitlab-ci.yml', 'Jenkinsfile'],
    'Babel': ['.babelrc', 'babel.config.json'],
    'ESLint': ['.eslintrc', '.eslintrc.js', '.eslintrc.json'],
    'Prettier': ['.prettierrc', '.prettierrc.json'],
    'Jest': ['jest.config.js', 'jest.config.json'],
    'Travis CI': ['.travis.yml'],
    'CircleCI': ['.circleci/config.yml'],
    'Appveyor': ['appveyor.yml'],
    'Composer': ['composer.json', 'composer.lock'],
    'Puppet': ['manifest.pp', 'Puppetfile'],
    'Ansible': ['ansible.cfg', 'playbook.yml'],
    'Kubernetes': ['deployment.yaml', 'service.yaml'],
    'Terraform': ['.tf', 'main.tf']
}

In [48]:
import json
import requests
import os

languages_found = set()
frameworks_found = set()
tools_found = set()


def identify_project(root_folder):

    for root, _, files in os.walk(root_folder):
        for file in files:
            ext = os.path.splitext(file)[1]
            if ext in [e for exts in extensions.values() for e in exts]:
                for language, exts in extensions.items():
                    if ext in exts:
                        languages_found.add(language)
            for framework, config_files in frameworks.items():
                if file in config_files:
                    frameworks_found.add(framework)
            for tool, config_files in tools.items():
                if file in config_files:
                    tools_found.add(tool)

    print("Languages found:", ", ".join(languages_found) or "None")
    print("Frameworks found:", ", ".join(frameworks_found) or "None")
    print("Tools found:", ", ".join(tools_found) or "None")


def get_languages_from_github(languages_url):
    # Get the list of languages used in the GitHub repo
    response = requests.get(languages_url)
    response.raise_for_status()  # Raise an error for bad responses
    languages = response.json()
    return languages

# Load the shields.io icons JSON


def load_shields_data(filename):
    with open(filename, 'r') as file:
        return json.load(file)


def get_shields_urls(technologies, shields_data):
    results = {}
    for tech in technologies:
        if tech in shields_data:
            url, _ = shields_data[tech]
            results[tech] = url.format('for-the-badge')
        else:
            # Fallback URL format if technology not found in shields_data
            fallback_url = f"https://img.shields.io/badge/{tech}-ED8B00?logo={tech}&logoColor=white"
            results[tech] = fallback_url
    return results


def generate_language_badges(urls_map):
    # Create the HTML string with badges
    badges_html = '\n'.join(
        f'  <img src="{urls_map[language]}" alt="{language}">' for language in urls_map.keys())

    html_template = f'''
<p align="center">
{badges_html}
</p>
    '''

    return html_template


root_folder_path = repo_name
if os.path.isdir(root_folder_path):
    identify_project(root_folder_path)
    languages_from_github = get_languages_from_github(metadata.languages_url)
    for language in languages_from_github.keys():
        languages_found.add(language)
    # Load shields.io data
    shields_data = load_shields_data("./shieldsio_icons.json")

    # Combine results and search in shields.io data
    combined_results = {**{lang: None for lang in languages_found}, **
                        {framework: None for framework in frameworks_found}, **{tool: None for tool in tools_found}}

    # Get URLs for combined results
    bagdges_urls_map = get_shields_urls(combined_results.keys(), shields_data)

    # Print results
    if bagdges_urls_map:
        print(bagdges_urls_map)
        language_badges_markdown =f'''
<p align="center">
	<em>Constructed using the following tools and technologies:</em>
</p>
{generate_language_badges(bagdges_urls_map)}
'''
        print(language_badges_markdown)
        display(Markdown(language_badges_markdown))
    else:
        print("No matching languages, frameworks, or tools found in shields.io data")
else:
    print("Invalid directory path")

Languages found: JavaScript, CSS3, HTML5, TypeScript
Frameworks found: Electron, Next.js
Tools found: None
{'CSS3': 'https://img.shields.io/badge/CSS3-1572B6.svg?style=for-the-badge&logo=CSS3&logoColor=white', 'TypeScript': 'https://img.shields.io/badge/TypeScript-3178C6.svg?style=for-the-badge&logo=TypeScript&logoColor=white', 'JavaScript': 'https://img.shields.io/badge/JavaScript-F7DF1E.svg?style=for-the-badge&logo=JavaScript&logoColor=black', 'HTML5': 'https://img.shields.io/badge/HTML5-E34F26.svg?style=for-the-badge&logo=HTML5&logoColor=white', 'CSS': 'https://img.shields.io/badge/CSS3-1572B6.svg?style=for-the-badge&logo=CSS3&logoColor=white', 'Electron': 'https://img.shields.io/badge/Electron-47848F.svg?style=for-the-badge&logo=Electron&logoColor=white', 'Next.js': 'https://img.shields.io/badge/Next.js-000000.svg?style=for-the-badge&logo=nextdotjs&logoColor=white'}

<p align="center">
	<em>Constructed using the following tools and technologies:</em>
</p>

<p align="center">
  <img


<p align="center">
	<em>Constructed using the following tools and technologies:</em>
</p>

<p align="center">
  <img src="https://img.shields.io/badge/CSS3-1572B6.svg?style=for-the-badge&logo=CSS3&logoColor=white" alt="CSS3">
  <img src="https://img.shields.io/badge/TypeScript-3178C6.svg?style=for-the-badge&logo=TypeScript&logoColor=white" alt="TypeScript">
  <img src="https://img.shields.io/badge/JavaScript-F7DF1E.svg?style=for-the-badge&logo=JavaScript&logoColor=black" alt="JavaScript">
  <img src="https://img.shields.io/badge/HTML5-E34F26.svg?style=for-the-badge&logo=HTML5&logoColor=white" alt="HTML5">
  <img src="https://img.shields.io/badge/CSS3-1572B6.svg?style=for-the-badge&logo=CSS3&logoColor=white" alt="CSS">
  <img src="https://img.shields.io/badge/Electron-47848F.svg?style=for-the-badge&logo=Electron&logoColor=white" alt="Electron">
  <img src="https://img.shields.io/badge/Next.js-000000.svg?style=for-the-badge&logo=nextdotjs&logoColor=white" alt="Next.js">
</p>
    


<!-- @format -->

#### Combining all for Header


In [49]:
header_markdown =\
    project_image_markdown+"\n" +\
    project_name_markdown+"\n" +\
    github_badge_markdown+"\n" +\
    language_badges_markdown+"\n" +\
    ""
print(header_markdown)
display(Markdown(header_markdown))


<p align="center">
    <img src="https://img.icons8.com/nolan/512/1A6DFF/C822FF/shopping-basket-2.png" width="200" style="border-radius: 20px;" />
</p>
    

<p align="center">
    <h1 align="center">Daraz_Scraper</h1>
</p>
    

<p align="center">
  <img src="https://img.shields.io/github/license/Eemayas/Daraz_Scraper?style=flat&color=0080ff" alt="license">
  <img src="https://img.shields.io/github/last-commit/Eemayas/Daraz_Scraper?style=flat&logo=git&logoColor=white&color=0080ff" alt="last-commit">
  <img src="https://img.shields.io/github/languages/top/Eemayas/Daraz_Scraper?style=flat&color=0080ff" alt="repo-top-language">
  <img src="https://img.shields.io/github/languages/count/Eemayas/Daraz_Scraper?style=flat&color=0080ff" alt="repo-language-count">
  <img src="https://img.shields.io/github/actions/workflow/status/Eemayas/Daraz_Scraper/build.yml?branch=main&style=flat&color=0080ff" alt="build-status">
  <img src="https://img.shields.io/github/issues/Eemayas/Daraz_Scraper?style=f


<p align="center">
    <img src="https://img.icons8.com/nolan/512/1A6DFF/C822FF/shopping-basket-2.png" width="200" style="border-radius: 20px;" />
</p>
    

<p align="center">
    <h1 align="center">Daraz_Scraper</h1>
</p>
    

<p align="center">
  <img src="https://img.shields.io/github/license/Eemayas/Daraz_Scraper?style=flat&color=0080ff" alt="license">
  <img src="https://img.shields.io/github/last-commit/Eemayas/Daraz_Scraper?style=flat&logo=git&logoColor=white&color=0080ff" alt="last-commit">
  <img src="https://img.shields.io/github/languages/top/Eemayas/Daraz_Scraper?style=flat&color=0080ff" alt="repo-top-language">
  <img src="https://img.shields.io/github/languages/count/Eemayas/Daraz_Scraper?style=flat&color=0080ff" alt="repo-language-count">
  <img src="https://img.shields.io/github/actions/workflow/status/Eemayas/Daraz_Scraper/build.yml?branch=main&style=flat&color=0080ff" alt="build-status">
  <img src="https://img.shields.io/github/issues/Eemayas/Daraz_Scraper?style=flat&color=0080ff" alt="open-issues">
  <img src="https://img.shields.io/github/forks/Eemayas/Daraz_Scraper?style=flat&color=0080ff" alt="forks">
  <img src="https://img.shields.io/github/stars/Eemayas/Daraz_Scraper?style=flat&color=0080ff" alt="stars">
  <img src="https://img.shields.io/github/issues-pr/Eemayas/Daraz_Scraper?style=flat&color=0080ff" alt="pull-requests">
  <img src="https://img.shields.io/github/contributors/Eemayas/Daraz_Scraper?style=flat&color=0080ff" alt="contributors">
  <img src="https://img.shields.io/github/commit-activity/m/Eemayas/Daraz_Scraper?style=flat&color=0080ff" alt="commit-activity">
  <img src="https://img.shields.io/github/languages/code-size/Eemayas/Daraz_Scraper?style=flat&color=0080ff" alt="code-size">
  <img src="https://img.shields.io/github/repo-size/Eemayas/Daraz_Scraper?style=flat&color=0080ff" alt="repo-size">
  <img src="https://img.shields.io/github/downloads/Eemayas/Daraz_Scraper/total?style=flat&color=0080ff" alt="downloads">
  <img src="https://img.shields.io/github/sponsors/Eemayas/Daraz_Scraper?style=flat&color=0080ff" alt="sponsors">
  <img src="https://img.shields.io/github/v/release/Eemayas/Daraz_Scraper?style=flat&color=0080ff" alt="release-version">
  <img src="https://img.shields.io/codecov/c/github/Eemayas/Daraz_Scraper?style=flat&color=0080ff" alt="coverage">
  <img src="https://img.shields.io/codeclimate/quality/a/Eemayas/Daraz_Scraper?style=flat&color=0080ff" alt="code-quality">
  <img src="https://img.shields.io/david/Eemayas/Daraz_Scraper?style=flat&color=0080ff" alt="dependencies">
  <img src="https://img.shields.io/david/dev/Eemayas/Daraz_Scraper?style=flat&color=0080ff" alt="dev-dependencies">
  <img src="https://img.shields.io/snyk/vulnerabilities/github/Eemayas/Daraz_Scraper?style=flat&color=0080ff" alt="security">
  <img src="https://img.shields.io/website?style=flat&color=0080ff&url=https%3A%2F%2Fexample.com" alt="performance">
  <img src="https://img.shields.io/github/commit-activity/y/Eemayas/Daraz_Scraper?style=flat&color=0080ff" alt="activity">
  <img src="https://img.shields.io/docsify/docs?style=flat&color=0080ff" alt="documentation">
  <img src="https://img.shields.io/github/v/tag/Eemayas/Daraz_Scraper?style=flat&color=0080ff" alt="version">
</p>
    

<p align="center">
	<em>Constructed using the following tools and technologies:</em>
</p>

<p align="center">
  <img src="https://img.shields.io/badge/CSS3-1572B6.svg?style=for-the-badge&logo=CSS3&logoColor=white" alt="CSS3">
  <img src="https://img.shields.io/badge/TypeScript-3178C6.svg?style=for-the-badge&logo=TypeScript&logoColor=white" alt="TypeScript">
  <img src="https://img.shields.io/badge/JavaScript-F7DF1E.svg?style=for-the-badge&logo=JavaScript&logoColor=black" alt="JavaScript">
  <img src="https://img.shields.io/badge/HTML5-E34F26.svg?style=for-the-badge&logo=HTML5&logoColor=white" alt="HTML5">
  <img src="https://img.shields.io/badge/CSS3-1572B6.svg?style=for-the-badge&logo=CSS3&logoColor=white" alt="CSS">
  <img src="https://img.shields.io/badge/Electron-47848F.svg?style=for-the-badge&logo=Electron&logoColor=white" alt="Electron">
  <img src="https://img.shields.io/badge/Next.js-000000.svg?style=for-the-badge&logo=nextdotjs&logoColor=white" alt="Next.js">
</p>
    



<!-- @format -->

### Project Overview


In [24]:
import os
from pathlib import Path
from typing import List, Dict
from prettytable import PrettyTable
from lightrag.core.generator import Generator
from lightrag.core.component import Component
from lightrag.components.model_client import OllamaClient
from tqdm import tqdm

project_overview_template = r"""<SYS>
You are a summarization assistant specialized in project documentation.
</SYS>
Based on the provided file summaries:
{{input_str}},

Generate a concise and descriptive one-paragraph overview of the project, including:
1. What the project is about (2 sentences).
2. What the project does(more than 3 sentences).
3. The technologies used.
4. The key features of the project.

Dont add predescription and post decription in the ans
Summary:"""


class OverviewQA(Component):
    def __init__(self, model_client: OllamaClient, model_kwargs: dict):
        super().__init__()
        self.generator = Generator(
            model_client=model_client,
            model_kwargs=model_kwargs,
            template=project_overview_template,
        )

    def call(self, summaries: str) -> str:
        return self.generator.call({"input_str": summaries})

    async def acall(self, summaries: str) -> str:
        return await self.generator.acall({"input_str": summaries})


def generate_project_overview(combined_summary: str, oa_component: OverviewQA) -> str:
    """Generate a concise and descriptive overview of the project based on file summaries."""

    # Generate the project overview using the model
    overview = oa_component.generator.call({"input_str": combined_summary})

    return overview


oa = OverviewQA(**model)

if combined_summary:
    # Generate the project overview
    project_overview = generate_project_overview(summary, oa_component=oa)
    project_overview = get_description_data(project_overview)
    project_overview_markdown = (
        "# Project Overview\n\n" +
        project_overview.strip().replace("\n\n", "\n\n").replace("  ", " ")
    )
    # Print the project overview
    print(project_overview_markdown)
    display(Markdown(project_overview_markdown))

# Project Overview

The project is a Daraz e-commerce platform data scraper that extracts product information from the website. It utilizes web scraping techniques to gather relevant data, including product attributes and seller information. 

This project uses JavaScript and TypeScript technologies, as well as the PUPPETEER library for web scraping and parsing JSON-formatted data. The key features of this project include its ability to extract a wide range of product attributes, such as product rating and review count, brand and title, description and highlights, price and discount information, and availability and stock status. Additionally, it can handle errors during the scraping process by throwing informative error messages.


# Project Overview

The project is a Daraz e-commerce platform data scraper that extracts product information from the website. It utilizes web scraping techniques to gather relevant data, including product attributes and seller information. 

This project uses JavaScript and TypeScript technologies, as well as the PUPPETEER library for web scraping and parsing JSON-formatted data. The key features of this project include its ability to extract a wide range of product attributes, such as product rating and review count, brand and title, description and highlights, price and discount information, and availability and stock status. Additionally, it can handle errors during the scraping process by throwing informative error messages.

<!-- @format -->

### Key Features


In [25]:
import os
from pathlib import Path
from typing import List, Dict
from prettytable import PrettyTable
from lightrag.core.generator import Generator
from lightrag.core.component import Component
from lightrag.components.model_client import OllamaClient
from tqdm import tqdm

key_feature_template = r"""<SYS>
You are an expert computer engineer specializing in project documentation and coding, with advanced knowledge of various programming technologies.
</SYS>
Based on the provided file summaries:
{{input_str}},

Extract and list the key features (minimun 5 features) in a concise format. Each feature should include:
- Feature Name: A brief description of the feature and its significance.

Use the following format for listing the features:
- **Feature Name**: Description of the feature and its significance.

Ensure that the features are listed clearly and concisely, highlighting the most important aspects and functionalities that define the project’s value and just give me bulletin. No explaination need before and after bulletin likes "Here are the key features extracted from the provided code snippets:", "Let me know if you'd like me to help with anything else!"
Summary:
"""


class FeatureQA(Component):
    def __init__(self, model_client: OllamaClient, model_kwargs: dict):
        super().__init__()
        self.generator = Generator(
            model_client=model_client,
            model_kwargs=model_kwargs,
            template=key_feature_template,
        )

    def call(self, summaries: str) -> str:
        return self.generator.call({"input_str": summaries})

    async def acall(self, summaries: str) -> str:
        return await self.generator.acall({"input_str": summaries})


def generate_key_feature(combined_summary: str, fa_component: FeatureQA) -> str:

    overview = fa_component.generator.call({"input_str": combined_summary})

    return overview


fa = FeatureQA(**model)

if combined_summary:
    # Generate the project overview
    key_feature = generate_key_feature(summary, fa_component=fa)
    key_feature = get_description_data(key_feature)
    key_feature_markdown = (
        "# Key Features\n" + key_feature)
    print(key_feature_markdown)
    display(Markdown(key_feature_markdown))

# Key Features
- **PriceHistoryItem**: An object representing a single price history item.
- **User**: A type representing a user with an email property.
- **Product**: A complex type representing various product data and properties.
- **NotificationType**: An enumeration of possible notification types.
- **EmailContent**: An object containing subject and body fields for email content.
- **Price**: An object representing price information with text and value fields.


# Key Features
- **PriceHistoryItem**: An object representing a single price history item.
- **User**: A type representing a user with an email property.
- **Product**: A complex type representing various product data and properties.
- **NotificationType**: An enumeration of possible notification types.
- **EmailContent**: An object containing subject and body fields for email content.
- **Price**: An object representing price information with text and value fields.

<!-- @format -->

### Getting Started


In [26]:
installation_template_v1 = r"""<SYS>
You are a highly skilled software engineer with expertise in documentation and project setup. You are adept at analyzing project summaries and folder structures to create clear installation instructions.
</SYS>
Based on the following project summary and folder structure:
Summary:
{{project_summary}}

Folder Structure:
{{folder_structure}}

GitHub repo Link:
{{repo_link}}

Create a detailed installation guide that includes:
1. **Prerequisites**: List any software, tools, or environment setups required before installation (e.g., Node.js, Docker) and provide link to download or install them.
2. **Setup Instructions**: Step-by-step instructions to set up the project, including installing dependencies, configuring environment variables, and any other necessary setup.
3. **Running the Project**: Detailed commands and steps to run the project locally, including any necessary build steps or configuration commands.
4. **Troubleshooting**: Common issues that may arise during installation and how to resolve them.
Ensure that the installation guide is comprehensive and easy to follow for someone new to the project and properly format them with heading like # Getting Started, ## Prerequisites, ## Installation, ## Running the Project, ## Tests, ## Troubleshooting
Summary:"""


class InstallationQA(Component):
    def __init__(self, model_client: OllamaClient, model_kwargs: dict):
        super().__init__()
        self.generator = Generator(
            model_client=model_client,
            model_kwargs=model_kwargs,
            template=installation_template_v1,
        )

    def call(self, project_summary: str, folder_structure: str, repo_link: str) -> str:
        return self.generator.call({
            "project_summary": project_summary,
            "folder_structure": folder_structure,
            "repo_link": repo_link
        })

    async def acall(self, project_summary: str, folder_structure: str, repo_link: str) -> str:
        return await self.generator.acall({
            "project_summary": project_summary,
            "folder_structure": folder_structure,
            "repo_link": repo_link
        })


def generate_installation_guide(project_summary: str, folder_structure: str, repo_link: str, ia_component: InstallationQA) -> str:
    # Use the InstallationQA component to generate the installation guide
    installation_guide = ia_component.call(
        project_summary, folder_structure, repo_link)

    return installation_guide


ia = InstallationQA(**model)

if combined_summary:
    # Generate the installation guide
    installation_guide = generate_installation_guide(
        combined_summary, folder_structure_str, repository_url, ia_component=ia)
    installation_guide = get_description_data(installation_guide)

    installation_guide_markdown = (installation_guide)
    print(installation_guide_markdown)
    display(Markdown(installation_guide_markdown))

**Getting Started**

Welcome to the Daraz Scraper project! This guide will walk you through the steps to install and run the project on your local machine.

**Prerequisites**
-----------------

Before we begin, make sure you have the following software installed:

* Node.js (v14 or higher) - [Download](https://nodejs.org/en/download/)
* Docker (optional) - [Download](https://www.docker.com/get-started)
* A code editor or IDE of your choice

**Installation**
---------------

### 1. Clone the repository

Open a terminal and run the following command to clone the project:

```bash
git clone https://github.com/Eemayas/Daraz_Scraper.git
```

### 2. Install dependencies

Navigate into the project directory and install the required dependencies using npm or yarn:

```bash
cd Daraz_Scraper
npm install
# or
yarn install
```

### 3. Configure environment variables (optional)

If you're planning to use Docker, create a new file called `.env.local` in the root of the project with the following con

**Getting Started**
====================

Welcome to the Daraz Scraper project! This guide will walk you through the steps to install and run the project on your local machine.

**Prerequisites**
-----------------

Before we begin, make sure you have the following software installed:

* Node.js (v14 or higher) - [Download](https://nodejs.org/en/download/)
* Docker (optional) - [Download](https://www.docker.com/get-started)
* A code editor or IDE of your choice

**Installation**
---------------

### 1. Clone the repository

Open a terminal and run the following command to clone the project:

```bash
git clone https://github.com/Eemayas/Daraz_Scraper.git
```

### 2. Install dependencies

Navigate into the project directory and install the required dependencies using npm or yarn:

```bash
cd Daraz_Scraper
npm install
# or
yarn install
```

### 3. Configure environment variables (optional)

If you're planning to use Docker, create a new file called `.env.local` in the root of the project with the following content:

```makefile
DB_URL=mongodb://localhost:27017/
```

Replace `DB_URL` with your MongoDB connection string if it's different.

### 4. Build and start the project

Run the following command to build and start the project:

```bash
npm run dev
# or
yarn dev
```

This will start the development server, and you should see a message indicating that the server is running on port 3000.

**Running the Project**
----------------------

To access the project in your browser, navigate to `http://localhost:3000` .

### Testing

We use Jest for testing. To run tests, execute:

```bash
npm run test
# or
yarn test
```

**Troubleshooting**
-------------------

Common issues and their solutions:

* **Error: Cannot find module 'mongodb'**: Make sure you have MongoDB installed on your local machine.
* **Error: Cannot connect to MongoDB**: Check that the connection string in `.env.local` is correct, and try restarting the project.
* **Error: Docker container failed to start**: Try deleting the `node_modules` directory and running `npm install` again.

**Summary**
----------

That's it! You've successfully installed and run the Daraz Scraper project on your local machine. If you encounter any issues during installation, refer to the troubleshooting section above for solutions. Happy coding!

<!-- @format -->

### API Refrence


In [36]:
import os
from pathlib import Path
from typing import List, Dict
from prettytable import PrettyTable
from lightrag.core.generator import Generator
from lightrag.core.component import Component
from lightrag.components.model_client import OllamaClient
from tqdm import tqdm

api_template_v1 = r"""<SYS>
You are an API reference extraction assistant specialized in coding files.
</SYS>
Please extract the API reference from the following code and provide the following information:
1. API endpoint
2. Purpose of the API
3. Parameters
4. Parameter types
5. Parameter descriptions
6. HTTP method

Format:

#### {Purpose of the API}

```http
  {HTTP method} {API endpoint}
```

| Parameter | Type     | Description                |
| :-------- | :------- | :------------------------- |
{parameter_rows}

Example:

#### Get all items

```http
  GET /api/items
```

| Parameter | Type     | Description                |
| :-------- | :------- | :------------------------- |
| `api_key` | `string` | **Required**. Your API key |
| `limit`   | `integer`| **Optional**. Limit the number of items |


If there is no API Reference, please return "No API Reference". No description needed in that case. Avoid asking for response like this "Let me know if you'd like me to clarify anything!" and write the Notes in third person narrative

Code:
{{input_str}}
"""
api_template = r"""
You are an API reference extraction assistant specializing in coding files. Your task is to identify and extract information about HTTP API methods from the provided code. Focus only on endpoints that use HTTP methods (GET, POST, PUT, DELETE, etc.).

For each API reference found, provide the following details:
1. API endpoint
2. Purpose of the API
3. Parameters
4. Parameter types
5. Parameter descriptions
6. HTTP method

If the code does not include any API references or if no HTTP methods are present, return "No API Reference."

Format:

#### {Purpose of the API}

```http
  {HTTP method} {API endpoint}
```

| Parameter | Type     | Description                |
| :-------- | :------- | :------------------------- |
{parameter_rows}

Example:

#### Get all items

```http
  GET /api/items
```

| Parameter | Type     | Description                |
| :-------- | :------- | :------------------------- |
| `api_key` | `string` | **Required**. Your API key |
| `limit`   | `integer`| **Optional**. Limit the number of items |

Code:
{{input_str}}

"""


class APIReferenceExtractor(Component):
    def __init__(self, model_client: OllamaClient, model_kwargs: dict):
        super().__init__()
        self.generator = Generator(
            model_client=model_client,
            model_kwargs=model_kwargs,
            template=api_template,
        )

    def call(self, input: str) -> str:
        return self.generator.call({"input_str": input})

    async def acall(self, input: str) -> str:
        return await self.generator.acall({"input_str": input})


def generate_api_reference(path: Path, ignore_list: List[str], api_component: APIReferenceExtractor, ignore_extensions: List[str]) -> List[Dict[str, str]]:
    """Generate an API reference of files in the given path using the model."""
    api_reference = []
    files_to_process = []

    for root, dirs, files in os.walk(path):
        relative_root = os.path.relpath(root, path)

        if any(ignored in relative_root.split(os.sep) for ignored in ignore_list):
            continue

        for file in files:
            file_path = Path(root) / file

            if any(ignored in file_path.parts for ignored in ignore_list):
                continue

            if any(ignore in file_path.name for ignore in specific_ignores_api):
                continue

            if file_path.suffix.lower() in api_ignore_extensions:
                continue

            files_to_process.append(file_path)

    pbar = tqdm(files_to_process, unit="file")
    for file_path in pbar:
        # Update the description dynamically
        pbar.set_description(f"Processing files - {file_path}")
        try:
            with open(file_path, 'r') as f:
                file_content = f.read()

            api_text = api_component.call(file_content)
            api_reference.append(
                {"file": file_path, "api_reference": api_text})
        except Exception as e:
            api_reference.append(
                {"file": file_path, "api_reference": f"Error processing file: {str(e)}"})

    return api_reference


api_a = APIReferenceExtractor(**model)

if repo_name:
    path = Path(repo_name)
    if not path.is_dir():
        print(f"The path {path} is not a directory.")
    api_reference = generate_api_reference(path, ignore_list=ignore_list,
                                           api_component=api_a, ignore_extensions=ignore_extensions)

    # Initialize PrettyTable
    api_table = PrettyTable()
    api_table.field_names = ["File", "api_reference"]

    # Create a list to hold data for the DataFrame
    api_data_for_excel = []

    for item in api_reference:
        description_data = get_description_data(item["api_reference"])
        if "No API Reference" in description_data:
            continue
        api_table.add_row([item["file"], description_data])
        api_data_for_excel.append(
            {"File": item["file"], "api_reference": description_data})

    # Print the PrettyTable
    print(api_table)
else:
    print("Repository cloning failed or was skipped.")

Processing files - Daraz_Scraper\types\index.ts: 100%|██████████| 17/17 [00:19<00:00,  1.15s/file]             

+-------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
|                 File                |                                                                               api_reference                                                                                |
+-------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Daraz_Scraper\app\api\cron\route.ts |                                                                           #### Get all products                                                                            |
|                                     |                                                                                                             




In [37]:
for item in api_reference:
    if isinstance(item, dict) and "api_reference" in item:
        description_data = get_description_data(item["api_reference"])
        if "No API Reference" in description_data:
            continue
        api_table.add_row([item["file"], description_data])
        api_data_for_excel.append(
            {"File": item["file"], "api_reference": description_data})
else:
    print(f"Unexpected item format: {item}\n")

Unexpected item format: {'file': WindowsPath('Daraz_Scraper/types/index.ts'), 'api_reference': GeneratorOutput(data='#### No API Reference.', error=None, usage=None, raw_response='#### No API Reference.', metadata=None)}



In [38]:
if api_data_for_excel:
    # Convert the list to a DataFrame
    df_api_data = pd.DataFrame(api_data_for_excel)

    # Define the path and name for the Excel file
    excel_path = 'api_reference.xlsx'

    # Save DataFrame to an Excel file
    df_api_data.to_excel(excel_path, index=False, engine='openpyxl')
    print(f"api_reference saved to {excel_path}")

api_reference saved to api_reference.xlsx


In [39]:
# Convert to string in the desired format
api_reference_markdown = "# API Reference\n"
for i, entry in enumerate(api_data_for_excel, start=1):
    file = entry["File"]
    api_reference = entry["api_reference"]
    # api_reference_markdown += f"{i}. **{file}**\n{api_reference}\n\n"
    api_reference_markdown += f"{api_reference}\n"

print(api_reference_markdown)
display(Markdown(api_reference_markdown))

# API Reference
#### Get all products

```http
  GET /api/products
```

| Parameter | Type     | Description                |
| :-------- | :------- | :------------------------- |
| None      |          |                             |

There are no additional parameters or endpoints in this code snippet. The API endpoint `/api/products` uses the `GET` HTTP method to fetch all products from the database.

Note: This code snippet does not contain any other API methods like POST, PUT, DELETE etc.
#### Get all products

```http
  GET /api/products
```

| Parameter | Type     | Description                |
| :-------- | :------- | :------------------------- |
| None      |          |                             |

There are no additional parameters or endpoints in this code snippet. The API endpoint `/api/products` uses the `GET` HTTP method to fetch all products from the database.

Note: This code snippet does not contain any other API methods like POST, PUT, DELETE etc.



# API Reference
#### Get all products

```http
  GET /api/products
```

| Parameter | Type     | Description                |
| :-------- | :------- | :------------------------- |
| None      |          |                             |

There are no additional parameters or endpoints in this code snippet. The API endpoint `/api/products` uses the `GET` HTTP method to fetch all products from the database.

Note: This code snippet does not contain any other API methods like POST, PUT, DELETE etc.
#### Get all products

```http
  GET /api/products
```

| Parameter | Type     | Description                |
| :-------- | :------- | :------------------------- |
| None      |          |                             |

There are no additional parameters or endpoints in this code snippet. The API endpoint `/api/products` uses the `GET` HTTP method to fetch all products from the database.

Note: This code snippet does not contain any other API methods like POST, PUT, DELETE etc.


<!-- @format -->

### Contributing


In [41]:

def generate_contributing_guide(repo_link):
    # Extract the username and repository name from the link
    import re
    match = re.match(r'https://github.com/([^/]+)/([^/]+)', repo_link)
    if not match:
        raise ValueError("Invalid GitHub repository link")

    username, repo_name = match.groups()

    # Define the guide with placeholders for URLs
    guide_template = f"""
# Contributing

Contributions are welcome! Here are several ways you can contribute:

- **[Submit Pull Requests](https://github.com/{username}/{repo_name}/pulls)**: Review open PRs, and submit your own PRs.
- **[Join the Discussions](https://github.com/{username}/{repo_name}/discussions)**: Share your insights, provide feedback, or ask questions.
- **[Report Issues](https://github.com/{username}/{repo_name}/issues)**: Submit bugs found or log feature requests for {repo_name}.

### Contributing Guidelines

1. **Fork the Repository**:
    - Start by forking the project repository to your GitHub account.
2. **Clone the Repository**:
    - Clone your forked repository to your local machine using the command:
    ```sh
    git clone https://github.com/your-username/{repo_name}.git
    ```
    - Replace ``your-username`` with your GitHub username.
4. **Create a New Branch**:
    - Create a new branch for your changes using the command:
    ```sh
    git checkout -b your-branch-name
    ```
5. **Make Your Changes**:
    - Edit, add, or delete files as needed. Ensure your changes align with the project's contribution guidelines.
6. **Commit Your Changes**:
    - Stage your changes and commit them with a descriptive message:
      ```bash
      git add .
      git commit -m "Your descriptive message"
      ```
7. **Push Your Changes:**
    - Push your branch to your forked repository:
      ```bash
      git push origin your-branch-name
      ```
8. **Create a Pull Request (PR):**
    - Go to the original repository on GitHub and click “Compare & pull request.” Provide a clear description of the changes and submit the PR.

Once your PR is reviewed and approved, it will be merged into the main branch.
        """

    return guide_template


contribution_markdown = generate_contributing_guide(repository_url)
print(contribution_markdown)
display(Markdown(contribution_markdown))


# Contributing

Contributions are welcome! Here are several ways you can contribute:

- **[Submit Pull Requests](https://github.com/Eemayas/Daraz_Scraper/pulls)**: Review open PRs, and submit your own PRs.
- **[Join the Discussions](https://github.com/Eemayas/Daraz_Scraper/discussions)**: Share your insights, provide feedback, or ask questions.
- **[Report Issues](https://github.com/Eemayas/Daraz_Scraper/issues)**: Submit bugs found or log feature requests for Daraz_Scraper.

### Contributing Guidelines

1. **Fork the Repository**:
    - Start by forking the project repository to your GitHub account.
2. **Clone the Repository**:
    - Clone your forked repository to your local machine using the command:
    ```sh
    git clone https://github.com/your-username/Daraz_Scraper.git
    ```
    - Replace ``your-username`` with your GitHub username.
4. **Create a New Branch**:
    - Create a new branch for your changes using the command:
    ```sh
    git checkout -b your-branch-name
    ```


# Contributing

Contributions are welcome! Here are several ways you can contribute:

- **[Submit Pull Requests](https://github.com/Eemayas/Daraz_Scraper/pulls)**: Review open PRs, and submit your own PRs.
- **[Join the Discussions](https://github.com/Eemayas/Daraz_Scraper/discussions)**: Share your insights, provide feedback, or ask questions.
- **[Report Issues](https://github.com/Eemayas/Daraz_Scraper/issues)**: Submit bugs found or log feature requests for Daraz_Scraper.

### Contributing Guidelines

1. **Fork the Repository**:
    - Start by forking the project repository to your GitHub account.
2. **Clone the Repository**:
    - Clone your forked repository to your local machine using the command:
    ```sh
    git clone https://github.com/your-username/Daraz_Scraper.git
    ```
    - Replace ``your-username`` with your GitHub username.
4. **Create a New Branch**:
    - Create a new branch for your changes using the command:
    ```sh
    git checkout -b your-branch-name
    ```
5. **Make Your Changes**:
    - Edit, add, or delete files as needed. Ensure your changes align with the project's contribution guidelines.
6. **Commit Your Changes**:
    - Stage your changes and commit them with a descriptive message:
      ```bash
      git add .
      git commit -m "Your descriptive message"
      ```
7. **Push Your Changes:**
    - Push your branch to your forked repository:
      ```bash
      git push origin your-branch-name
      ```
8. **Create a Pull Request (PR):**
    - Go to the original repository on GitHub and click “Compare & pull request.” Provide a clear description of the changes and submit the PR.

Once your PR is reviewed and approved, it will be merged into the main branch.
        

<!-- @format -->

### Contributors


In [42]:
def generate_contributors_table(contributors):
    # Start with the table header
    table = "| Avatar | Contributor | GitHub Profile | No of Contributions |\n"
    table += "|:--------:|:--------------:|:----------------:|:-------------------:|\n"

    # Add each contributor to the table
    for contributor in contributors:
        table += (
            f"| <img src='{contributor.avatar_url}' width='40' height='40' style='border-radius:50%;'/> | "
            f"{contributor.name} | "
            f"[@{contributor.name}]({contributor.profile_url}) | "
            f"{contributor.contributions} |\n"
        )

    return table


# Example usage:
contributor_markdown = f'''
# Contributors\n
{generate_contributors_table(metadata.contributors)}
    '''
print(contributor_markdown)
display(Markdown(contributor_markdown))


# Contributors

| Avatar | Contributor | GitHub Profile | No of Contributions |
|:--------:|:--------------:|:----------------:|:-------------------:|
| <img src='https://avatars.githubusercontent.com/u/100434825?v=4' width='40' height='40' style='border-radius:50%;'/> | Eemayas | [@Eemayas](https://github.com/Eemayas) | 14 |

    



# Contributors

| Avatar | Contributor | GitHub Profile | No of Contributions |
|:--------:|:--------------:|:----------------:|:-------------------:|
| <img src='https://avatars.githubusercontent.com/u/100434825?v=4' width='40' height='40' style='border-radius:50%;'/> | Eemayas | [@Eemayas](https://github.com/Eemayas) | 14 |

    

<!-- @format -->

### License


In [43]:
license_markdown = """
# License

This project is licensed under the MIT License - see the [LICENSE](./LICENSE) file for details.

"""

<!-- @format -->

# Markdown


In [50]:
# markdown_template = r"""<SYS>
# You are an assistant that specializes in converting text into Markdown format.
# </SYS>
# Please convert the following into a Markdown formatted document:
# {{input_str}}
# Dont have predescription like "Here is the converted Markdown formatted document:\n\n" and
# Markdown:"""


# class MarkdownConverter(Component):
#     def __init__(self, model_client: OllamaClient, model_kwargs: dict):
#         super().__init__()
#         self.generator = Generator(
#             model_client=model_client,
#             model_kwargs=model_kwargs,
#             template=markdown_template,
#         )

#     def call(self, input: str) -> str:
#         return self.generator.call({"input_str": input})

#     async def acall(self, input: str) -> str:
#         return await self.generator.acall({"input_str": input})


# markdown_converter = MarkdownConverter(**model)

# # Convert the project overview to Markdown using the LLM
# key_feature_markdown_output = markdown_converter.call(key_feature)

# # Print the Markdown output
# print(get_description_data(key_feature_markdown_output))

In [51]:

combined_markdown = \
    header_markdown+"\n\n---\n" +\
    project_overview_markdown+"\n\n---\n" +\
    key_feature_markdown+"\n\n---\n" +\
    folder_structure_markdown+"\n\n---\n" +\
    installation_guide_markdown+"\n\n---\n" +\
    api_reference_markdown+"\n\n---\n" +\
    contribution_markdown+"\n\n---\n" +\
    contributor_markdown+"\n\n---\n" +\
    license_markdown+"\n\n---\n" +\
    ""


print(combined_markdown)


display(Markdown(combined_markdown))


<p align="center">
    <img src="https://img.icons8.com/nolan/512/1A6DFF/C822FF/shopping-basket-2.png" width="200" style="border-radius: 20px;" />
</p>
    

<p align="center">
    <h1 align="center">Daraz_Scraper</h1>
</p>
    

<p align="center">
  <img src="https://img.shields.io/github/license/Eemayas/Daraz_Scraper?style=flat&color=0080ff" alt="license">
  <img src="https://img.shields.io/github/last-commit/Eemayas/Daraz_Scraper?style=flat&logo=git&logoColor=white&color=0080ff" alt="last-commit">
  <img src="https://img.shields.io/github/languages/top/Eemayas/Daraz_Scraper?style=flat&color=0080ff" alt="repo-top-language">
  <img src="https://img.shields.io/github/languages/count/Eemayas/Daraz_Scraper?style=flat&color=0080ff" alt="repo-language-count">
  <img src="https://img.shields.io/github/actions/workflow/status/Eemayas/Daraz_Scraper/build.yml?branch=main&style=flat&color=0080ff" alt="build-status">
  <img src="https://img.shields.io/github/issues/Eemayas/Daraz_Scraper?style=f


<p align="center">
    <img src="https://img.icons8.com/nolan/512/1A6DFF/C822FF/shopping-basket-2.png" width="200" style="border-radius: 20px;" />
</p>
    

<p align="center">
    <h1 align="center">Daraz_Scraper</h1>
</p>
    

<p align="center">
  <img src="https://img.shields.io/github/license/Eemayas/Daraz_Scraper?style=flat&color=0080ff" alt="license">
  <img src="https://img.shields.io/github/last-commit/Eemayas/Daraz_Scraper?style=flat&logo=git&logoColor=white&color=0080ff" alt="last-commit">
  <img src="https://img.shields.io/github/languages/top/Eemayas/Daraz_Scraper?style=flat&color=0080ff" alt="repo-top-language">
  <img src="https://img.shields.io/github/languages/count/Eemayas/Daraz_Scraper?style=flat&color=0080ff" alt="repo-language-count">
  <img src="https://img.shields.io/github/actions/workflow/status/Eemayas/Daraz_Scraper/build.yml?branch=main&style=flat&color=0080ff" alt="build-status">
  <img src="https://img.shields.io/github/issues/Eemayas/Daraz_Scraper?style=flat&color=0080ff" alt="open-issues">
  <img src="https://img.shields.io/github/forks/Eemayas/Daraz_Scraper?style=flat&color=0080ff" alt="forks">
  <img src="https://img.shields.io/github/stars/Eemayas/Daraz_Scraper?style=flat&color=0080ff" alt="stars">
  <img src="https://img.shields.io/github/issues-pr/Eemayas/Daraz_Scraper?style=flat&color=0080ff" alt="pull-requests">
  <img src="https://img.shields.io/github/contributors/Eemayas/Daraz_Scraper?style=flat&color=0080ff" alt="contributors">
  <img src="https://img.shields.io/github/commit-activity/m/Eemayas/Daraz_Scraper?style=flat&color=0080ff" alt="commit-activity">
  <img src="https://img.shields.io/github/languages/code-size/Eemayas/Daraz_Scraper?style=flat&color=0080ff" alt="code-size">
  <img src="https://img.shields.io/github/repo-size/Eemayas/Daraz_Scraper?style=flat&color=0080ff" alt="repo-size">
  <img src="https://img.shields.io/github/downloads/Eemayas/Daraz_Scraper/total?style=flat&color=0080ff" alt="downloads">
  <img src="https://img.shields.io/github/sponsors/Eemayas/Daraz_Scraper?style=flat&color=0080ff" alt="sponsors">
  <img src="https://img.shields.io/github/v/release/Eemayas/Daraz_Scraper?style=flat&color=0080ff" alt="release-version">
  <img src="https://img.shields.io/codecov/c/github/Eemayas/Daraz_Scraper?style=flat&color=0080ff" alt="coverage">
  <img src="https://img.shields.io/codeclimate/quality/a/Eemayas/Daraz_Scraper?style=flat&color=0080ff" alt="code-quality">
  <img src="https://img.shields.io/david/Eemayas/Daraz_Scraper?style=flat&color=0080ff" alt="dependencies">
  <img src="https://img.shields.io/david/dev/Eemayas/Daraz_Scraper?style=flat&color=0080ff" alt="dev-dependencies">
  <img src="https://img.shields.io/snyk/vulnerabilities/github/Eemayas/Daraz_Scraper?style=flat&color=0080ff" alt="security">
  <img src="https://img.shields.io/website?style=flat&color=0080ff&url=https%3A%2F%2Fexample.com" alt="performance">
  <img src="https://img.shields.io/github/commit-activity/y/Eemayas/Daraz_Scraper?style=flat&color=0080ff" alt="activity">
  <img src="https://img.shields.io/docsify/docs?style=flat&color=0080ff" alt="documentation">
  <img src="https://img.shields.io/github/v/tag/Eemayas/Daraz_Scraper?style=flat&color=0080ff" alt="version">
</p>
    

<p align="center">
	<em>Constructed using the following tools and technologies:</em>
</p>

<p align="center">
  <img src="https://img.shields.io/badge/CSS3-1572B6.svg?style=for-the-badge&logo=CSS3&logoColor=white" alt="CSS3">
  <img src="https://img.shields.io/badge/TypeScript-3178C6.svg?style=for-the-badge&logo=TypeScript&logoColor=white" alt="TypeScript">
  <img src="https://img.shields.io/badge/JavaScript-F7DF1E.svg?style=for-the-badge&logo=JavaScript&logoColor=black" alt="JavaScript">
  <img src="https://img.shields.io/badge/HTML5-E34F26.svg?style=for-the-badge&logo=HTML5&logoColor=white" alt="HTML5">
  <img src="https://img.shields.io/badge/CSS3-1572B6.svg?style=for-the-badge&logo=CSS3&logoColor=white" alt="CSS">
  <img src="https://img.shields.io/badge/Electron-47848F.svg?style=for-the-badge&logo=Electron&logoColor=white" alt="Electron">
  <img src="https://img.shields.io/badge/Next.js-000000.svg?style=for-the-badge&logo=nextdotjs&logoColor=white" alt="Next.js">
</p>
    



---
# Project Overview

The project is a Daraz e-commerce platform data scraper that extracts product information from the website. It utilizes web scraping techniques to gather relevant data, including product attributes and seller information. 

This project uses JavaScript and TypeScript technologies, as well as the PUPPETEER library for web scraping and parsing JSON-formatted data. The key features of this project include its ability to extract a wide range of product attributes, such as product rating and review count, brand and title, description and highlights, price and discount information, and availability and stock status. Additionally, it can handle errors during the scraping process by throwing informative error messages.

---
# Key Features
- **PriceHistoryItem**: An object representing a single price history item.
- **User**: A type representing a user with an email property.
- **Product**: A complex type representing various product data and properties.
- **NotificationType**: An enumeration of possible notification types.
- **EmailContent**: An object containing subject and body fields for email content.
- **Price**: An object representing price information with text and value fields.

---
# Folder Structure
```sh
Daraz_Scraper/
├── .env.local.copy
├── .gitignore
├── app/
│   ├── api/
│   │   └── cron/
│   │       └── route.ts
│   ├── favicon.ico
│   ├── globals.css
│   ├── layout.tsx
│   ├── page.tsx
│   └── products/
│       └── [id]/
│           └── page.tsx
├── components/
│   ├── HeroCarousel.tsx
│   ├── Modal.tsx
│   ├── Navbar.tsx
│   ├── PriceInfoCard.tsx
│   ├── ProductCard.tsx
│   └── Searchbar.tsx
├── lib/
│   ├── action/
│   │   └── index.ts
│   ├── models/
│   │   └── product.model.ts
│   ├── mongoose.ts
│   ├── nodemailer/
│   │   └── index.ts
│   ├── scrapper/
│   │   └── index.ts
│   └── utils.ts
├── next.config.js
├── package-lock.json
├── package.json
├── postcss.config.js
├── public/
│   ├── assets/
│   │   ├── icons/
│   │   │   ├── arrow-down.svg
│   │   │   ├── arrow-right.svg
│   │   │   ├── arrow-up.svg
│   │   │   ├── bag.svg
│   │   │   ├── black-heart.svg
│   │   │   ├── bookmark.svg
│   │   │   ├── chart.svg
│   │   │   ├── check.svg
│   │   │   ├── chevron-down.svg
│   │   │   ├── comment.svg
│   │   │   ├── frame.svg
│   │   │   ├── hand-drawn-arrow.svg
│   │   │   ├── logo.svg
│   │   │   ├── mail.svg
│   │   │   ├── price-tag.svg
│   │   │   ├── red-heart.svg
│   │   │   ├── search.svg
│   │   │   ├── share.svg
│   │   │   ├── square.svg
│   │   │   ├── star.svg
│   │   │   ├── user.svg
│   │   │   └── x-close.svg
│   │   └── images/
│   │       ├── details.svg
│   │       ├── hero-1.svg
│   │       ├── hero-2.svg
│   │       ├── hero-3.svg
│   │       ├── hero-4.svg
│   │       ├── hero-5.svg
│   │       └── trending.svg
│   ├── demo/
│   │   ├── home-page.png
│   │   ├── product-page.png
│   │   └── track-product.png
│   ├── next.svg
│   └── vercel.svg
├── README.md
├── tailwind.config.ts
├── tsconfig.json
├── types/
│   └── index.ts
└── vercel.json

17 directories, 63 files
```

---
**Getting Started**
====================

Welcome to the Daraz Scraper project! This guide will walk you through the steps to install and run the project on your local machine.

**Prerequisites**
-----------------

Before we begin, make sure you have the following software installed:

* Node.js (v14 or higher) - [Download](https://nodejs.org/en/download/)
* Docker (optional) - [Download](https://www.docker.com/get-started)
* A code editor or IDE of your choice

**Installation**
---------------

### 1. Clone the repository

Open a terminal and run the following command to clone the project:

```bash
git clone https://github.com/Eemayas/Daraz_Scraper.git
```

### 2. Install dependencies

Navigate into the project directory and install the required dependencies using npm or yarn:

```bash
cd Daraz_Scraper
npm install
# or
yarn install
```

### 3. Configure environment variables (optional)

If you're planning to use Docker, create a new file called `.env.local` in the root of the project with the following content:

```makefile
DB_URL=mongodb://localhost:27017/
```

Replace `DB_URL` with your MongoDB connection string if it's different.

### 4. Build and start the project

Run the following command to build and start the project:

```bash
npm run dev
# or
yarn dev
```

This will start the development server, and you should see a message indicating that the server is running on port 3000.

**Running the Project**
----------------------

To access the project in your browser, navigate to `http://localhost:3000` .

### Testing

We use Jest for testing. To run tests, execute:

```bash
npm run test
# or
yarn test
```

**Troubleshooting**
-------------------

Common issues and their solutions:

* **Error: Cannot find module 'mongodb'**: Make sure you have MongoDB installed on your local machine.
* **Error: Cannot connect to MongoDB**: Check that the connection string in `.env.local` is correct, and try restarting the project.
* **Error: Docker container failed to start**: Try deleting the `node_modules` directory and running `npm install` again.

**Summary**
----------

That's it! You've successfully installed and run the Daraz Scraper project on your local machine. If you encounter any issues during installation, refer to the troubleshooting section above for solutions. Happy coding!

---
# API Reference
#### Get all products

```http
  GET /api/products
```

| Parameter | Type     | Description                |
| :-------- | :------- | :------------------------- |
| None      |          |                             |

There are no additional parameters or endpoints in this code snippet. The API endpoint `/api/products` uses the `GET` HTTP method to fetch all products from the database.

Note: This code snippet does not contain any other API methods like POST, PUT, DELETE etc.
#### Get all products

```http
  GET /api/products
```

| Parameter | Type     | Description                |
| :-------- | :------- | :------------------------- |
| None      |          |                             |

There are no additional parameters or endpoints in this code snippet. The API endpoint `/api/products` uses the `GET` HTTP method to fetch all products from the database.

Note: This code snippet does not contain any other API methods like POST, PUT, DELETE etc.


---

# Contributing

Contributions are welcome! Here are several ways you can contribute:

- **[Submit Pull Requests](https://github.com/Eemayas/Daraz_Scraper/pulls)**: Review open PRs, and submit your own PRs.
- **[Join the Discussions](https://github.com/Eemayas/Daraz_Scraper/discussions)**: Share your insights, provide feedback, or ask questions.
- **[Report Issues](https://github.com/Eemayas/Daraz_Scraper/issues)**: Submit bugs found or log feature requests for Daraz_Scraper.

### Contributing Guidelines

1. **Fork the Repository**:
    - Start by forking the project repository to your GitHub account.
2. **Clone the Repository**:
    - Clone your forked repository to your local machine using the command:
    ```sh
    git clone https://github.com/your-username/Daraz_Scraper.git
    ```
    - Replace ``your-username`` with your GitHub username.
4. **Create a New Branch**:
    - Create a new branch for your changes using the command:
    ```sh
    git checkout -b your-branch-name
    ```
5. **Make Your Changes**:
    - Edit, add, or delete files as needed. Ensure your changes align with the project's contribution guidelines.
6. **Commit Your Changes**:
    - Stage your changes and commit them with a descriptive message:
      ```bash
      git add .
      git commit -m "Your descriptive message"
      ```
7. **Push Your Changes:**
    - Push your branch to your forked repository:
      ```bash
      git push origin your-branch-name
      ```
8. **Create a Pull Request (PR):**
    - Go to the original repository on GitHub and click “Compare & pull request.” Provide a clear description of the changes and submit the PR.

Once your PR is reviewed and approved, it will be merged into the main branch.
        

---

# Contributors

| Avatar | Contributor | GitHub Profile | No of Contributions |
|:--------:|:--------------:|:----------------:|:-------------------:|
| <img src='https://avatars.githubusercontent.com/u/100434825?v=4' width='40' height='40' style='border-radius:50%;'/> | Eemayas | [@Eemayas](https://github.com/Eemayas) | 14 |

    

---

# License

This project is licensed under the MIT License - see the [LICENSE](./LICENSE) file for details.



---


In [52]:
# Specify the file name
file_name = f"{metadata.name}_README.md"

# Open the file in write mode with utf-8 encoding and save the content
with open(file_name, 'w', encoding='utf-8') as file:
    file.write(str(combined_markdown))

print(f"{file_name} has been created and saved.")

Daraz_Scraper_README.md has been created and saved.
