In [1]:
from pathlib import Path
from unittest import result
from git import Repo

ModuleNotFoundError: No module named 'git'  (the `git` module is provided by the GitPython package; install it with `pip install GitPython`)

In [4]:
class MLProjectInitializer:
    """Scaffold a machine-learning project folder and push it to GitHub.

    Typical call order::

        init = MLProjectInitializer("my-project", github_url="https://...")
        init.create_structure()
        init.add_gitkeep_for_empty_dirs()  # optional: keep empty folders in git
        init.init_git()
        init.push_to_github()

    Attributes:
        project_name: Name of the project; also the name of the created folder.
        base_dir: Absolute directory the project folder is created in.
        project_path: ``base_dir / project_name``.
        github_url: URL of an already existing GitHub repository, or ``None``.
        repo: GitPython ``Repo`` handle; ``None`` until a repository is
            initialised or opened.
        remote: GitPython remote for ``origin``; ``None`` until ``add_remote``.
    """

    def __init__(self, project_name: str, base_dir: str = ".", github_url: str = None):
        """Store the project settings; nothing is created on disk yet.

        Args:
            project_name: Name of the project folder to create.
            base_dir: Parent directory (``~`` is expanded, path is resolved).
            github_url: URL of the existing GitHub repository to push to.
                Only required by ``add_remote`` / ``push_to_github``.
        """
        self.project_name = project_name
        self.base_dir = Path(base_dir).expanduser().resolve()
        self.project_path = self.base_dir / project_name
        self.github_url = github_url  # Existing repo URL
        # Set lazily by init_git() / add_remote(); defined here so later
        # methods can test for None instead of failing with AttributeError.
        self.repo = None
        self.remote = None

    # ---------------------------
    # 1. Create folder structure
    # ---------------------------
    def create_structure(self):
        """Create the project folders and the core files.

        Existing folders are left alone and existing files are never
        overwritten, so the method is safe to call repeatedly.
        """
        folders = [
            "data/raw",
            "data/processed",
            "data/external",
            "notebooks",
            "src/data",
            "src/features",
            "src/models",
            "src/visualization",
            "tests",
            "models",
            "reports/figures",
        ]

        for folder in folders:
            (self.project_path / folder).mkdir(parents=True, exist_ok=True)

        # Core files
        files = {
            "README.md": f"# {self.project_name}\n\nMachine Learning project.",
            ".gitignore": self._gitignore_content(),
            "requirements.txt": "",
            "setup.py": self._setup_py_content(),
            "src/__init__.py": "",
        }

        for file, content in files.items():
            file_path = self.project_path / file
            if not file_path.exists():
                # Explicit encoding: the platform default is not always UTF-8
                # and the project name is user supplied.
                file_path.write_text(content, encoding="utf-8")

    # ---------------------------
    # 1b. Keep empty folders under version control
    # ---------------------------
    def add_gitkeep_for_empty_dirs(self):
        """Put an empty ``.gitkeep`` file into every empty project sub-folder.

        Git does not track empty directories; the placeholder makes them
        committable. Git's own ``.git`` metadata directory is skipped.
        """
        # Materialise the listing first so that the files created below
        # cannot interfere with the directory walk.
        for folder in list(self.project_path.rglob("*")):
            if not folder.is_dir():
                continue
            # Never touch git's internal directories (e.g. .git/refs/tags).
            if ".git" in folder.relative_to(self.project_path).parts:
                continue
            if not any(folder.iterdir()):
                (folder / ".gitkeep").touch()

    # ---------------------------
    # 2. Initialize git repo locally
    # ---------------------------
    def init_git(self):
        """Initialise a git repository, commit everything, name the branch ``main``."""
        self.repo = Repo.init(self.project_path)
        self.repo.git.add(A=True)
        self.repo.index.commit("Initial commit")
        # Make sure branch is 'main'
        self.repo.git.branch("-M", "main")

    # ---------------------------
    # 3. Add remote (existing GitHub repo)
    # ---------------------------
    def add_remote(self):
        """Attach the ``origin`` remote, reusing it when it already exists.

        Raises:
            ValueError: If no ``github_url`` was supplied.
        """
        if not self.github_url:
            raise ValueError("Existing GitHub URL is required")
        if self.repo is None:
            # init_git() was not called on this instance (e.g. the repository
            # was created in an earlier session): open the one on disk.
            self.repo = Repo(self.project_path)
        if "origin" in [r.name for r in self.repo.remotes]:
            self.remote = self.repo.remotes.origin
        else:
            self.remote = self.repo.create_remote("origin", self.github_url)

    # ---------------------------
    # 4. Push to GitHub
    # ---------------------------
    def push_to_github(self):
        """Push local ``main`` to ``origin`` and print one summary line per ref.

        Raises:
            ValueError: If no ``github_url`` was supplied.
        """
        self.add_remote()
        push_infos = self.remote.push(refspec="main:main", set_upstream=True)
        for info in push_infos:
            print(f"Push info: {info.summary}, flags: {info.flags}")

    # ---------------------------
    # Helpers
    # ---------------------------
    @staticmethod
    def _gitignore_content():
        """Return the text of the generated ``.gitignore``.

        Patterns start at column 0 on purpose: git treats leading whitespace
        as part of the pattern, so indented lines would match nothing.
        """
        patterns = (
            "__pycache__/",
            "*.pyc",
            ".env",
            ".vscode/",
            ".idea/",
            "data/raw/",
            "models/",
        )
        return "\n".join(patterns) + "\n"

    @staticmethod
    def _setup_py_content():
        """Return the text of the generated minimal ``setup.py``."""
        return """from setuptools import setup, find_packages

setup(
    name="ml_project",
    packages=find_packages(),
)
"""


In [5]:
# Configure the scaffold: the project folder is created relative to the
# current working directory, and the GitHub URL is kept for a later push.
initializer = MLProjectInitializer(
    "marketing-mix-modelling-ml",
    github_url="https://github.com/Badisj/generative-ai-orchestration-cpg",
    base_dir="./",
)

# Create the folder tree and the core files on disk.
initializer.create_structure()