ENV

In [5]:
import os
from dotenv import load_dotenv 
load_dotenv() 

# Project settings, read from the process environment.
# Any variable that is not set yields None.

# Folder that holds the notebooks / per-notebook project folders.
SOURCE_DIR = os.environ.get("SOURCE_DIR")

# Git hosting: GitLab base URL and group, plus the user identity used in Git config.
GITLAB_PATH = os.environ.get("GITLAB_PATH")
GROUP_NAME = os.environ.get("GROUP_NAME")
USER_NAME = os.environ.get("USER_NAME")
USER_MAIL = os.environ.get("USER_MAIL")

# API tokens for the two hosting services.
PRIVATE_TOKEN_GITLAB = os.environ.get("PRIVATE_TOKEN_GITLAB")
PRIVATE_TOKEN_GITHUB = os.environ.get("PRIVATE_TOKEN_GITHUB")

# LLM server endpoint and the model name requested from it.
LLM_PATH = os.environ.get("LLM_PATH")
LLM_MODEL_NAME = os.environ.get("LLM_MODEL_NAME")

Create a folder for each .ipynb file

In [None]:
import os
import shutil

# Give every notebook found directly in SOURCE_DIR its own folder, named after
# the notebook (file name minus extension), and move the notebook into it.
for entry in os.listdir(SOURCE_DIR):
    if not entry.endswith('.ipynb'):
        continue

    stem, _extension = os.path.splitext(entry)
    notebook_dir = os.path.join(SOURCE_DIR, stem)

    # exist_ok=True: an already existing folder is reused instead of raising
    os.makedirs(notebook_dir, exist_ok=True)

    notebook_file = os.path.join(SOURCE_DIR, entry)
    shutil.move(notebook_file, notebook_dir)

LLM README

In [None]:
import nbformat
# Example: reuse your existing OpenAI setup
from openai import OpenAI
import openai
import httpx
from glob import glob
from tqdm import tqdm
import os
from nbconvert import PythonExporter
# OpenAI-compatible client that talks to the server configured in LLM_PATH.
# NOTE(review): "lm-studio" looks like a placeholder key for a local
# LM Studio server rather than a real secret — confirm.
client = OpenAI(
    api_key="lm-studio",
    base_url=LLM_PATH,
)


def convertNotebook(notebookPath):
    """Return the content of a Jupyter notebook exported as Python source.

    Args:
        notebookPath: Path of the ``.ipynb`` file to convert.

    Returns:
        The source string produced by nbconvert's ``PythonExporter``.

    Raises:
        OSError: If the notebook file cannot be opened.
    """
    # Notebook files are UTF-8 encoded JSON. Opening with the platform default
    # encoding (e.g. cp1252 on Windows) breaks on non-ASCII cell content.
    with open(notebookPath, encoding="utf-8") as fh:
        nb = nbformat.reads(fh.read(), nbformat.NO_CONVERT)

    exporter = PythonExporter()
    # from_notebook_node returns (output, resources); only the output is needed
    source, _resources = exporter.from_notebook_node(nb)
    return source


# Ask the LLM for a README for every notebook folder that does not have one yet.
for folder in tqdm(glob(f'{SOURCE_DIR}/*')):
    print(folder)

    notebook_path = f'{folder}/{os.path.basename(folder)}.ipynb'
    readme_path = f'{folder}/README.MD'

    # glob also returns plain files and folders without a matching notebook;
    # converting those would crash the whole run, so skip them.
    if not os.path.isfile(notebook_path):
        continue

    # A README from a previous run is kept as is.
    if os.path.exists(readme_path):
        continue

    notebook_str = convertNotebook(notebook_path)
    completion = client.chat.completions.create(
        model=LLM_MODEL_NAME,
        messages=[
            {"role": "system", "content": "This is code from kaggle competition, Write a detailed README file, as if it were code on Github, add to readme: Name of the competition, type of problem to be solved, link to the competition on Kaggle, description of the competition problem, libraries used, in the style of a scientific paper"},
            {"role": "user", "content": f"{notebook_str}"}
        ],
        temperature=0.3,
        # generous limits on every phase of the HTTP call (one hour each)
        timeout=httpx.Timeout(3600.0, read=3600.0, write=3600.0, connect=3600.0)
    )

    readme_text = completion.choices[0].message.content
    if not readme_text:
        # Writing nothing would leave an empty README.MD behind, and the
        # existence check above would then skip this folder on every retry.
        print(f'No README content returned for {folder}, skipping')
        continue

    # Explicit UTF-8: the generated text may contain characters that the
    # platform default encoding cannot represent.
    with open(readme_path, 'w', encoding='utf-8') as f:
        f.write(readme_text)


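This cell walks through every folder inside `SOURCE_DIR`, checks whether it is already a Git repository (by looking for a `.git` folder) and runs `git init` if it is not. It then adds the GitHub and GitLab remotes and writes the local user name, e-mail and branch tracking settings.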

In [None]:
import os
import subprocess

def add_git_config(folder):
    """Initialise ``folder`` as a Git repository if needed and configure it.

    Adds the ``github`` and ``gitlab`` remotes, sets the local user name and
    e-mail, renames ``master`` to ``main`` and writes the branch tracking
    settings. Every git command runs with ``cwd=folder``, so the working
    directory of the calling process is never changed.

    Args:
        folder: Path of the project folder; its base name is used as the
            repository name in both remote URLs.

    Raises:
        subprocess.CalledProcessError: If ``git init`` fails. The other git
            commands are best-effort and their exit codes are not checked
            (for example ``git remote add`` fails when the remote exists).
    """
    repo_name = os.path.basename(os.path.normpath(folder))

    def _git(*args, check=False):
        # Run one git command inside the repository folder.
        return subprocess.run(["git", *args], cwd=folder, check=check)

    # Look for .git inside the folder itself. The previous check joined the
    # folder's base name onto the already changed working directory and
    # therefore never found an existing repository.
    if not os.path.exists(os.path.join(folder, '.git')):
        print('Initializing repository at {}'.format(repo_name))
        _git('init', check=True)

    # Add Git remote for Github
    github_url = f"git@github.com:{USER_NAME}/{repo_name}.git"
    _git("remote", "add", "github", github_url)
    _git("config", "--local", "remote.github.fetch", "+refs/heads/*:refs/remotes/github/*")

    # Add Git remote for Gitlab
    gitlab_url = f"{GITLAB_PATH}/{GROUP_NAME}/{repo_name}.git"
    _git("remote", "add", "gitlab", gitlab_url)
    _git("config", "--local", "remote.gitlab.fetch", "+refs/heads/*:refs/remotes/gitlab/*")

    # Add user info to Git config
    _git("config", "--local", "user.name", USER_NAME)
    _git("config", "--local", "user.email", USER_MAIL)

    # Rename master to main if necessary, then write the tracking settings:
    # master -> gitlab, main -> github.
    # NOTE(review): after the rename there is no local "master" branch for
    # the branch.master.* settings to apply to — confirm this is intended.
    _git("branch", "-m", "master", "main")
    _git("config", "--local", "branch.master.remote", "gitlab")
    _git("config", "--local", "branch.master.merge", "refs/heads/master")
    _git("config", "--local", "branch.main.remote", "github")
    _git("config", "--local", "branch.main.merge", "refs/heads/main")

from glob import glob
# Configure every project folder inside SOURCE_DIR. glob also returns plain
# files, which cannot be used as a repository directory, so skip them.
for dirpath in glob(f'{SOURCE_DIR}/*'):
    if not os.path.isdir(dirpath):
        continue
    add_git_config(dirpath)

Create repository

In [13]:
import requests
from glob import glob
import os

def initialize_repository(url_base, folder, url, headers):
    """Create a remote repository named after ``folder`` via an HTTP POST.

    Args:
        url_base: Which service's payload to build: ``'gitlab'`` or ``'github'``.
        folder: Local project folder; its base name becomes the repository name.
        url: Endpoint that receives the POST request.
        headers: HTTP headers for the request (authentication etc.).

    Raises:
        ValueError: If ``url_base`` is neither ``'gitlab'`` nor ``'github'``.
        requests.exceptions.RequestException: If the request fails or times out.

    Prints whether the service answered with status 201; otherwise prints the
    status code and the response body.
    """
    repo_name = os.path.basename(folder)

    if url_base == 'gitlab':
        data = {
            'name': repo_name,
            'visibility': 'public',   # or private, internal
            # NOTE(review): hard-coded namespace id, presumably the target
            # group — confirm.
            "namespace_id": "2",
        }
    elif url_base == 'github':
        data = {
            'name': repo_name,
            'private': True,  # Set to False for public repositories
            'description': f'Description of {repo_name}: Kaggle competiton',
            # The API expects a JSON boolean here, not the string 'true'
            "is_template": True,
            "homepage": "https://github.com",
        }
    else:
        # Previously an unknown value fell through to an unbound `data`.
        raise ValueError(
            f"Unsupported url_base {url_base!r}; expected 'gitlab' or 'github'"
        )

    # A timeout keeps an unresponsive server from hanging the notebook forever
    response = requests.post(url, headers=headers, json=data, timeout=30)
    if response.status_code == 201:
        print(f"Successfully created repository '{repo_name}'")
    else:
        print(f"Failed to create repository '{repo_name}'. Status code: {response.status_code}")
        # The body usually carries the reason (bad token, name taken, ...)
        print(response.text)

# Endpoint and credentials used for the repository-creation request.

# GitLab variant (kept for reference; pass "gitlab" as url_base when using it):
# url = f"{GITLAB_PATH}/api/v4/projects"
# headers = {'Private-Token': PRIVATE_TOKEN_GITLAB}

# GitHub variant (active)
url = "https://api.github.com/user/repos"
headers = {
    'Authorization': f'Bearer {PRIVATE_TOKEN_GITHUB}',
    'Accept': 'application/vnd.github+json',
    'X-GitHub-Api-Version': '2022-11-28',
}

# Only the first entry returned by glob is processed.
# NOTE(review): this looks like a trial run limited to one repository —
# confirm before widening it to every folder.
for folder in glob(f'{SOURCE_DIR}/*')[:1]:
    initialize_repository("github", folder, url, headers)

# # Change visibility
# for i, folder in enumerate(glob('results/*')):
#     url = f"{GITLAB_PATH}/api/v4/projects/{i+10}"
#     data = {
#         'visibility': 'public',   # or public, internal
#     }
#     response = requests.put(url, headers=headers, data=data)

Failed to create repository '2-5d-cutting-model-baseline-inference'. Status code: 401


Commit

In [None]:
import os
import subprocess

def add_git_config(folder, rep, branch):
    """Stage everything in ``folder``, commit it and push it to a remote.

    Note: this definition reuses the name of the one-argument
    ``add_git_config`` defined in an earlier cell and replaces it once this
    cell has run.

    Args:
        folder: Path of the local repository.
        rep: Name of the Git remote to push to (e.g. ``'github'``).
        branch: Name of the branch to push.

    The git commands are best-effort: their exit codes are not checked, so a
    failed commit (e.g. nothing to commit) does not stop the push attempt.
    All commands run with ``cwd=folder``; the working directory of the
    calling process is left untouched.
    """
    print(os.path.abspath(folder))
    print('Committing and pushing repository at {}'.format(os.path.basename(folder)))

    subprocess.run(['git', 'add', '.'], cwd=folder)
    # Arguments are passed without a shell, so the message must not carry its
    # own quotes — they would become part of the commit message.
    subprocess.run(['git', 'commit', '-m', 'base commit'], cwd=folder)
    subprocess.run(['git', 'push', rep, branch], cwd=folder)

from glob import glob
# Commit and push every project folder inside SOURCE_DIR to GitHub.
# The configuration cell renames the local branch to "main" and binds "main"
# to the github remote, so "main" is the branch that exists to be pushed.
for dirpath in glob(f'{SOURCE_DIR}/*'):
    # glob also returns plain files; only directories can be repositories
    if not os.path.isdir(dirpath):
        continue
    add_git_config(dirpath, 'github', 'main')