## Python snippets

Fetched from various places on the web or self-built (formerly mobile screenshots).

In [1]:
# Kernel / Restart kernel ... / run this cell before any other cell, it's required by them
import importlib.metadata as meta
import pandas as pd

# Note: There is no fixed mapping between a package name and its import name. You simply have to know it or look it up.

class StopExecution(Exception):
    """Exception used to abort the current notebook cell quietly."""

    def _render_traceback_(self):
        # Returning None here suppresses the traceback display
        return None

def countdown(n):
    """Yield n, n-1, n-2, ... for as long as the value is greater than zero."""
    remaining = n
    while True:
        if not remaining > 0:
            return
        yield remaining
        remaining -= 1

def wrapped_enumerate(iterable, max_value):
    """Enumerate ``iterable`` with an index that wraps around modulo ``max_value``."""
    position = 0
    for element in iterable:
        yield position % max_value, element
        position += 1

def waste_multiple_of_5s_with_feedback(waste_amount=6):
    """Block for ``waste_amount`` times five seconds while printing progress feedback.

    Args:
        waste_amount: Number of 5-second pauses to perform.
    """
    # Imported locally: this helper cell never imports ``time`` itself, so the
    # function only worked when some other cell had imported it beforehand.
    import time

    print("waiting ", end="")
    for remaining in countdown(waste_amount):
        print(remaining)
        time.sleep(5)
        print(".", end="")
    print("")

def display_table(table):
    """Show (name, version) rows as a left-aligned table without the index column."""
    frame = pd.DataFrame(table, columns=['Name', 'Version'])
    left_aligned = {'text-align': 'left'}
    header_rule = {'selector': 'th', 'props': [('text-align', 'left')]}
    styled = frame.style.set_properties(**left_aligned)
    styled = styled.set_table_styles([header_rule])
    styled = styled.hide(axis='index')
    display(styled)

def find_package(search_words):
    """Look up the installed version of each requested package.

    For every search word an exact name match is preferred (case-insensitive,
    treating ``-``, ``_`` and ``.`` as equal). Only when no such distribution
    exists, the first installed distribution whose name contains the word is
    used, so "pandas" no longer resolves to e.g. "geopandas" by accident.

    Args:
        search_words: Iterable of package names (or name fragments).

    Returns:
        A list of ``(word, version)`` tuples in the order of ``search_words``.

    Raises:
        StopExecution: If at least one word matches no installed distribution.
            Before raising, the table of results is displayed with "n/a" as the
            version of every missing package.
    """
    import re

    def normalize(name):
        # Collapse runs of "-", "_" and "." so equivalent spellings compare equal
        return re.sub(r"[-_.]+", "-", name).lower()

    installed_packages = []
    for dist in meta.distributions():
        name = dist.metadata.get('Name')
        if name:  # broken installs can expose a distribution without a name
            installed_packages.append((name, dist.version))
    installed_packages.sort(key=lambda pkg: pkg[0].lower())
    # display_table(installed_packages)

    fault = False
    results = []
    for word in search_words:
        wanted = normalize(word)
        match = next((pkg for pkg in installed_packages if normalize(pkg[0]) == wanted), None)
        if match is None:
            # Fall back to the original substring search
            match = next((pkg for pkg in installed_packages if word.lower() in pkg[0].lower()), None)
        if match is not None:
            results.append((word, match[1]))
        else:
            fault = True
            print("A needed package is missing. Aborting cell execution")
            results.append((word, "n/a"))
    if fault:
        display_table(results)
        raise StopExecution()
    return results
    
# display_table(find_package(["requests", "asyncio", "transformers", "numpy", "beautifulsoup4", "ddgs", "sentence-transformers", "scikit-learn", "pandas"]))

In [None]:
### Fetch only the code cells from *.ipynb and print them

#!/usr/bin/env python3
import nbformat
import os

# Every *.ipynb file of the current directory is processed
notebook_files = list(filter(lambda name: name.endswith('.ipynb'), os.listdir('.')))
for nb_file in notebook_files:
    with open(nb_file, 'r', encoding='utf-8') as f:
        notebook = nbformat.read(f, as_version=4)
    # Print the source of the code cells only, other cell types are skipped
    for cell in notebook['cells']:
        if cell['cell_type'] != 'code':
            continue
        print(cell['source'])
    print(f"\n===== End of {nb_file} =====\n")

In [None]:
### Open webbrowser with file automatically
import os
import webbrowser

from pathlib import Path

# NOTE: `output_file` is not defined in this cell; it has to be set before running it.
try:
    # chrome_path = '/usr/bin/google-chrome %s'
    # webbrowser.register('chrome', None, webbrowser.BackgroundBrowser(chrome_path))
    chrome = webbrowser.get('chrome')
    # as_uri() builds a well-formed, percent-encoded file URI. Prefixing the absolute
    # path with "file:///" produced "file:////..." on POSIX and left spaces unescaped.
    file_uri = Path(os.path.abspath(output_file)).as_uri()
    success = chrome.open(file_uri)
    if not success:
        print("Failed to open the browser.")
except Exception as e:
    # Best effort: report the problem (e.g. no 'chrome' browser registered) and go on
    print(f"Error opening browser: {e}")

In [None]:
# Abort the cell early if the ddgs package is not installed
find_package(["ddgs"])
# kernel reset might be necessary, because of caches, ...
import logging
import http.client as http_client
# Turn on the debug output of http.client connections
http_client.HTTPConnection.debuglevel = 1
# Configure the root logger and lower its level to DEBUG
logging.basicConfig()
logging.getLogger().setLevel(logging.DEBUG)
# NOTE(review): this logger name belongs to the urllib3 copy bundled with requests;
# confirm that the HTTP stack used by ddgs actually logs under this name.
requests_log = logging.getLogger("requests.packages.urllib3")
requests_log.setLevel(logging.DEBUG)
requests_log.propagate = True
# usage example:
from ddgs import DDGS
ddgs = DDGS()
# As the last expression of the cell, the search result is shown as the cell output
ddgs.text("Gimme me weather forecast pages", max_results=5)
# ... DDGS aggregates search results from multiple engines, not just DuckDuckGo 

In [None]:
# isolated notebook related environment using uv, but be reminded that now each notebook needs it's own folder
# mkdir folder; cd folder
!uv init
!uv add --dev ipykernel
!uv run ipython kernel install --user --env VIRTUAL_ENV=$(uv env) --name=notebook_task_1 --display-name="notebook (task 1)"
!uv add pandas ddgs matplotlib
# Go to Kernel > Change Kernel in JupyterLab UI.
# Select the newly created kernel named "notebook (task 1)"
# 
# working inside this notebooks with this kernel has an isolated environment using only the above added packages
# 

In [None]:
# pretty print the firefox bookmarks json export, use vi and daB to delete from { to } including the two braces.
import json
filename = "data/firefox_bookmarks.json"
try:
    with open(filename, "r", encoding="utf-8") as f:
        bookmarks = json.load(f)
except json.JSONDecodeError as e:
    print("JSON syntax error ", e)
else:
    # Only reached when parsing succeeded. Previously a syntax error was reported and
    # then followed by a NameError (or stale data from an earlier run was printed),
    # because `bookmarks` was used regardless of the outcome.
    pretty_bookmarks = json.dumps(bookmarks, indent=4, ensure_ascii=False)
    print(pretty_bookmarks)

In [None]:
# this old snippet uses the GPU to run the given ollama model, try/catch still missing
from ollama import chat

OLLAMA_MODEL = "qwen2.5-coder:32b-instruct"

def create_messages():
    """Return the chat history holding the single, fixed user request of this demo."""
    request_text = (
        "gimme a python code example using DDGS() call to execute a query and return 10 urls "
        "checking them with cosine_similarity to proove they are related to the given query "
        "and present a summarize of each."
    )
    user_message = {"role": "user", "content": request_text}
    return [user_message]

def ask_ai(messages, max_retries=3):
    """Send ``messages`` to the Ollama model and return the text of its reply.

    Args:
        messages: Chat history that is passed on to ``chat()`` unchanged.
        max_retries: Maximum number of attempts. At least one attempt is always made.

    Returns:
        The content of the response message.

    Raises:
        Exception: Whatever the last attempt raised, once all attempts have failed.
    """
    attempts = max(1, max_retries)
    for attempt in range(1, attempts + 1):
        try:
            response = chat(model=OLLAMA_MODEL, messages=messages, options={"num_gpu": 132}) # force GPU usage
            return response.message.content
        except Exception as error:  # broad on purpose: any failed attempt is retried
            if attempt == attempts:
                raise
            print(f"Attempt {attempt} of {attempts} failed: {error}. Retrying ...")

print(ask_ai(create_messages()))

In [None]:
from ollama import chat

def create_messages(user_task, input_data):
    """Build the system and user message for a code generation request.

    The user message embeds the task description and the input data in a fixed
    instruction template; every template line is indented by four spaces.
    """
    template_lines = [
        "",
        "You are an automation assistant trained in Python development and scripting.",
        "Your job is to take the following user input and perform a highly specific automated transformation.",
        "### Here's the task:",
        f"{user_task}",
        "### Here's the input data (if any):",
        f"{input_data}",
        "### Your job is to:",
        "- Write complete Python code for automating the task.",
        "- Include all necessary libraries and handle exceptions.",
        "- Optimize for readability and reuse.",
        "- Return only runnable Python code (no explanation).",
        "",
    ]
    prompt = "\n    ".join(template_lines)
    system_message = {"role": "system", "content": "You are an expert Python automation engineer."}
    user_message = {"role": "user", "content": prompt}
    return [system_message, user_message]

def doit():
    """Request code for the hard-coded CSV repair task from the model and print the answer."""
    model_name = "qwen2.5-coder:32b-instruct"
    task_description = "Fix malformed CSV where some rows have more or fewer columns than the header. Align data properly. Drop incomplete rows."
    data_path = "data/broken.csv"
    chat_options = {"num_gpu": 132, "num_ctx": 32768} # force GPU usage
    conversation = create_messages(task_description, data_path)
    response = chat(model=model_name, messages=conversation, options=chat_options)
    print(response.message.content)

doit()

In [None]:
# ... the same but with gradio GUI
find_package(["gradio","IProgress"])
import time
import gradio as gr
from IPython.display import display, HTML
from ollama import chat

def create_messages(user_task, input_data):
    """Return the two-message chat history (system + user) for a code generation request.

    The task description and the input data are inserted into a fixed instruction
    template whose lines are all indented by four spaces.
    """
    indent = "\n    "
    pieces = (
        "",
        "You are an automation assistant trained in Python development and scripting.",
        "Your job is to take the following user input and perform a highly specific automated transformation.",
        "### Here's the task:",
        f"{user_task}",
        "### Here's the input data (if any):",
        f"{input_data}",
        "### Your job is to:",
        "- Write complete Python code for automating the task.",
        "- Include all necessary libraries and handle exceptions.",
        "- Optimize for readability and reuse.",
        "- Return only runnable Python code (no explanation).",
        "",
    )
    prompt = indent.join(pieces)
    messages = []
    messages.append({"role": "system", "content": "You are an expert Python automation engineer."})
    messages.append({"role": "user", "content": prompt})
    return messages

# Defined at module level: generate_code() looks the name up globally, a variable
# that is local to interface() is not visible here.
OLLAMA_MODEL = "qwen2.5-coder:32b-instruct"

def generate_code(task, data):
    """Ask the Ollama model for Python code that solves ``task`` on ``data``.

    Args:
        task: Task description entered in the UI.
        data: Input data or path entered in the UI.

    Returns:
        The content of the model's response message.
    """
    response = chat(
        model=OLLAMA_MODEL,
        # Use the values that were actually passed in; the previous names
        # user_task/input_data are not defined in this function.
        messages=create_messages(task, data),
        options={"num_gpu": 132, "num_ctx": 32768}, # force GPU usage
    )
    return response.message.content

# TODO: change it to use block / finish_btn like below ...
def interface():
    """Serve generate_code() through a Gradio interface for two minutes, then close it.

    The interface is launched on port 10000, a clickable link is displayed and the
    function then waits 24 * 5 seconds before the server is closed again.
    """
    user_task  = "Fix malformed CSV where some rows have more or fewer columns than the header. Align data properly. Drop incomplete rows."
    input_data = "data/broken.csv"
    user_task_input  = gr.Textbox(label="Task Description",   lines=10, placeholder=user_task)
    input_data_input = gr.Textbox(label="Input Data or Path", lines=3,  placeholder=input_data)
    iface = gr.Interface(
        fn=generate_code,
        inputs=[user_task_input, input_data_input],
        outputs=gr.Textbox(label="Generated Python Code", lines=25),
        title="Auto Developer (120 second timeout)",
        description="Describe the task and paste input - get working Python code.",
        css="footer {display:none !important;}"
    )
    iface.launch(server_name="0.0.0.0", server_port=10000) # run it locally is preferred
    try:
        # NOTE(review): the link uses port 17779 while the server listens on 10000,
        # presumably because of a port mapping in front of the notebook host - confirm.
        display(HTML('<a href="http://localhost:17779" target="_blank" style="font-size:20px;"> --- Click me followed by a click at Submit ---</a>'))
        waste_multiple_of_5s_with_feedback(24)
    finally:
        # Also runs when the wait is interrupted, so the port never stays occupied
        iface.close() # keeps the port reusable

interface()

In [None]:
# this is one response from the above requests... it just works... this time...
import pandas as pd
def fix_malformed_csv(file_path, output_file_path='data/cleaned.csv'):
    """Write a copy of a CSV file that only keeps the rows matching the header width.

    The first row of the file is taken as the header. Every following row with
    more or fewer fields than the header (blank lines included) is dropped; rows
    with the right number of fields are kept unchanged, even if single cells are
    empty. Problems are reported on stdout instead of being raised.

    Args:
        file_path: Path of the CSV file to clean.
        output_file_path: Path the cleaned CSV is written to.
    """
    import csv
    import os

    try:
        # The csv module copes with rows of any length, whereas a fixed-width
        # parse fails as soon as a row is longer than the first one.
        with open(file_path, newline='', encoding='utf-8') as source:
            rows = list(csv.reader(source))
        if not rows:
            print(f"An error occurred: {file_path} contains no data")
            return
        header, *body = rows
        expected_columns = len(header)
        complete_rows = [row for row in body if len(row) == expected_columns]
        with open(output_file_path, 'w', newline='', encoding='utf-8') as target:
            writer = csv.writer(target, lineterminator=os.linesep)
            writer.writerow(header)
            writer.writerows(complete_rows)
        print(f"Cleaned data saved to {output_file_path}")
    except (OSError, UnicodeDecodeError, csv.Error) as e:
        print(f"An error occurred: {e}")
# Example usage
fix_malformed_csv('data/broken.csv')

In [None]:
# transformer visual displayed
find_package(["gradio","transformers", "sentencepiece", "protobuf"]) # gradio 5.43.1
import os
os.environ["TRANSFORMERS_NO_TQDM"] = "true" # no tqdm console output of transformer downloads
import random
import time
import threading
import gradio as gr
from IPython.display import HTML
from transformers import AutoTokenizer

def colorize(text, model_name):
    """Tokenize ``text`` with a transformer tokenizer and render the tokens as an HTML table.

    The first table row shows the text piece of every token in its own color,
    followed by the model name; the second row shows the token ids and the
    token count. Colors are reused when there are more tokens than colors.

    Args:
        text: The text to tokenize.
        model_name: Name passed to ``AutoTokenizer.from_pretrained``.

    Returns:
        The HTML of one table as a string.
    """
    import html

    colors = [ # 32 preselected visible colors
        "#FF5733", "#33FF57", "#3357FF", "#FF33A1", "#A133FF", "#33FFF6",
        "#FFC733", "#FF8633", "#33FF8A", "#338AFF", "#FF3380", "#8033FF",
        "#33FFD1", "#FFE933", "#FF6E33", "#33FFB2", "#336FFF", "#FF3377",
        "#9B33FF", "#33FFE3", "#FF9A33", "#33FFA1", "#335CFF", "#FF33D1",
        "#A633FF", "#33FFD7", "#FFE533", "#FF5733", "#33FF74", "#3380FF",
        "#FF33B2", "#8033FF" ]
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    encoding = tokenizer.encode_plus(text, return_offsets_mapping=True, add_special_tokens=False)
    offsets = encoding['offset_mapping']
    ids = encoding['input_ids']
    tokens = tokenizer.convert_ids_to_tokens(ids)
    colors_available = len(colors)
    missing_colors = len(tokens)-colors_available
    if missing_colors>0:
        print(f"Need {missing_colors} more colors. Now reusing existing colors.")
    token_cells = []
    id_cells = []
    for index, (id_val, _token, (start, end)) in wrapped_enumerate(zip(ids, tokens, offsets), colors_available):
        # The text comes from the user, so it is escaped before it is placed into the markup
        piece = html.escape(text[start:end])
        token_cells.append(f"<td>&nbsp;<span style='color: {colors[index]};'>{piece}</span></td>")
        id_cells.append(f"<td style='text-align: center;'>{id_val:d}&nbsp;</td>")
    all_tokens = "".join(token_cells)
    all_ids    = "".join(id_cells)
    # one table for each transformer to achieve improved readability (only a tiny amount of space after each token)
    result = (f"<table class='no-space'><tr style='font-size: 20px;'>{all_tokens}<td style='text-align: center; color: red;'>{html.escape(model_name)}</td></tr>"
              f"<tr>{all_ids}<td style='text-align: center; color: green;'>token count {len(tokens)}</td></tr></table>")
    return result
    
def colorize_text_with_transformer(text, model_name, all_transformers, progress=gr.Progress()):
    """Colorize ``text`` with one tokenizer, or with a whole batch of them.

    An empty ``model_name`` selects the first entry of ``all_transformers``. When
    the last entry is selected, every entry except the last two is processed and
    the resulting tables are concatenated, with progress being reported.

    Returns:
        A tuple of the HTML result and a gradio update that enables the finish button.
    """
    selected = model_name or all_transformers[0]
    if selected != all_transformers[-1]:
        return colorize(text, selected), gr.update(interactive=True) # enable finish button
    batch = all_transformers[:-2] # the M$ transformer prefers to be single
    batch_size = len(batch)
    progress(0, desc="Starting...")
    tables = []
    for position, name in enumerate(batch):
        progress(position/batch_size, desc=f"Processing {name}")
        tables.append(colorize(text, name))
    progress(1.0, desc="Done!")
    return "".join(tables), gr.update(interactive=True) # enable finish button

def interface():
    """Build and launch the Gradio Blocks app for the colored token visualization.

    The app consists of a text box, a tokenizer dropdown, a Submit button that
    renders the colorized tokens and a Finish button that closes the app.
    launch() is called with prevent_thread_lock=True, so the cell continues to
    run after the server has been started.
    """
    # The last entry is not a model: colorize_text_with_transformer() treats it as
    # the request to process a batch of the listed models.
    transformer_models = [ "bert-base-uncased", "bert-base-cased", "roberta-base", "distilbert-base-uncased", "xlm-roberta-base",
                           "albert-base-v2", "gpt2", "facebook/bart-base", "t5-small", "openai-gpt",
                           "google/electra-base-discriminator", "allenai/longformer-base-4096",
                           "google/pegasus-large", "microsoft/deberta-v3-base", "Everything but the last" ]
    css = """
    footer { display: none !important; } /* hide gradio marketing footer */
    #customtextbox textarea { font-size: 24px !important; } /* font enlargement */
    #customdropdown input { font-size: 20px; } /* font enlargement dropdown selected text */
    ul { font-size: 20px; } /* font enlargement dropdown listbox texts */
    table.no-space { border-collapse: collapse; border: 1px solid black; } /* dense table */
    table.no-space th, table.no-space td { border: 1px solid black; padding: 0; } /* tiny cell padding */
    .progress-text { display: none !important; } /* hide seconds counter */
    .progress-level-inner { font-size: 20px !important; } /* enlarge progress description */
    """
    # Default content of the input text box
    text = "Many words maps to one token, but not always: unaffordable. Characters might be grouped: 1234567890."
    SERVER_IP="0.0.0.0"
    SERVER_PORT=10000
    demo = gr.Blocks(css=css, title="Colorful Token Visualization of Text with Transformer Selection")
    def close_app():
        # Click handler of the Finish button
        print("Finish was clicked. Closing the App...")
        demo.close() # does not always get executed
        return
    with demo:
        with gr.Row():
            input_text = gr.Textbox(label="Input Text (english letters, digits, and punctuation)", value=text, elem_id="customtextbox")
            model_dropdown = gr.Dropdown(label="Choose Transformer", value=transformer_models[0], choices=transformer_models, elem_id="customdropdown")
        submit_btn = gr.Button("Submit", interactive=True)
        finish_btn = gr.Button("Finish", interactive=False) # Submit button should be clicked at least once ...
        output_html = gr.HTML(label="Output Text", value="<b>Click at Submit...</b>")
        # The handler returns two values: the HTML for output_html and the update that enables finish_btn
        submit_btn.click(lambda text, model_name: colorize_text_with_transformer(text, model_name, transformer_models), 
                         inputs=[input_text, model_dropdown], 
                         outputs=[output_html, finish_btn])
        finish_btn.click(fn=close_app, inputs=[])
        # NOTE(review): launch() is called while the Blocks context is still open - confirm this is intended
        demo.launch(server_name=SERVER_IP, server_port=SERVER_PORT, prevent_thread_lock=True)

# NOTE(review): the link uses port 17779 while the app is launched on port 10000,
# presumably because of a port mapping in front of the notebook host - confirm.
display(HTML((f'<a href="http://localhost:17779" target="_blank" style="font-size:20px;"> --- Click me followed by a click at Submit ---</a><br>'
              f'<span style="font-size:20px;">When you are done, click at the button Finish.</span><br><hr>')))
interface()
# execution continues, but sometimes the app doesn't close properly, so a kernel restart is required to run it again.

In [None]:
find_package(["ddgs", "python-whois"])
import requests
import whois
from http import HTTPStatus

def is_domain_active(url):
    """Return True only if a GET request to ``url`` is answered with HTTP 200.

    Request errors and every other status count as inactive; statuses other
    than 200 and 410 are additionally printed.
    """
    try:
        status = requests.get(url, timeout=5).status_code
    except requests.RequestException:
        return False
    if status == HTTPStatus.OK:
        return True
    if status != HTTPStatus.GONE:
        print(f"Http status code of {url} is {status}")
    return False

def is_domain_registered(domain_name):
    """Return True if the WHOIS record of ``domain_name`` carries a domain name.

    For a registered domain the expiration date is printed as well (the first
    one, if several are reported). Any error during the lookup is printed and
    counts as "not registered".
    """
    try:
        record = whois.whois(domain_name)
        if not record.domain_name:
            return False
        # print(record)
        expires = record.expiration_date
        expires = expires[0] if isinstance(expires, list) else expires
        print(f"Expiration date of {domain_name} is {expires}")
        return True
    except Exception as error:
        # Could not retrieve WHOIS info, likely unregistered or blocked
        print(error)
        return False

def bool_to_human(value):
    """Return "yes" for a truthy ``value`` and "no" for a falsy one.

    Works for any object, not only for bool: indexing a two-element tuple with
    the value failed for values such as None, 2 or a string.
    """
    return "yes" if value else "no"
    
# url = "http://example.com"
# url = "http://google.com"
from urllib.parse import urlsplit

url = "http://www1.pybrain.org/?tm=1&subid4=1755681068.0192320000"
# urlsplit() isolates the host name reliably. Splitting on "//" picked the wrong part
# as soon as "//" showed up again later in the URL and kept a ":port" suffix.
# The second attempt covers URLs that were given without a scheme.
domain = urlsplit(url).hostname or urlsplit(f"//{url}").hostname
active = is_domain_active(url)
registered = is_domain_registered(domain)
print(f"Domain {domain} active {bool_to_human(active)} registered {bool_to_human(registered)}")
if registered and not active:
    print("Domain may be parked or down.")
elif not registered:
    print("Domain is likely available for registration.")

In [None]:
# request web page with session, user agent and reliable timeout using requests - needs about 3 seconds to run
find_package(["beautifulsoup4"])
import requests
import random
import inspect
import signal
from bs4 import BeautifulSoup

# Pool of browser User-Agent strings; one of them is picked for the request headers below
USER_AGENTS = [
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/139.0.0.0 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 13_5_0) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.6 Safari/605.1.15",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/139.0.0.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:117.0) Gecko/20100101 Firefox/117.0",
    "Mozilla/5.0 (iPhone; CPU iPhone OS 17_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.0 Mobile/15E148 Safari/604.1",
    "Mozilla/5.0 (Linux; Android 13; Pixel 7 Pro) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/139.0.0.0 Mobile Safari/537.36",
    "Mozilla/5.0 (iPad; CPU OS 17_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.0 Mobile/15E148 Safari/604.1",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/139.0.0.0 Edg/139.0.0.0 Safari/537.36",
    "Mozilla/5.0 (Linux; Android 12; SM-G991B) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Mobile Safari/537.36",
    "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:117.0) Gecko/20100101 Firefox/117.0"
    # Add more user agents
]
# Request headers; the User-Agent is chosen randomly once, when this statement is executed
headers = {
    'User-Agent': random.choice(USER_AGENTS),
    'Accept-Language': 'en-US,en;q=0.9',
    'Referer': 'https://www.google.com/'
}

def print_full_object(obj):
    """Print all members of ``obj`` reported by inspect.getmembers(), one ``name: value`` per line."""
    print("All attributes and methods:")
    members = inspect.getmembers(obj)
    for member_name, member_value in members:
        print(f"{member_name}: {member_value}")

class TimeoutException(Exception):
    """Raised by handler() to signal that the request took too long."""


def handler(signum, frame):
    """Signal handler that converts the received signal into a TimeoutException."""
    timeout_error = TimeoutException("Request timed out")
    raise timeout_error

def display_headers(headers):
    """Print a header mapping as an aligned two-column text table.

    Args:
        headers: Mapping of header names to values; it may be empty.
    """
    # default= keeps an empty mapping from raising ValueError; the column is never
    # narrower than its title so the separator stays aligned for short keys.
    longest_key = max((len(k) for k in headers.keys()), default=0)
    max_key_len = max(longest_key, len("Header"))
    print(f"{'Header':<{max_key_len}} | Value")
    print(f"{'-' * max_key_len}-|----------------")
    for key, value in headers.items():
        print(f"{key:<{max_key_len}} | {value}")
    
timeout_seconds = 3
# SIGALRM limits the total duration; the timeout of requests only limits single phases
signal.signal(signal.SIGALRM, handler)
print(headers)
try:
    signal.alarm(timeout_seconds)
    try:
        # The session is closed again as soon as the response has been received
        with requests.Session() as session:
            response = session.get('https://example.com', headers=headers, timeout=timeout_seconds)
    finally:
        # Always disarm: a failed request used to leave the alarm pending, so it
        # fired later and raised TimeoutException in unrelated code.
        signal.alarm(0)
    display_headers(response.headers)
    soup     = BeautifulSoup(response.content, 'html.parser')
    response = soup.prettify()
    print(f"response length is {len(response)} first 350 are:\n{response[:350]}")
except (TimeoutException, requests.exceptions.Timeout):
    # Covers the alarm as well as the connect/read timeout raised by requests itself
    print("Request timed out. Skipping response processing.")

In [None]:
# request web page with session, user agent and reliable timeout using httpx instead of requests
find_package(["beautifulsoup4","pandas"])
import httpx, random, signal
import pandas as pd
from bs4 import BeautifulSoup

# Pool of browser User-Agent strings; one of them is picked for the request headers below
USER_AGENTS = [
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/139.0.0.0 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 13_5_0) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.6 Safari/605.1.15",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/139.0.0.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:117.0) Gecko/20100101 Firefox/117.0",
    "Mozilla/5.0 (iPhone; CPU iPhone OS 17_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.0 Mobile/15E148 Safari/604.1",
    "Mozilla/5.0 (Linux; Android 13; Pixel 7 Pro) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/139.0.0.0 Mobile Safari/537.36",
    "Mozilla/5.0 (iPad; CPU OS 17_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.0 Mobile/15E148 Safari/604.1",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/139.0.0.0 Edg/139.0.0.0 Safari/537.36",
    "Mozilla/5.0 (Linux; Android 12; SM-G991B) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Mobile Safari/537.36",
    "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:117.0) Gecko/20100101 Firefox/117.0"
    # Add more user agents
]
# Request headers; the User-Agent is chosen randomly once, when this statement is executed
headers = {
    'User-Agent': random.choice(USER_AGENTS),
    'Accept-Language': 'en-US,en;q=0.9',
    'Referer': 'https://www.google.com/'
}

def display_headers(headers):
    """Show a header mapping as a left-aligned Header/Value table without the index column."""
    frame = pd.DataFrame(headers.items(), columns=['Header', 'Value'])
    cell_style = {'text-align': 'left'}
    title_style = {'selector': 'th', 'props': [('text-align', 'left')]}
    styled = frame.style.set_properties(**cell_style)
    styled = styled.set_table_styles([title_style])
    styled = styled.hide(axis='index')
    display(styled)

class TimeoutException(Exception):
    """Signals that the request did not finish within the allowed time."""


def handler(signum, frame):
    """Signal handler: raise TimeoutException for the received signal."""
    message = "Request timed out"
    raise TimeoutException(message)

def main():
    """Fetch https://example.com with httpx and print the response headers and the page start.

    The prepared request headers are sent with the request. The duration is
    limited twice: by the timeout of httpx and by a SIGALRM alarm.
    """
    timeout_seconds = 3
    signal.signal(signal.SIGALRM, handler)
    print(headers)
    try:
        signal.alarm(timeout_seconds)
        try:
            # headers= was missing before, so the user agent prepared above was never sent
            with httpx.Client(headers=headers, timeout=timeout_seconds) as client:
                response = client.get('https://example.com')
        finally:
            # Always disarm, otherwise a failed request leaves the alarm pending
            signal.alarm(0)
        display_headers(response.headers)
        soup = BeautifulSoup(response.content, 'html.parser')
        page = soup.prettify()
        print(f"response length is {len(page)} first 350 are:\n{page[:350]}")
    except (TimeoutException, httpx.TimeoutException):
        # Covers the alarm as well as the timeout raised by httpx itself
        print("Request timed out. Skipping response processing.")

main()

### Here we have a styled text example

<p style="background-color:#383838; padding:16px;font-size: 24px;font-family: 'Alte Schwabacher';"> ⚠️ <b>Use the AI bot: Ask how to format numbers of type float using Python f-strings</b></p>
<!-- Kabinett Fraktur / Olde English / Koch Fette Deutsche Schrift UNZ1A / Walbaum\-Fraktur UNZ1 / Alte Schwabacher -->

In [4]:
import matplotlib
import shutil
# Delete matplotlib's cache directory including its content.
# NOTE(review): presumably done so that newly installed fonts are picked up after a kernel restart - confirm.
shutil.rmtree(matplotlib.get_cachedir())

In [None]:
find_package(["matplotlib"])
import matplotlib.font_manager
# Show the distinct names of all fonts known to matplotlib's font manager (usually system fonts), sorted
known_fonts = matplotlib.font_manager.fontManager.ttflist
fonts = sorted({font.name for font in known_fonts})
display(fonts)
# the font must be available at the host where the browser is running and NOT where jupyter is running.
# cp /path/to/OldEnglishFont.ttf ~/.local/share/fonts/old_english/
# fc-cache -f -v
# fc-list | grep -i "english"
# import matplotlib
# import shutil
# shutil.rmtree(matplotlib.get_cachedir())
# restart kernel

In [None]:
import concurrent.futures
import time

def do_something(seconds):
    """Announce the pause, sleep for ``seconds`` and return a completion message."""
    # The newline is part of the string and end='' is used: cheap multiprocessing output handling
    announcement = f'Sleeping {seconds} second(s)...\n'
    print(announcement, end='')
    time.sleep(seconds)
    return f'Done Sleeping...{seconds}'

start = time.perf_counter()
sleep_durations = [5, 4, 3, 2, 1]
with concurrent.futures.ProcessPoolExecutor() as executor:
    # One do_something() call per duration, distributed over the worker processes
    results = executor.map(do_something, sleep_durations)
    if False: # change the word False at the left to True to see what was done inside do_something ...
        for result in results:
            print(result)
finish = time.perf_counter()
elapsed = round(finish-start, 2)
print(f'Finished in {elapsed} second(s)')

In [None]:
# now the output is part of the result, this prevents mixed output
# threads   --> use them if I/O bound
# processes --> use them if CPU bound
# brilliant.org/CMS
import concurrent.futures
import time
from datetime import datetime

def now():
    """Return the current local time as a ``[YYYYmmdd HHMMSS] `` prefix."""
    return f"[{datetime.now():%Y%m%d %H%M%S}] "

def do_something(seconds):
    """Sleep for ``seconds`` and return the timestamped start and end messages as a list."""
    # The first message is built before the pause, the second one after it
    started = f'{now()}Sleeping {seconds} second(s)...'
    time.sleep(seconds)
    finished = f'{now()}Done Sleeping...{seconds}'
    return [started, finished]

def main():
    """Run do_something() for the durations 10..1 in a process pool and print the elapsed time."""
    began = time.perf_counter()
    durations = range(10, 0, -1) # [10..1]
    with concurrent.futures.ProcessPoolExecutor() as pool:
        results = pool.map(do_something, durations)
        if False: # change the word False at the left to True to see what happens inside do_something ...
            for result in results:
                for item in result:
                    print(item)
    elapsed = time.perf_counter() - began
    print(f'Finished in {round(elapsed, 2)} second(s)')

if __name__ == "__main__":
    main()

In [None]:
# cosine similarity example
find_package(["scikit-learn","pandas"])
import pandas as pd
import re
from IPython.display import HTML, display
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Sample documents: one short description per social media service.
# They form the corpus of the similarity comparison further down.
twitter = """
Twitter is an online social media and social networking service owned and operated by American company X Corp.,
the legal successor of Twitter, Inc. Twitter users outside the United States are legally served by the Ireland-based
Twitter International Unlimited Company, which makes these users subject to Irish and European Union data protection laws.
On Twitter users post texts, photos and videos known as 'tweets'. Registered users can tweet, like, 'retweet' tweets,
and direct message (DM) other registered users, while unregistered users only have the ability to view public tweets.
Users interact with Twitter through browser or mobile frontend software, or programmatically via its APIs.
"""
facebook = """
Facebook is an online social media and social networking service owned by American technology giant Meta Platforms.
Created in 2004 by Mark Zuckerberg with fellow Harvard College students and roommates Eduardo Saverin, Andrew McCollum,
Dustin Moskovitz, and Chris Hughes, its name derives from the face book directories often given to American university students.
Membership was initially limited to only Harvard students, gradually expanding to other North American universities and,
since 2006, anyone over 13 years old. As of December 2022, Facebook claimed 2.96 billion monthly active users, and ranked third
worldwide among the most visited websites. It was the most downloaded mobile app of the 2010s. Facebook can be accessed from devices
with Internet connectivity, such as personal computers, tablets and smartphones. After registering, users can create a profile
revealing information about themselves. They can post text, photos and multimedia which are shared with any other users who have
agreed to be their friend' or, with different privacy settings, publicly. Users can also communicate directly with each other with
Messenger, join common-interest groups, and receive notifications on the activities of their Facebook friends and the pages they follow.
"""
tiktok = """
TikTok, and its Chinese counterpart Douyin (Chinese: 抖音; pinyin: Dǒuyīn), is a short-form video hosting service owned by ByteDance.
It hosts user-submitted videos, which can range in duration from 3 seconds to 10 minutes. Since their launches, TikTok and Douyin have
gained global popularity.[6][7] In October 2020, TikTok surpassed 2 billion mobile downloads worldwide. Morning Consult named TikTok the
third-fastest growing brand of 2020, after Zoom and Peacock. Cloudflare ranked TikTok the most popular website of 2021,
surpassing google.com.
"""
instagram = """
Instagram is a photo and video sharing social networking service owned by American company Meta Platforms. The app allows users to
upload media that can be edited with filters and organized by hashtags and geographical tagging. Posts can be shared publicly or
with preapproved followers. Users can browse other users' content by tag and location, view trending content, like photos, and follow
other users to add their content to a personal feed. Instagram was originally distinguished by allowing content to be framed only in a
square (1:1) aspect ratio of 640 pixels to match the display width of the iPhone at the time. In 2015, this restriction was eased with
an increase to 1080 pixels. It also added messaging features, the ability to include multiple images or videos in a single post, and a
Stories feature—similar to its main competitor Snapchat—which allowed users to post their content to a sequential feed, with each post
accessible to others for 24 hours. As of January 2019, Stories is used by 500 million people daily.
"""

def display_no_size_info(df, max_cols=20):
    """
    Display a pandas DataFrame as an HTML table without a size information footer.

    The DataFrame is converted with ``DataFrame.to_html``; every
    ``<div class="dataframe_info">`` element found in the result is removed
    before the HTML is displayed.

    Parameters:
    -----------
    df : pandas.DataFrame
        The DataFrame to display.
    max_cols : int
        Passed on to ``DataFrame.to_html`` as the maximum number of columns.

    Notes:
    ------
    - Intended for Jupyter notebooks or IPython environments that render HTML.
    - The footer removal relies on a regular expression and may need adjustment
      if pandas emits a different HTML structure.
    """
    footer_pattern = re.compile(r'<div class="dataframe_info">.*?</div>', re.DOTALL)
    table_html = df.to_html(max_cols=max_cols)
    display(HTML(footer_pattern.sub('', table_html)))

# The corpus: one document per service
documents        = [twitter, facebook, tiktok, instagram]
# Two alternative vectorizers: the second assignment overrides the first one, so
# stop words are kept. Remove the second line to drop english stop words instead.
count_vectorizer = CountVectorizer(stop_words="english")
count_vectorizer = CountVectorizer() # we want all words ...
# Count the words of every document: one row per document, one column per word
sparse_matrix    = count_vectorizer.fit_transform(documents)
doc_term_matrix  = sparse_matrix.todense()
df = pd.DataFrame(
   doc_term_matrix,
   columns=count_vectorizer.get_feature_names_out(),
   index=["twitter", "facebook", "tiktok", "instagram"],
)
print(f"That are {df.shape[1]} columns")
display_no_size_info(df)                          # formatted output
# pd.set_option('display.width', 2000)            # Disable line wrapping width limit
# pd.set_option('display.max_columns', 20)        # Display 20 columns
# print(df.to_string(max_rows=None, max_cols=20)) # pure textual output without size line

# compute cosine similarity
# (every document is compared with every document, including itself)
similarity = cosine_similarity(df, df)

# wrap in DataFrame with labels
sim_df = pd.DataFrame(
    similarity,
    index=df.index,  # row labels
    columns=df.index # column labels
)
display(sim_df)

# result with stopwords and 302 columns                result with stopwords removed and 250 columns
#             twitter  facebook    tiktok  instagram              twitter  facebook    tiktok  instagram
# twitter    1.000000  0.498128  0.232696   0.493960   twitter    1.000000  0.282005  0.035266   0.335484
# facebook   0.498128  1.000000  0.349222   0.603528   facebook   0.282005  1.000000  0.053773   0.262330
# tiktok     0.232696  0.349222  1.000000   0.304792   tiktok     0.035266  0.053773  1.000000   0.035266
# instagram  0.493960  0.603528  0.304792   1.000000   instagram  0.335484  0.262330  0.035266   1.000000

## Just debug the tcp communication of a docker container and do not modify it
- https://github.com/byF/docker-tcpflow
- https://github.com/byF/docker-tcpflow/blob/master/Dockerfile
- docker run --net="container:n8n-ollama" byfcz/tcpflow -p -c
- see also ~/docker/tools providing lsof, netstat, ss and vi

In [None]:

# CPU temperature displayed at the top/middle 
import tkinter as tk
import signal
import sys
from typing import Optional

# cat /sys/class/hwmon/hwmon2/temp1_label # at this host it is Tctl which is CPU
TEMP_FILE = "/sys/class/hwmon/hwmon2/temp1_input"  # sensor file; its value is in milli-degrees Celsius
TEMP_THRESHOLD = 65        # °C; readings below this are shown green, all others red
UPDATE_INTERVAL_MS = 250   # delay between two runs of update_temperature()
UPDATE_SKIP_COUNT = 4      # the sensor is read only on every 4th run (4 * 250 ms = once per second)

# could be class-based, but isn't yet

counter: int = 0               # position in the skip cycle; the sensor is read when it is 0
temp: Optional[float] = None   # result of the most recent read in °C, None if it failed

def read_cpu_temperature(path: Optional[str] = None) -> Optional[float]:
    """
    Read CPU temperature in Celsius from the system sensor file.

    Args:
        path: Sensor file to read. When omitted, TEMP_FILE is used.

    Returns:
        float: Temperature in Celsius, or None if reading failed.
    """
    sensor_file = TEMP_FILE if path is None else path
    try:
        with open(sensor_file, "r") as f:
            temp_str = f.read().strip()
        return int(temp_str) / 1000 # value is milli celsius
    except (OSError, ValueError):
        # OSError: file missing or unreadable; ValueError: content is not an integer.
        # Anything else is a programming error and should surface.
        return None

def update_temperature() -> None:
    """
    Refresh the label when due and schedule the next run.

    The sensor is read only while the cycle counter is 0. Every run advances
    the counter modulo UPDATE_SKIP_COUNT and re-arms itself via root.after()
    with UPDATE_INTERVAL_MS.
    """
    global counter, temp
    if counter == 0:
        temp = read_cpu_temperature()
        if temp is None:
            # no reading available
            label.config(text="--°C", fg="yellow")
        else:
            colour = "green" if temp < TEMP_THRESHOLD else "red"
            label.config(text=f"{int(temp)}°C", fg=colour)
    counter = (counter + 1) % UPDATE_SKIP_COUNT
    root.after(UPDATE_INTERVAL_MS, update_temperature)

def center_window() -> None:
    """
    Move the window to the horizontal middle of the screen, 30 pixels from the top.

    The current window size is kept; only the position changes.
    """
    root.update_idletasks()  # let Tk finish pending layout work before asking for the size
    win_w, win_h = root.winfo_width(), root.winfo_height()
    left = (root.winfo_screenwidth() // 2) - (win_w // 2)
    top = 30
    root.geometry(f"{win_w}x{win_h}+{left}+{top}")

def signal_handler(sig: int, frame: Optional[object]) -> None:
    """
    Destroy the Tk root window and leave the interpreter with exit status 0.

    Neither argument is used by the handler.
    """
    root.destroy()
    raise SystemExit(0)

# Build the overlay window and run the Tk main loop.
signal.signal(signal.SIGINT, signal_handler)  # Ctrl+C closes the overlay
root = tk.Tk()
root.title("CPU Temperature")
root.overrideredirect(True)         # no window-manager frame / title bar
root.attributes("-topmost", True)   # stay above other windows
root.attributes("-alpha", 0.7)      # slightly transparent
label = tk.Label(root, text="", font=("Helvetica", 24), bg="black")
label.pack()
update_temperature()                # first update; re-schedules itself from then on
center_window()                     # position the window once the label has content
root.mainloop()

### Start python script when user logs into the system

create ~/.config/systemd/user/python-script.service
```
[Unit]
Description=Run Python Script on user login

[Service]
ExecStart=/usr/bin/bash -c 'sleep 20 && /usr/bin/python3 /home/username/code.py'
Restart=on-failure

[Install]
WantedBy=default.target
```
commands to activate that:
```
systemctl --user daemon-reload
systemctl --user enable python-script.service
systemctl --user start python-script.service
```
command to check status:
```
systemctl --user status python-script.service
```

### pyinstrument aka flamegraph profiling

In [None]:
# pyinstrument my_script.py

In [None]:
!pip install pyinstrument
import time
from pyinstrument import Profiler
# Profile everything between start() and stop(), then print the text report.
profiler = Profiler()
profiler.start()
time.sleep(0.5)  # stand-in for the code that should be profiled
profiler.stop()
print(profiler.output_text(unicode=True, color=True))

### structlog aka logs that are not garbage

In [None]:
import structlog
log = structlog.get_logger()
# The event name comes first; context is passed as keyword arguments.
log.info("user_logged_in", user_id=42)

### pyrsistent aka Immutable Data Structures That Actually Work

In [None]:
!pip install pyrsistent
from pyrsistent import pmap
original = pmap({'a': 1})
modified = original.set('b', 2)  # set() returns a new map, `original` stays as it was
print(original)  # pmap({'a': 1})
print(modified)  # pmap({'a': 1, 'b': 2})

### deepdiff aka Detect All the Tiny Differences

In [None]:
!pip install DeepDiff
from deepdiff import DeepDiff
# Two nested dicts that differ only in the value of b -> x (10 vs. 15).
d1 = {"a": 1, "b": {"x": 10, "y": 20}}
d2 = {"a": 1, "b": {"x": 15, "y": 20}}
print(DeepDiff(d1, d2))
# {'values_changed': {"root['b']['x']": {'new_value': 15, 'old_value': 10}}}

### anyio aka Async Done Right (And Sanely)

In [None]:
import anyio
async def do_work():
    """
    Start some_async_function inside an anyio task group.

    NOTE(review): some_async_function is a placeholder that is not defined in
    this cell, and do_work() itself is only defined here, never run.
    """
    async with anyio.create_task_group() as tg:
        tg.start_soon(some_async_function)  # pass the function itself, not a call

### tqdm.contrib.concurrent aka Multi-Threaded Progress Bars, Finally

In [None]:
import time
from tqdm.contrib.concurrent import thread_map
def process(item, *, delay=0.5):
    """
    Return ``item * 2`` after a simulated amount of work.

    Args:
        item: Value to double.
        delay: Seconds to sleep before returning; keyword-only, defaults to
            the 0.5 s of the demo.

    Returns:
        ``item * 2``.
    """
    # Simulate work
    time.sleep(delay)
    return item * 2
# Run process() over 100 items with at most 7 worker threads.
results = thread_map(process, range(100), max_workers=7)

### glom aka The Data Access Toolkit You Didn’t Know You Needed

In [None]:
!pip install glom
from glom import glom
# Nested sample structure: dict -> dict -> list of dicts.
data = {
    "data": {
        "items": [{"name": "Alpha"}, {"name": "Beta"}]
    }
}
# Dotted path; the "1" segment picks the second element of the "items" list.
print(glom(data, 'data.items.1.name'))  # Output: Beta

### diskcache aka When You Want Cache but Hate Complexity

In [None]:
!pip install diskcache
import diskcache as dc
cache = dc.Cache('./mycache')  # cache directory, relative to the current working directory
@cache.memoize()
def expensive_calc(x):
    """Return x * 42; the print makes real (non-cached) executions visible."""
    print("Calculating...")
    return x * 42
# NOTE(review): presumably, when ./mycache already exists from an earlier run,
# the first call below is served from disk as well - confirm.
print(expensive_calc(10))  # Calculates and caches
print(expensive_calc(10))  # Instant from disk

### tenacity aka Retry Until It Works (Or You Rage Quit)

In [None]:
from tenacity import retry, stop_after_attempt, wait_fixed
# At most 3 attempts with a fixed 2 s pause between them.
@retry(stop=stop_after_attempt(3), wait=wait_fixed(2))
def fragile_function():
    """Always fail, so that every retry attempt is exercised."""
    print("Trying...")
    raise Exception("Oops")
# Prints "Trying..." three times; with tenacity's defaults the call then ends
# in a RetryError, so this cell is expected to finish with an exception.
fragile_function()

### boltons aka Batteries You Didn’t Know Were Missing

In [None]:
from boltons.iterutils import chunked
# Split range(10) into lists of at most 3 items; the last one holds the remainder.
for group in chunked(range(10), 3):
    print(group)
# Output: [0, 1, 2], [3, 4, 5], [6, 7, 8], [9]

## Python Libraries
|Library|Description|
|-------|-----------|
|anyio|Async Done Right (And Sanely)|
|boltons|Batteries You Didn’t Know Were Missing|
|deepdiff|Detect All the Tiny Differences|
|diskcache|When You Want Cache but Hate Complexity|
|glom|The Data Access Toolkit You Didn’t Know You Needed|
|gradio or streamlit|Rapid UI|
|openai|Access non-local LLMs|
|pyinstrument|flamegraph-style profiler|
|pyrsistent|Immutable Data Structures That Actually Work|
|python-dotenv|Store API keys securely|
|rich|For beautiful console logs|
|structlog|Logs That Aren’t Garbage|
|tenacity|Retry Until It Works (Or You Rage Quit)|
|tqdm.contrib.concurrent|Multi-Threaded Progress Bars, Finally|
|typer|Add a CLI interface to your automation scripts|
|watchdog|Automatically run scripts when files change|

## Prompts

In [None]:
import os

import openai

# Take the key from the environment when it is set; "your_sk" remains the placeholder.
openai.api_key = os.environ.get("OPENAI_API_KEY", "your_sk")

def generate_code(user_task, input_data=""):
    """
    Ask gpt-4o for Python code that automates a task.

    Args:
        user_task: Description of the task to automate.
        input_data: Input data (or a reference to it) for the task, if any.

    Returns:
        str: The message content of the first completion choice.
    """
    prompt = f"""
You are an automation assistant trained in Python development and scripting.
Your job is to take the following user input and perform a highly specific automated transformation.
### Here's the task:
{user_task}
### Here's the input data (if any):
{input_data}
### Your job is to:
- Write complete Python code for automating the task.
- Include all necessary libraries and handle exceptions.
- Optimize for readability and reuse.
- Return only runnable Python code (no explanation).
If required, use OpenAI's functions or APIs to complete the task.
"""
    # openai>=1.0 removed openai.ChatCompletion; chat.completions replaces it.
    response = openai.chat.completions.create(
        model="gpt-4o",
        messages=[
            {"role": "system", "content": "You are an expert Python automation engineer."},
            {"role": "user", "content": prompt}
        ],
        temperature=0.3
    )
    # Responses are objects in openai>=1.0, so use attribute access, not dict keys.
    return response.choices[0].message.content

# `user_task` and `input_data` have to be defined before this cell is run.
code_snippet = generate_code(user_task, input_data)
print(code_snippet)

### with UI

In [None]:
import gradio as gr
def auto_dev(task, data):
    """
    Gradio callback: pass the task description and the input data to generate_code().

    NOTE(review): generate_code is not defined in this cell; it has to exist in
    the notebook namespace before the UI is used.
    """
    # Call GPT-4o with the automation prompt
    return generate_code(task, data)  # Your GPT wrapper function
# The two text inputs map to (task, data); the returned string goes to the code output.
gr.Interface(
    fn=auto_dev,
    inputs=["text", "text"],
    outputs="code",
    title="Auto Developer",
    description="Describe the task and paste input - get working Python code."
).launch()

### Example of fixing broken CSV

In [None]:
# Example values for the {user_task} / {input_data} placeholders of the automation prompt.
user_task = "Fix malformed CSV where some rows have more or fewer columns than the header. Align data properly. Drop incomplete rows."
input_data = "path/to/broken.csv"

### Generating tools (with 4 example tasks)

In [None]:
# Tasks for which a dedicated code-generation prompt should be produced.
dev_tasks = [
    "Convert JSON to CSV",
    "Summarize Python logs",
    "Extract SQL queries from text",
    "Generate OpenAPI schemas from code comments"
]
# Meta prompt: asks for one ChatGPT prompt per task. {dev_tasks} is inserted as
# the list's Python representation.
chain_prompt = f"""
For each of the following tasks, create a ChatGPT prompt that:
- Clearly defines the task
- Requests Python code for automation
- Optimizes for minimal dependencies
- Returns only code, no commentary
Tasks:
{dev_tasks}
"""

## Sources

- https://blog.stackademic.com/this-chatgpt-prompt-unlocks-features-you-didnt-know-existed-fd175fc59d14
- https://python.plainenglish.io/10-python-quality-of-life-libraries-i-wish-i-used-sooner-2aa5793b4a98
