<a href="https://colab.research.google.com/github/aknip/Coding-Cheatsheets/blob/main/Python-Basics.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Python basics

- Installs everything and prompts for secrets in first run
- Avoids this in all subsequent runs (no redudant installations, optimized for speed)

In [None]:
%%capture --no-stderr
import psutil
IN_NOTEBOOK = any(["jupyter-notebook" in i for i in psutil.Process().parent().cmdline()])
if IN_NOTEBOOK:
  try:
      # imports
      from playwright.async_api import async_playwright
      from playwright.sync_api import Page, expect
      from loguru import logger
  except ImportError:
      !pip install pytest-playwright loguru --quiet
      !playwright install
      !pip install icecream dateparser re --quiet
      # same imports as above...
      from playwright.async_api import async_playwright
      from playwright.sync_api import Page, expect
      from loguru import logger

import json
import sys, os, shutil
import asyncio
import atexit
from getpass import getpass
import warnings

warnings.filterwarnings('ignore')

# Virtual Environement mit virtualenv
- Virtualenv
	- Insall `pip install --user virtualenv`
	- Create new environment:
		- Create project folder, go into it then `python3 -m venv env`
	- Activate:  `source env/bin/activate`
	- Deactivate: `deactivate
	- Details: https://packaging.python.org/en/latest/guides/installing-using-pip-and-virtual-environments
- Pyenv
	- kann auch andere Python Versionen nutzen?
	- Beispielprojekt: https://github.com/Forethought-Technologies/AutoChain

# Dependencies mit pip
- Dependencies installieren:
	- Mit  `pip install lib1 lib2 lib3 --quiet` etc. mehrere Libraries installieren
	- Alternativ: `pip install -r requirements.txt`
	- Das file `requirements.txt` inkl. Versionsnummern kann per `pip freeze > requirements.txt` erzeugt werden
  - `pip show pyautogen` zeigt lokale location der library

# Credentials

In [None]:
if IN_NOTEBOOK:
  try: CREDS
  except NameError:
    CREDS = json.loads(getpass("Secrets (JSON string): "))
    os.environ['CREDS'] = json.dumps(CREDS)
    CREDS = json.loads(os.getenv('CREDS'))
os.environ["OPENAI_API_KEY"] = CREDS['OpenAI']['v1']['credential'] # my key
os.environ["TOGETHERAI_API_KEY"] = CREDS['together-ai']['key']['credential']
os.environ['ANTHROPIC_API_KEY'] = CREDS['anthropic']['key']['credential']

# Debugging and Logging

In [None]:
proj_folder = 'project'

# Delete / create directory
if os.path.exists(proj_folder):
    shutil.rmtree(proj_folder)
os.mkdir(proj_folder)

try:
  logger.level("CUSTOM", no=22, color="<black>", icon="@")
except:
  print("Log level 'CUSTOM' already defined.")
logger.remove()  # Remove all handlers added so far, including the default one.
logger.add(sys.stderr, level="TRACE", format="{time:HH:mm:ss} | <level>{level: <10}</level> | {message}")
logger.add(proj_folder + "/log_all.log", level="TRACE", format="{time:HH:mm:ss} | {level: <10} | {message}")
logger.add(proj_folder + "/log_success.log", level="SUCCESS", format="{time:HH:mm:ss} | {level: <10} | {message}")
logger.add(proj_folder + "/log_error.log", level="ERROR", format="{time:HH:mm:ss} | {level: <10} | {message}")

Log level 'CUSTOM' already defined.


11

In [None]:
logger.trace("A trace message.")
logger.debug("A debug message.")
logger.info("An info message.")
logger.success("A success message.")
logger.warning("A warning message.")
logger.error("An error message.")
logger.critical("A critical message.")
logger.log("CUSTOM", "A custom level.")

11:24:46 | [36m[1mTRACE     [0m | A trace message.
11:24:46 | [34m[1mDEBUG     [0m | A debug message.
11:24:46 | [1mINFO      [0m | An info message.
11:24:46 | [32m[1mSUCCESS   [0m | A success message.
11:24:46 | [31m[1mERROR     [0m | An error message.
11:24:46 | [41m[1mCRITICAL  [0m | A critical message.
11:24:46 | [30mCUSTOM    [0m | A custom level.


In [None]:
# custom function as log handler - will be executed with every log call

async def custom_function(msg):
  msg_obj = json.loads(msg)
  message = msg_obj['record']['message'] #print(msg_obj['text'])
  level = msg_obj['record']['level']['name']
  print(message)
logger.add(custom_function, format="{level} | {message}", serialize=True)

logger.info("An info message.")

11:24:48 | [1mINFO      [0m | An info message.


An info message.


In [None]:
# do not use print() - use ic() instead !

from icecream import ic
def foo(i):
    return i + 333
ic(foo(123))

ic| foo(123): 456


456

# Code structure, imports

In [None]:
# given: local file helper_functions.py

import helper_functions # => imports AND executes file

from helper_functions import function1, function2 # => imports functions (defined with def:), DOES NOT execute

# Strings

In [None]:
# Langen String im Code mit Backslash auf mehere Zeilen aufteilen
long_string = 'Hier die ersten Wörter\
 und hier die nächsten. Wichtig:\
 Hinter dem Backslash kein SPACE!'


# Die ersten 10 Zeichen eines Strings
short_string = long_string[:10]

# Langen String (ohne Zeilenumbrüche) ausgeben mit Zeilenumbruch bei 80 Zeichen
import textwrap
print(textwrap.fill(long_string, 80))
# Langer String MIT Zeilenumbrüchen
for x in long_string.split('\n'):
  print(textwrap.fill(x, 80))

# Text in Array splitten - Trennzeichen doppelter Zeilenumbruch
paragraphs_array = fulltext.split('\n\n')

# Array mit Texten in einen String konvertieren, doppelter Zeilenumbruch als Trenner
fulltext_from_array = '\n\n'.join(paragraphs_array)

# Zahl in String wandeln, mit 3 führenden Nullen
number_string = str(number).zfill(3) # 007

# Datum / Uhrzeit als String
from datetime import datetime
current_date_time = datetime.now().strftime("%d.%m.%Y, %H:%M:%S")
filename = datetime.now().strftime("%Y%m%d-%H%M%S")
# 15.08.2022, 17:49:46


# String für Filenamen optimieren (Leerzeichen und Sonderzeichen entfernen)
filename = "This is not/ valid!-123"
clean_filename = "".join( x for x in filename if (x.isalnum() or x in "._-"))
print(clean_filename)


# Templating with Strings

In [None]:
# Nested templates

# Variant 1:
template = 'Hello, {var1} {var2}.'
var1 = 'John'
var2 = 'Doe'
result1 = template.format(var1=var1, var2=var2)
# Hello, John Doe.

# Variant 2:
def fstr(template):
    return eval(f"f'{template}'")
result2 = fstr(template)
# Hello, John Doe.

result3 = f"{template}" # does not work
result4 = f"Hello {var1}" # works

print(result2)

# Date and time / Formatting / Measure time

In [None]:
import datetime
from pandas.tseries.offsets import BDay
# Heute
today = datetime.datetime.today()
print(today)
# Lezter Werktag (wenn heute = MO wäre das FR)
print(today - BDay(1))
# Datum definieren
my_date = datetime.date(2024, 1, 22)
print(my_date)

2024-01-25 08:56:55.050608
2024-01-24 08:56:55.050608
2024-01-22


In [None]:
# Datum aus String erzeugen, für DE und US
# !pip install dateparser
import dateparser
date_string = "Montag, 10. Juli 2023 08:42:25"
date_obj = dateparser.parse(date_string)
# OR: date_obj = dateparser.parse(date_string, languages=['de'])

# Datum / Uhrzeit als string
from datetime import datetime
current_date_time = datetime.now().strftime("%d.%m.%Y, %H:%M:%S")
# 15.08.2022, 17:49:46

# Dauer messen
import time
start = time.time()
print("hello")
time.sleep(1) # waits for 1 sec
end = time.time()
print(end - start)

# Regex

In [13]:
import re

# Finde Zahl in String, z. B. die "123" im folgenden String:
#    project x/audio/chunks/segment_123.mp3
# https://regex101.com/r/xwBbvc/1
file_number_regex = re.findall("segment_(\d+).mp3", "project x/audio/chunks/segment_123.mp3")
if file_number_regex:
	file_number = file_number_regex[0] # 1st matching group
	print(file_number)
else:
	print('Nothing found...')

# Another demo
# Find first occurance
input_txt = "This is a demo text. <Find and replace me>. Lorem ipsum."
text_inside_tags = re.findall('(?sm)<(.+)>', input_txt)[0]
print(text_inside_tags)
# Replace using groups
new_text = 'REPLACED!'
replaced_txt = re.sub('(<)(.+)(>)', r'\1' + new_text + r'\3' , input_txt)
print(replaced_txt)


# Ersetze alle führenden SPACES zu Beginn von Zeilen:
input_txt = """
  This is a demo text.
	   And a second line
		     Lorem ipsum.
"""
output_text = re.sub('\n\s{2,}', '\n', input_txt)
print(output_text)


123
Find and replace me
This is a demo text. <REPLACED!>. Lorem ipsum.

This is a demo text. 
And a second line
Lorem ipsum.



# Array

In [None]:

# Array Funktionen
``` Python
# Loop through array with index (enumerate as "helper" for index)
for index, part in enumerate(my_array):
    print(index,part)

```


# Dictionary / Serializing (pickle)

> Eingerückter Textblock



In [4]:
# Dictionary (ähnlich Javascript-Objekt/JSON)

# Creating a Dictionary
Dict = {'Name': 'Geeks', 1: [1, 2, 3, 4]}
print("Creating Dictionary: ")
print(Dict)

# accessing a element using key - without or without default/fallback
print("Accessing a element using key:")
print(Dict['Name'])
print(Dict['Phone'], 'Sorry, no Phone in Dictionary')

# accessing a element using get()
# method
print("Accessing a element using get:")
print(Dict.get(1))

# creation using Dictionary comprehension
myDict = {x: x**2 for x in [1,2,3,4,5]}
print(myDict)

# access dict and
from contextlib import suppress
print('Access existing element "Name":')
with suppress(KeyError): print(Dict['Name'])
print('Access non existing element, skipping error:')
with suppress(KeyError): print(Dict['yoyo'])

# convert/print Dictionary to JSON in formatted way
print(json.dumps(myDict, sort_keys=True, indent=2))



Creating Dictionary: 
{'Name': 'Geeks', 1: [1, 2, 3, 4]}
Accessing a element using key:
Geeks
Accessing a element using get:
[1, 2, 3, 4]
{1: 1, 2: 4, 3: 9, 4: 16, 5: 25}
Access existing element "Name":
Geeks
Access non existing element, skipping error:


In [None]:
import pickle

data = {
    "1": {
        "input": "chat input",
        "output": "chat output"
    }
}

# append data
data.update({"2": {
        "input": "chat input2",
        "output": "chat output2"
    }})

# save data to file
file = open('chat.dump', 'wb')
pickle.dump(data, file)
file.close()

# read data from file
file = open('chat.dump', 'rb')
data2 = pickle.load(file)
file.close()

print(data2)

# Files

In [None]:


# File schreiben, lesen, kopieren, Pfade/Filenamen bearbeiten
``` Python
import io
import os

# Read full file
f= open('testfile.txt','r')
if f.mode == 'r':
      contents =f.read()
      print(contents)
f.close()

# Read file line-by-line
f= open('testfile.txt','r')
if f.mode == 'r':
      f1 = f.readlines()
      for x in f1:
          print(x)
f.close()

# Write new file
f= open('testfile.txt','w+')
for i in range(10):
     f.write('This is line %d\r\n' % (i+1))
f.close()

# Append to file
f= open('testfile.txt','a+')
f.write('this is appended text 1\r\n')
f.write('this is appended text 2\r\n')
f.close()

# Delete file
os.remove('testfile.txt')

# Copy file
import shutil
shutil.copyfile(original_with_path, target_with_path)

# Get filename from full path
just_filename = os.path.basename(full_path_incl_filenmae)


In [None]:
import os

# Create directory (if it not exists already)
directory_name = 'test-folder'
if not os.path.exists(directory_name):
	os.mkdir(directory_name)


# Delete directory recursively (if it exists already)
directory_name = 'test-folder'
if os.path.exists(directory_name):
    shutil.rmtree(directory_name)


# Walk through directory, recursively incl. subdirectories
directory_path = "."
for root, _, files in os.walk(directory_path):
    for file_name in files:
        file_name_with_path = os.path.join(root, file_name)
        print(file_name_with_path)
        if file_name.endswith('.xlsx'):
            print('Excel found: ', file_name_with_path)

./.config/.last_opt_in_prompt.yaml
./.config/.last_update_check.json
./.config/config_sentinel
./.config/gce
./.config/active_config
./.config/.last_survey_prompt.yaml
./.config/default_configs.db
./.config/logs/2024.01.25/14.23.07.113867.log
./.config/logs/2024.01.25/14.22.36.490195.log
./.config/logs/2024.01.25/14.22.55.873995.log
./.config/logs/2024.01.25/14.23.08.041697.log
./.config/logs/2024.01.25/14.22.46.785493.log
./.config/logs/2024.01.25/14.22.10.002730.log
./.config/configurations/config_default
./sample_data/README.md
./sample_data/anscombe.json
./sample_data/california_housing_train.csv
./sample_data/mnist_test.csv
./sample_data/mnist_train_small.csv
./sample_data/california_housing_test.csv


# ZIP

In [None]:

# ZIP (Archive, zip, unzip)
``` Python
# 1.
# with shutil
import shutil
# zip
shutil.make_archive('archive-name-without-.zip', 'zip', 'folder-which-should-be-zipped')
# unzip
shutil.unpack_archive('archive.zip', 'destination-folder')

# 2.
# with zipfile - example: Exclude folder for zip
# https://datagy.io/python-zip-unzip-files/
import os
import zipfile
# zip
directory = "folder-which-should-be-zipped"
with zipfile.ZipFile("zipfile.zip", "w") as zip:
    for subdir, dirs, files in os.walk(directory):
        for file in files:
            srcpath = os.path.join(subdir, file)
            if (subdir != 'exclude-folder1') and (subdir.startswith('exclude-folder2') == False):
              #print(subdir)
              dstpath_in_zip = os.path.relpath(srcpath, start=directory)
              with open(srcpath, 'rb') as infile:
                  print(srcpath)
                  zip.writestr(dstpath_in_zip, infile.read())
# unzip
with zipfile.ZipFile('zipfile.zip', 'r') as zip:
    zip.printdir()
    zip.extractall('./')
```


# Webscraping

In [None]:
#!pip install bs4
import requests
from bs4 import BeautifulSoup

response = requests.get("https://www.tagesschau.de/inland/hochwasser-deutschland-126.html")
soup = BeautifulSoup(response.text, 'html.parser')
all_text = soup.get_text(separator='\n', strip=True)
div_text=soup.find("div",{"class":"layout-container"}).get_text()
article_text=soup.find("article").get_text(separator='\n', strip=True)
print(article_text)

Deutschland
Hochwasserlage bleibt angespannt
Stand: 30.12.2023 18:22 Uhr
In einigen Landkreisen in Niedersachsen hat sich die Hochwasserlage etwas beruhigt, in anderen steigen die Pegelstände noch. Der Deutsche Wetterdienst rechnet zumindest für heute nicht mit neuem Regen in den betroffenen Gebieten.
Die Hochwasserlage bleibt vor allem in Niedersachsen weiter kritisch. An einigen Pegeln der Weser befinden sich die Wasserstände noch über der höchsten Meldestufe, wie aus einem Lagebericht des
Landesbetriebs für Wasserwirtschaft, Küsten- und Naturschutz (NLWKN)
hervorgeht. Für die Leine, die Aller sowie die Ober- und Mittelweser gebe es eine Warnung vor großem Hochwasser. In
Schladen
im Landkreis Wolfenbüttel stieg der Pegelstand der Oker demnach um mehrere Zentimeter.
An der Weser bei
Drakenburg
überschritt der Wasserstand mit 835 Zentimetern den bisherigen Höchstwert aus 1981 um einen Zentimeter, wie der Überregionale Hochwasserdienst mitteilte. "Der Scheitel ist aber bereits erreicht 

# Office - Powerpoint

In [None]:


# Powerpoint Editieren
``` Python
# see my Notebook: https://github.com/aknip/Streamlit-Gradio/blob/main/Powerpoint%20and%20PDF%20Generation.ipynb

# Source: "Using Python to Update PowerPoint. Step by step tutorial to edit PowerPoint slides using Python": https://towardsdatascience.com/use-python-to-automate-the-powerpoint-update-4a385acf1243

# Setup:
# pip install python-pptx

# Fonts:
# sudo apt install fonts-open-sans
# sudo apt install fonts-roboto

from pptx import Presentation
my_ppt = Presentation('ppt-input/presentation.pptx')
# do something with python-pptx
my_ppt.save('ppt-output/presentation2.pptx')
```


# Powerpoint (Office) zu PDF Konvertieren
``` Python
# see my Notebook: https://github.com/aknip/Streamlit-Gradio/blob/main/Powerpoint%20and%20PDF%20Generation.ipynb

# Source: "Stackoverflow: Use unoconv on Ubuntu to convert PPTX to PDF": https://stackoverflow.com/a/63664087

# Setup:
# sudo apt install unoconv
# pip install tqdm
	# pip install glob

# Fonts:
# sudo apt install fonts-open-sans
# sudo apt install fonts-roboto

command = 'unoconv -f pdf -o "pdf-output/presentation.pdf" "ppt-input/presentation.pptx" '
os.system(command)
```


# Office - Word - Markdown

In [None]:

# Markdown / Word konvertieren, beide Richtungen (Pandoc / pypandoc)
``` Python
#pip install pypandoc
import pypandoc
# md to docx
docx_file = pypandoc.convert_file('input_file.md', 'docx', outputfile='output_file.docx')
# docx to md
md_file = pypandoc.convert_file('input_file.docx', 'md', outputfile='output_file.md')

```



# Word zu Markdown konvertieren
- Ideen/Quellen:
	- https://medium.com/geekculture/how-to-easily-convert-word-to-markdown-with-pandoc-4d60878ccc64
	- https://towardsdatascience.com/word-document-to-html-or-markdown-with-python-37db7150258c
	-



# Email Conversion .msg

In [None]:
!pip install extract-msg
import extract_msg
msg = extract_msg.openMsg("./msg/test-email.msg")
# or (better:)
!python3 -m extract_msg "msg/test-email.msg" --out msg

# Convert CSV to JSON

In [None]:
import pandas as pd
import json
from io import StringIO

CSV_string = """destination|country|attractions
Paris|France|Eiffel Tower, Louvre Museum
New York City|United States|Times Square, Statue of Liberty, Broadway shows
Kyoto|Japan|ancient temples, tranquil gardens, traditional geisha culture
Great Wall of China|China|
Rio de Janeiro|Brazil|Carnival celebrations, Copacabana Beach, Christ the Redeemer statue
Great Barrier Reef|Australia|stunning coral reefs, diverse marine life
Sydney Opera House|Australia|architectural brilliance
"""

response_IO = StringIO(CSV_string) # [4:] strip away first 4 chars "AI: "
response_df = pd.read_csv(response_IO, sep="|")
response_JSON = json.loads(response_df.to_json(orient='table',index=False))['data']
print(json.dumps(response_JSON, indent=4))

[
    {
        "destination": "Paris",
        "country": "France",
        "attractions": "Eiffel Tower, Louvre Museum"
    },
    {
        "destination": "New York City",
        "country": "United States",
        "attractions": "Times Square, Statue of Liberty, Broadway shows"
    },
    {
        "destination": "Kyoto",
        "country": "Japan",
        "attractions": "ancient temples, tranquil gardens, traditional geisha culture"
    },
    {
        "destination": "Great Wall of China",
        "country": "China",
        "attractions": null
    },
    {
        "destination": "Rio de Janeiro",
        "country": "Brazil",
        "attractions": "Carnival celebrations, Copacabana Beach, Christ the Redeemer statue"
    },
    {
        "destination": "Great Barrier Reef",
        "country": "Australia",
        "attractions": "stunning coral reefs, diverse marine life"
    },
    {
        "destination": "Sydney Opera House",
        "country": "Australia",
        "attractio