<a href="https://colab.research.google.com/github/MarkusSchilling/py_scripts/blob/main/Python_Helper_functions.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Python Helper Functions

This notebook collects helper functions that may come in handy when writing scripts. The collection may never be finished, and it makes no claim to completeness.

# Installation & Imports of relevant packages

Occasionally, the list of package installations and imports may have to be updated, depending on the requirements set by the functions that are defined below.

In [None]:
# Packages that need to be installed (for the sake of completeness, more packages are listed here than any single function requires).
%pip install rdflib
%pip install sparqlwrapper

In [None]:
# Imports that need to be performed (for the sake of completeness, more imports are listed here than any single function requires).
from rdflib import Graph, Namespace, URIRef, Literal, XSD, RDF, RDFS, PROV, OWL, DC
from rdflib.term import Identifier
from rdflib.collection import Collection
from rdflib.namespace import RDF, RDFS, SKOS, XSD, OWL
import rdflib.plugins.sparql.update
import pandas as pd
import io
from io import StringIO
import urllib.parse
from IPython.display import display, Markdown, HTML, JSON
from scipy.spatial import Delaunay
from scipy.spatial.distance import euclidean
import numpy as np
import matplotlib.pyplot as plt
import sys
import difflib

# Helper Functions

## Find differences in strings

In [None]:
# Function to find differences in two strings (or two lists of strings)
import difflib


def describe_differences(a, b):
    """Return one message per item that ``difflib.ndiff`` marks as removed or added.

    Args:
        a: Original sequence - a string (compared character by character) or
            a list of strings (compared item by item).
        b: Sequence that ``a`` is compared against.

    Returns:
        A list of message strings. The reported position is the index of the
        entry in the ``ndiff`` output, not an index into ``a`` or ``b``.
    """
    messages = []
    for i, s in enumerate(difflib.ndiff(a, b)):
        # Every ndiff entry is "<code> <item>". Take everything after the
        # two-character prefix so that multi-character items are reported
        # completely (s[-1] would only yield their last character).
        item = s[2:]
        if s[0] == '-':
            messages.append('Delete "{}" from position {}'.format(item, i))
        elif s[0] == '+':
            messages.append('Add "{}" to position {}'.format(item, i))
        # Unchanged entries (' ') and ndiff hint lines ('?') are not reported.
    return messages


cases=[("String 1", "String 2")]
# Instead of string literals, variables holding strings or lists of strings can also be
# used, such as: cases=[(CSV, CSV2)]
# NOTE(review): the original note also mentions dataframes; presumably these have to be
# converted to strings / lists of strings first - TODO confirm.

for a,b in cases:
    print('{} => {}'.format(a,b))
    for message in describe_differences(a, b):
        print(message)
    print()

## Global Counter

In [None]:
# Module-level counter that can be used to create arbitrary instance names (numbers).
# It starts at 0; the first number handed out is therefore "1".
instanceCounter = 0


def nextInstanceNum():
    """Increment the global ``instanceCounter`` by one and return the new value as a string."""
    global instanceCounter
    instanceCounter += 1
    return "{}".format(instanceCounter)

In [None]:
# Helper method 'add' used to write triples to an RDF graph
def add(s, p, o):
    """Add the triple (s, p, o) to the RDF graph ``g``.

    ``g`` is not defined in this cell; it has to exist as a global graph
    object before ``add`` is called.

    Args:
        s: Subject IRI, wrapped in ``URIRef``.
        p: Predicate IRI, wrapped in ``URIRef``.
        o: Object given as a string. It is stored as
            * a ``URIRef`` if it starts with ``http://`` or ``https://``
              (the predicate is then treated as an "ObjectProperty"),
            * a ``Literal`` with datatype ``XSD.float`` if it can be parsed
              as a float,
            * a plain ``Literal`` otherwise
              (both literal cases: "DatatypeProperty").
    """
    # in this case p is "ObjectProperty"
    if o.startswith(('http://', 'https://')):
        g.add((URIRef(s), URIRef(p), URIRef(o)))
        return

    # in this case p is "DatatypeProperty"
    # Only the float conversion is guarded: an error raised by the graph
    # itself must not be mistaken for "o is not a number".
    try:
        number = float(o)
    except ValueError:
        g.add((URIRef(s), URIRef(p), Literal(o)))
    else:
        g.add((URIRef(s), URIRef(p), Literal(number, datatype=XSD.float)))

## Read in CSV data

In [None]:
# Read in CSV data

# Read the complete file into one string. The CSV file name has to be set accordingly.
# The 'with' statement closes the file handle as soon as the content has been read.
with open("test.csv") as csv_file:
    CSV = csv_file.read()

# Store data directly to a dataframe
import pandas as pd
CSV_df = pd.read_csv('test.csv')
# The CSV file name has to be set accordingly. This call uses the default separator of
# pd.read_csv, whereas the call below uses ';' - adjust 'sep' to the file at hand.

# Also possible: load data from the 'CSV-string' created above to a dataframe
data = pd.read_csv(io.StringIO(CSV), sep=';')

In [None]:
# Change standard output to a file of choice
import sys
old_stdout = sys.stdout  # reference to the previous stdout, needed to undo the redirection
# Output file name AND format have to be changed accordingly.
# The handle gets its own name so that it can be closed again later on.
output_file = open('output-file.csv', 'w')
sys.stdout = output_file

# From here on, everything that is printed is written to the file.
# To undo the redirection, run:
#   sys.stdout = old_stdout
#   output_file.close()

## Change the standard output file in Jupyter Notebook to a file of choice

In [None]:
# How to write strings to a CSV line by line
text = "This is an example., This is another example for the second line. "
# The original note for this cell reads: set ',' as separator for 1 line; set ';' as
# separator for the next column.
# NOTE: the code below does not split on either character. Iterating over a StringIO
# yields newline-terminated lines, and the sample text contains no newline, so it is
# written to the file as one single line.

s = StringIO(text)
# writelines() writes every line yielded by the StringIO object, one after the other.
with open('fileName.csv', 'w') as f:
    f.writelines(s)

# The fileName has to be set accordingly.

## Compare strings (case sensitive)

In [None]:
# How to compare strings (case sensitive)
# The comparison operators can be applied to strings directly; upper and lower case
# letters are treated as different characters.
string1 = "Abrar"
string2 = "Ahmed"
string3 = "ABCD"
string4 = "ABCD"

# less-than-or-equal / greater-than-or-equal
if string2 >= string1:
    print(string1," is smaller ",string2," is greater")
if string4 <= string2:
    print(string4," is smaller ", string2," is greater")

# equality / inequality
if string4 == string3:
    print(string3," is equal to ",string4)
if string3 != string1:
    print(string1," is not equal to ", string3)

# A second way is to call the __eq__() method of the string class directly. It is the
# special ("magic") method behind the == operator and returns True if both strings are
# equal or False if they are not.

# if s1.__eq__(s2):
  #  print('s1 and s2 are equal.')

## Compare strings (case insensitive)

In [None]:
# How to compare strings (case insensitive)
s1 = 'String'
s2 = 'String'
s3 = 'string'

# Three ways to normalise the case before comparing: casefold(), lower() and upper().
# Each variant prints both normalised strings followed by the result message.
for normalise in (str.casefold, str.lower, str.upper):
    left, right = normalise(s1), normalise(s3)
    if left == right:
        print(left)
        print(right)
        print('s1 and s3 are equal in case-insensitive comparison')

## Transform Inputs to IRIs (Ontology development)

In [None]:
# Function to transform inputs to IRIs.
def to_iri(input):
    """Return ``input.iri`` if the object has an ``iri`` attribute, otherwise ``input`` itself.

    Args:
        input: Any object. The parameter name shadows the builtin ``input``;
            it is kept so that existing keyword calls keep working.

    Returns:
        The value of the ``iri`` attribute, or the unchanged object if it has
        no such attribute.
    """
    # getattr() with a default only handles the "attribute is missing" case,
    # so unrelated errors and KeyboardInterrupt are no longer swallowed.
    return getattr(input, "iri", input)

# Function to write the result of a SPARQL query into a (pandas) data frame.
import pandas as pd
def sparql_result_to_df(res):
    """Build a pandas DataFrame from the rows of a query result.

    Args:
        res: Iterable of rows, each row being an iterable of items.

    Returns:
        A DataFrame with one row per result row; every item is passed
        through ``to_iri`` first.
    """
    converted_rows = [[to_iri(cell) for cell in record] for record in res]
    return pd.DataFrame(converted_rows)

## Triangulation to get the average distances between X and Y coordinates

This function originated from an analysis of precipitates as a result of a heat treatment of an aluminium alloy. The result will also be plotted when using this function.

In [None]:
# Triangulation to get the average precipitate distance out of X and Y coordinates in dependence on the material designation and the images taken.
# The dataframe `df` is not created in this cell; it has to be defined before the cell is run.

# Column labels of `df` that are used below
MATL_DESIGNATION = 1
# MATL_DESIGNATION_LABEL = 2
IMG = 4
XPOSITION = 6
YPOSITION = 8
plt.subplots(figsize=(12., 12.))
cmap = plt.get_cmap("Purples")  # NOTE(review): not used anywhere in this cell
lambdas_50pct = dict()
# Group by the plain column label instead of a one-element list: with a list, recent
# pandas versions return 1-tuples as group keys, which broke the label below.
for matl_designation, group in df.groupby(by=MATL_DESIGNATION):
    result = []
    # One Delaunay triangulation per image; the edge lengths of all images of a
    # material designation are collected in `result`.
    for img, subgroup in group.groupby(by=IMG):
        tup = subgroup[YPOSITION], subgroup[XPOSITION]
        point_coords = np.vstack(tup).T
        tri = Delaunay(point_coords)
        for simplex in tri.simplices:
            # Compute the three distances of the points 1-2, 1-3, 2-3
            # NOTE(review): an edge shared by two triangles is appended once per triangle.
            for i, j in ((0, 1), (0, 2), (1, 2)):
                point = (point_coords[simplex[i]], point_coords[simplex[j]])
                result.append(euclidean(point[0], point[1]))
    X = np.sort(result)
    X *= 0.15 #nm per pixel
    # Evenly spaced values from 0 to 1, plotted against the sorted distances
    Y = np.linspace(0., 1., num=len(X))
    # Middle element of the sorted distances as approximate 50 % quantile
    lambda_50pct = X[int(len(X)/2)]
    lambdas_50pct[matl_designation] = lambda_50pct
    print(matl_designation, ", approx 50% quantil: {lambda_50pct:.1f}".format(lambda_50pct=lambda_50pct))
    # The label has to be one string (the former tuple expression failed for non-string keys)
    plt.plot(X, Y, '.', label="{} lambda={:.1f}".format(matl_designation, lambda_50pct))

plt.xlabel("Distance in nm")
plt.ylabel("Quantile")
# plt.legend()
plt.show()