<h1>Data Exctraction - Cable</h1>

This notebooks goal is to visualize the distribution of the different corespecs of the cables. Thats done to look how why there are so many of them.

In [1]:
import pandas as pd
import plotly.express as px
from sklearn import preprocessing
import matplotlib.pyplot as plt
import seaborn as sns
import re
import helpers.pipe_helpers as php

In [2]:
mat_cable = pd.read_excel("../../res/06 - Spezifikation DE-EN_Vorlage_CAD.xlsx", sheet_name="06 Leitungsart TSZ+TSG")

In [3]:
mat_cable.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 241 entries, 0 to 240
Data columns (total 14 columns):
 #   Column                                           Non-Null Count  Dtype  
---  ------                                           --------------  -----  
 0   Unnamed: 0                                       223 non-null    object 
 1   AGL
(Ausgleichsleitung)
oder
TE
(Thermoleitung)  19 non-null     object 
 2   AGL
(Ausgleichsleitung)+C230:C235                127 non-null    object 
 3   Leitungsart TSG / TSZ                            221 non-null    object 
 4   Unnamed: 4                                       10 non-null     object 
 5   Unnamed: 5                                       0 non-null      float64
 6   Unnamed: 6                                       0 non-null      float64
 7   Unnamed: 7                                       0 non-null      float64
 8   Unnamed: 8                                       0 non-null      float64
 9   Unnamed: 9                      

In [4]:
mat_cable_cp = (mat_cable.pipe(hp.start_pipeline)
 .pipe(php.rename_column,column_name="Unnamed: 0",value="SAP_MATNR")
 .pipe(php.drop_columns,column_names=["Unnamed: 4","Unnamed: 5","Unnamed: 6","Unnamed: 7","Unnamed: 8","Unnamed: 9","Unnamed: 11","Unnamed: 12","Unnamed: 13"])
 .pipe(php.filterNaN_rows, column_names=["Leitungsart TSG / TSZ", "SAP_MATNR"])
 )

NameError: name 'hp' is not defined

In [None]:
mat_cable_cp.loc[:,["SAP_MATNR",'Leitungsart TSG / TSZ']].to_csv("../../res/results/export_cable.csv")

In [None]:
mat_cable_extracted = pd.read_csv("../../res/results/normalized_materials_cable_data.csv")

<h2>Visualize the Different types of sensors</h2>

In [None]:
from anytree import Node, RenderTree, PreOrderIter

# Focus on relevant columns and handle missing data
data_clean = mat_cable_extracted[['Material', 'Conductor Type', 'Diameter']].fillna('Unknown')

# Initialize the root of the tree and main categories
root = Node("Cable", count=0)
pt_node = Node("PTs", parent=root, count=0)
ntc_node = Node("NTCs", parent=root, count=0)
kty_node = Node("KTYs", parent=root, count=0)
lm_node = Node("LMs", parent=root, count= 0)
others_node = Node("Others", parent=root, count=0)

# Populate the tree and count rows
for _, row in data_clean.iterrows():
    sensorart, pt_class = row['Sensorart'], row['PT_Class']
    widerstand = row['Widerstand']
    b_wert = row['B_Wert']
    
    # Determine the category and add specific nodes
    if re.match(r"Pt\s?\d+", sensorart):
        parent_node = pt_node
    elif re.match(r"NTC", sensorart):
        parent_node = root
    elif re.match(r"KTY", sensorart):
        parent_node = kty_node
    elif re.match(r"LM", sensorart):
        parent_node = lm_node
    else:
        parent_node = others_node
    
    # Increment count for the category
    parent_node.count += 1
    root.count +=1
    
    # Create or fetch the sensor node
    sensor_node_name = f"{sensorart}"
    sensor_node = next((child for child in parent_node.children if child.name.startswith(sensor_node_name)), None)
    if not sensor_node:
        sensor_node = Node(sensor_node_name, parent=parent_node, count=0)
    sensor_node.count += 1

    # Further classify PTs by 'PT_Class' and NTCs by 'Widerstand'
    if parent_node == pt_node and pt_class != 'Unknown':
        class_node_name = f"Class: {pt_class}"
        class_node = next((child for child in sensor_node.children if child.name == class_node_name), None)
        if not class_node:
            class_node = Node(class_node_name, parent=sensor_node, count=0)
        class_node.count += 1
    elif parent_node == root and widerstand != 'Unknown':
        root.count -=1
        widerstand_node_name = f"Widerstand: {widerstand}"
        widerstand_node = next((child for child in sensor_node.children if child.name == widerstand_node_name), None)
        if not widerstand_node:
            widerstand_node = Node(widerstand_node_name, parent=sensor_node, count=0)
        widerstand_node.count += 1

        if b_wert != 'Unknown':
            b_wert_node_name = f'{widerstand} B_Wert: {b_wert}'
            b_wert_node = next((child for child in widerstand_node.children if child.name == b_wert_node_name), None)
            if not b_wert_node:
                b_wert_node = Node(b_wert_node_name, parent=widerstand_node, count=0)
            b_wert_node.count += 1

# Update node names with counts
for node in PreOrderIter(root):
    node.name = f"{node.name} ({node.count})"

# Print the tree structure
for pre, fill, node in RenderTree(root):
    print("%s%s" % (pre, node.name))


In [None]:
import matplotlib.pyplot as plt
import networkx as nx
from networkx.drawing.nx_pydot import graphviz_layout

G = nx.DiGraph()

# A helper function to ensure node names are properly quoted if necessary
def format_node_name(name):
    # Check if the name contains special characters and quote it if it does
    if ':' in name or any(c in name for c in ['-', ' ', '(', ')', '[', ']', '{', '}']):
        return f'"{name}"'
    return name

# Function to add nodes and edges with formatted node names
def add_nodes_edges(node):
    for child in node.children:
        parent_name = format_node_name(node.name)
        child_name = format_node_name(child.name)
        G.add_edge(parent_name, child_name)  # Add an edge to the graph with formatted names
        add_nodes_edges(child)  # Recursive call to add nodes/edges for children

# Initialize the process with the root node
add_nodes_edges(root)

# Calculate node sizes based on the number of descendants (or any other metric)
node_sizes = {node: int(node[node.rfind("(")+1:node.rfind(")")])*200 + 300 for node in G.nodes()}  # Example metric: out_degree * 200 + 300


# Use the graphviz layout to draw the tree
plt.figure(figsize=(80, 30))  # Adjust the size of the figure as needed
pos = graphviz_layout(G, prog='dot')  # This uses the 'dot' layout
nx.draw(G, pos, with_labels=False, arrows=True, node_size=[node_sizes[node] for node in G.nodes()], node_color="lightblue", font_size=12, font_weight="bold")

# Custom labels drawing
label_pos = {key: (value[0], value[1] - 5) for key, value in pos.items()}  # Adjust label positions below nodes
for node, (x, y) in label_pos.items():
    plt.text(x, y, node, fontsize=10, fontweight="bold", ha='center', rotation=90)  # ha='center' centers the text horizontally at the position

plt.title('Tree Visualization with Graphviz Layout')
plt.show()