In [1]:
import pandas as pd
import numpy as np
import random
import math

In [2]:
# Define the Node class
class Node:
    def __init__(self):
        self.positive_child = None
        self.negative_child = None
        self.label = None
        self.rule = None

# Define helper functions
def create_node():
    return Node()


In [3]:
def determine_class_label(Tsv):
    #print("Tsv in determine_class_label:", Tsv[:5])  # Add this line to check the input
    if not Tsv:
        return None
    
    label_count = {}
    for ts, label in Tsv:
        if label in label_count:
            label_count[label] += 1
        else:
            label_count[label] = 1
    return max(label_count, key=label_count.get)


In [4]:
def set_node_label(node, label):
    node.label = label

def label_node(node, rule):
    node.rule = rule

def satisfies_rule(ts, rule):
    pts, index = rule
    if not isinstance(pts, (list, tuple)) or not isinstance(index, int):
        return False
    
    if index < 0 or index >= len(ts):
        return False
    
    subseq = ts[index: index + len(pts)]
    for i, (value, sign) in enumerate(pts):
        if (sign == '+' and subseq[i] < value) or (sign == '-' and subseq[i] > value):
            return False
    return True


In [5]:
def split_dataset(Tsv, rule):
    Tsv_positive = [(ts, label) for ts, label in Tsv if satisfies_rule(ts, rule)]
    Tsv_negative = [(ts, label) for ts, label in Tsv if not satisfies_rule(ts, rule)]
    #print("Tsv_positive:", Tsv_positive[:5])  # Add this line to check the output
    #print("Tsv_negative:", Tsv_negative[:5])  # Add this line to check the output
    return Tsv_positive, Tsv_negative


In [6]:
def check_support(pts, Tsv, min_support=0.1):
    count = sum(1 for ts, _ in Tsv if satisfies_rule(ts, (pts, 0)))
    support = count / len(Tsv)
    return support >= min_support

def calculate_confidence(pts, Tsv):
    satisfying_instances = [(ts, label) for ts, label in Tsv if satisfies_rule(ts, (pts, 0))]
    
    if not satisfying_instances:
        return 0.0
    
    correct_classifications = sum(1 for ts, label in satisfying_instances if determine_class_label([(ts, label)]) == label)
    
    confidence = correct_classifications / len(satisfying_instances)
    
    return confidence


In [7]:
def check_interesting_measures(pts, M, Tsv):
    support = check_support(pts, Tsv)
    confidence = calculate_confidence(pts, Tsv)
    
    return M['support'][0] <= support <= M['support'][1] and M['confidence'][0] <= confidence <= M['confidence'][1]


In [8]:
def generate_candidate_patterns(Tsv, M):
    if not Tsv:
        return None  # Handle case where Tsv is empty
    
    I = set()
    r = []
    
    while True:
        ts, label = random.choice(Tsv)
        i = random.choice([idx for idx in range(len(ts)) if idx not in I])
        I.add(i)
        
        ts_prime, _ = random.choice(Tsv)
        
        pattern_element = (ts[i], ts_prime[i] >= ts[i], i)
        r.append(pattern_element)
        
        sorted_I = sorted(I)
        pts = [(ts[idx], pattern_element[1]) for idx in sorted_I]
        
        if len(pts) < 2:
            continue
        
        if check_support(pts, Tsv) and check_interesting_measures(pts, M, Tsv):
            return pts


In [9]:
def calculate_loss(rpts, Tsv, L):
    loss = 0.0
    return loss

def select_best_pattern_rule(RPTSv, Tsv, L):
    best_rpts = None
    best_loss = float('inf')
    
    for rpts in RPTSv:
        loss = calculate_loss(rpts, Tsv, L)
        if loss < best_loss:
            best_loss = loss
            best_rpts = rpts
    
    return best_rpts


In [10]:
# Define the rpts_tree function
def rpts_tree(Ts, M, L, max_height, min_samples):
    def build_tree(Ts, M, L, current_height):
        v = create_node()
        
        Tsv = Ts
        #print("Tsv in build_tree:", Tsv[:5])  # Add this line to check the input
        
        class_label = determine_class_label(Tsv)
        if class_label is None:
            return v
        
        set_node_label(v, class_label)
        
        if len(Tsv) == 0:
            return v
        
        RPTSv = generate_candidate_patterns(Tsv, M)
        
        if RPTSv is None:
            return v
        
        rptsv = select_best_pattern_rule(RPTSv, Tsv, L)
        
        label_node(v, rptsv)
        
        Tsv_positive, Tsv_negative = split_dataset(Tsv, rptsv)
        
        if current_height >= max_height or len(Tsv) < min_samples:
            return v
        
        v.positive_child = build_tree(Tsv_positive, M, L, current_height + 1)
        v.negative_child = build_tree(Tsv_negative, M, L, current_height + 1)
        
        return v
    
    return build_tree(Ts, M, L, 0)


In [11]:
# Function to print the tree structure
def print_tree(node, level=0):
    indent = "  " * level
    if node is None:
        return
    print(f"{indent}Node: Label={node.label}, Rule={node.rule}")
    print_tree(node.positive_child, level + 1)
    print_tree(node.negative_child, level + 1)


In [13]:
# Load the dataset
file_path = '/Users/majidtavakoli/Documents/Medical_bionformatics/decision support systems/Ecxersices/DataSummary.csv'  # Replace with the actual file path if needed
data = pd.read_csv(file_path)

# Assuming the 'Class' column contains the class label
class_column = 'Class'

# Assuming time-series data are in a specific range of columns
# In this case, we'll assume the time-series data start from the 'Train' column to 'DTW (w=100)'
time_series_columns = data.columns[3:12]  # Adjust the range according to your dataset

# Convert the dataframe to a list of tuples (time_series, label)
Ts = [(row[time_series_columns].apply(pd.to_numeric, errors='coerce').fillna(0).values.tolist(), row[class_column]) for index, row in data.iterrows()]
#print("Converted Ts:", Ts[:5])  # Add this line to check the conversion

# Define the set of interesting measures M
M = {
    'support': (0.1, 1.0),  # Example measure: Support should be between 10% and 100%
    'confidence': (0.5, 1.0)  # Example measure: Confidence should be between 50% and 100%
}

#salad code:
data_list = [ [float(f) for f in x[1].values[0].split()] for x in list(data.iterrows())  ]
x = [l[1:] for l in data_list]
y = [l[0] for l in data_list]
x,y

In [14]:
# Define the loss function L (for example, information gain)
def information_gain(parent, left_child, right_child):
    def entropy(class_counts):
        total = sum(class_counts)
        return -sum((count / total) * math.log2(count / total) for count in class_counts if count != 0)
    
    parent_entropy = entropy([count for _, count in parent.items()])
    left_entropy = entropy([count for _, count in left_child.items()])
    right_entropy = entropy([count for _, count in right_child.items()])
    
    left_weight = sum(left_child.values()) / sum(parent.values())
    right_weight = sum(right_child.values()) / sum(parent.values())
    
    return parent_entropy - (left_weight * left_entropy + right_weight * right_entropy)

L = information_gain


In [15]:
def print_tree(node, depth=0):
    """
    Recursively prints the tree structure.
    
    :param node: The current node in the tree.
    :param depth: The current depth of the node (used for indentation).
    """
    if node is None:
        return
    
    # Print the current node
    print("  " * depth + str(node))
    
    # Recursively print the children (assuming binary tree)
    if hasattr(node, 'left'):
        print_tree(node.left, depth + 1)
    if hasattr(node, 'right'):
        print_tree(node.right, depth + 1)

# Example usage:
# Assuming 'tree' is the root of your tree structure
print_tree(tree)


NameError: name 'tree' is not defined

In [None]:

# Define maximum height of the tree and minimum samples per leaf
max_height = 5
min_samples = 10

# Build the tree
tree = rpts_tree(Ts, M, L, max_height, min_samples)

# Print the tree structure
print_tree(tree)


<__main__.Node object at 0x15c8bf160>


In [None]:
print(tree)

<__main__.Node object at 0x15751c9d0>
