Projekt Elementy Sztucznej Inteligencji - Julia Baranowska, Aleksander Folfas, Julia Głowa

Import niezbędnych bibliotek

In [1]:
import pandas as pd
import numpy as np
from math import log2
import networkx as nx
import matplotlib.pyplot as plt

Definicja funkcji, która na podstawie podanych danych oblicza entropię kolumny docelowej (sumę składników -p·log2(p) po wszystkich klasach)

In [2]:
def entropy(data):
    """Return the base-2 Shannon entropy of the last column of ``data``.

    Parameters
    ----------
    data : pandas.DataFrame
        Table whose last column holds the target labels.

    Returns
    -------
    The sum of ``-p * log2(p)`` over every distinct label, where ``p`` is
    the fraction of rows carrying that label. An empty column yields ``0``.
    """
    labels = data.iloc[:, -1]
    n_rows = len(labels)

    # Relative frequency of each distinct label, in order of first appearance.
    probabilities = ((labels == label).sum() / n_rows for label in labels.unique())

    return sum(-prob * log2(prob) for prob in probabilities)

Definicja funkcji, która oblicza przyrost informacji (information gain) uzyskany przez podział danych według podanej przesłanki

In [3]:
def information_gain(data, feature):
    """Return the entropy reduction obtained by splitting ``data`` on ``feature``.

    Parameters
    ----------
    data : pandas.DataFrame
        Table whose last column holds the target labels.
    feature : column label
        Column of ``data`` to split on.

    Returns
    -------
    Entropy of the whole table minus the size-weighted sum of the entropies
    of the partitions produced by each distinct value of ``feature``.
    """
    n_rows = len(data)
    column = data[feature]

    # One partition per distinct feature value, in order of first appearance.
    partitions = (data[column == value] for value in column.unique())

    # Each partition contributes its entropy weighted by its share of the rows.
    weighted_entropy = sum(len(part) / n_rows * entropy(part) for part in partitions)

    return entropy(data) - weighted_entropy

Definicja funkcji, która wybiera najlepszą przesłankę (dającą najwięcej informacji)

In [4]:
def best_feature(data):
    """Return the name of the feature column with the highest information gain.

    Every column except the last (the target) is a candidate. When several
    candidates share the maximum gain, ``np.argmax`` selects the first one.

    Parameters
    ----------
    data : pandas.DataFrame
        Table with at least one feature column followed by the target column.
    """
    candidates = data.columns[:-1]
    gains = [information_gain(data, name) for name in candidates]
    return candidates[np.argmax(gains)]

Definicja funkcji, która rekurencyjnie buduje drzewo decyzyjne

In [5]:
def build_tree(data):
    """Recursively build a decision tree from ``data``.

    Parameters
    ----------
    data : pandas.DataFrame
        Table whose last column holds the target labels.

    Returns
    -------
    Either a bare target label (leaf) or a nested dict of the form
    ``{feature_name: {feature_value: subtree, ...}}``.
    """
    labels = data.iloc[:, -1]

    # Leaf: every remaining row carries the same label.
    if len(labels.unique()) == 1:
        return labels.iloc[0]

    # Leaf: only the target column is left, so fall back to the most common label.
    if data.shape[1] == 1:
        return labels.mode()[0]

    split_on = best_feature(data)
    column = data[split_on]

    # One branch per observed value; the split column is removed before recursing
    # so it cannot be selected again further down the same path.
    branches = {
        value: build_tree(data[column == value].drop(columns=split_on))
        for value in column.unique()
    }
    return {split_on: branches}

Definicja funkcji, która przewiduje wartości na podstawie zbudowanego drzewa oraz przesłanek

In [6]:
def predict(tree, data):
    """Walk ``tree`` using the feature values in ``data`` and return the leaf label.

    Parameters
    ----------
    tree : dict or label
        Either a nested dict ``{feature: {value: subtree}}`` as produced by
        ``build_tree``, or a bare label. ``build_tree`` returns a bare label
        when every training row shares one target value, so a non-dict tree
        is treated as an already-resolved leaf.
    data : mapping
        Anything indexable by feature name (for example a pandas Series or a
        dict) that holds the instance's feature values.

    Returns
    -------
    The label stored at the reached leaf, or ``None`` if an empty dict node
    is encountered.

    Raises
    ------
    KeyError
        If ``data`` lacks a feature the tree tests, or holds a value for which
        the tree has no branch.
    """
    node = tree
    # Descend until a non-dict value (a leaf label) is reached.
    while isinstance(node, dict):
        if not node:
            # An empty node has nothing to test.
            return None
        # Each internal node has exactly one key: the feature it tests.
        feature = next(iter(node))
        node = node[feature][data[feature]]
    return node

Wczytanie zestawu danych treningowych oraz testowych jako obiektów pandas DataFrame

In [7]:
# Both files are semicolon-separated; index_col=False keeps the first column as data.
data, test = (
    pd.read_csv(csv_path, sep=';', index_col=False)
    for csv_path in ('Training_Data.csv', 'Test_Data.csv')
)

Stworzenie drzewa decyzyjnego za pomocą funkcji build_tree oraz wypisanie rezultatów

In [8]:
# Build the decision tree from the training frame; the result is a nested dict
# of the form {feature: {value: subtree-or-label}} (or a bare label).
tree = build_tree(data)
# Print the raw nested structure for inspection.
print(tree)

{'HTML': {0: {'Java': {0: {'Python': {1: {'BasicEnglish': {1: {'Bachelor': {0: {'NoEducation': {1: {'NoExperience': {1: 0, 0: {'StudentStatus': {1: {'AcademicExperience': {1: 0, 0: 1}}, 0: {'AcademicExperience': {1: 1, 0: 0}}}}}}, 0: {'NoExperience': {1: {'StudentStatus': {1: 0, 0: 1}}, 0: 1}}}}, 1: {'NoExperience': {1: {'StudentStatus': {1: 0, 0: 1}}, 0: {'StudentStatus': {1: {'AcademicExperience': {1: 3, 0: 1}}, 0: {'AcademicExperience': {1: 1, 0: 3}}}}}}}}, 0: {'Master': {0: {'AcademicExperience': {0: {'StudentStatus': {1: {'NoEducation': {1: {'AdvancedEnglish': {1: 3, 0: {'NoExperience': {1: 1, 0: 0}}}}, 0: {'NoExperience': {1: {'AdvancedEnglish': {1: 0, 0: 1}}, 0: {'AdvancedEnglish': {1: 1, 0: 3}}}}}}, 0: {'NoEducation': {1: 1, 0: {'NoExperience': {1: {'AdvancedEnglish': {1: 1, 0: 3}}, 0: 1}}}}}}, 1: {'StudentStatus': {1: 1, 0: {'NoEducation': {1: {'AdvancedEnglish': {1: 0, 0: 1}}, 0: 1}}}}}}, 1: {'AcademicExperience': {0: 1, 1: {'StudentStatus': {1: {'AdvancedEnglish': {1: 3, 0: 

Stworzenie predykcji na zestawie testowym z wykorzystaniem drzewa decyzyjnego

In [9]:
# Classify every test row; the last column is sliced off so only the remaining
# columns are passed to the tree.
predictions = [predict(tree, test.iloc[row, :-1]) for row in range(len(test))]

# Show the predicted labels in test-set order.
print(predictions)

[3, 2, 1, 1, 3, 2, 1, 1, 3, 2]
