In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import pyAgrum as gum
import pyAgrum as skbn
import pyAgrum.lib.bn2graph as gnb2
import pyAgrum.lib.bn_vs_bn as bn_vs_bn
import pyAgrum.lib.notebook as gnb
import pyAgrum.lib.image as gimg

## Data
Preprocessed dataset from Assignment_1.ipynb

In [None]:
# Load the preprocessed dataset and normalise names so they match the
# variable names / labels declared for the Bayesian networks below.
data = (
    pd.read_csv('./data/processed_data.csv')
    # 'Unnamed: 0' is presumably the index column written by an earlier to_csv -- TODO confirm.
    .drop(columns='Unnamed: 0')
    .rename(columns={"cloud coverage": "cloud_coverage"})
)
# Replace spaces in the rain labels with underscores (e.g. "no rain" -> "no_rain").
data["rain"] = data["rain"].map(lambda label: label.replace(" ", "_"))
print(data.shape)
data.head()

In [None]:
# Reference network: the learned structures are compared against this one
# further down (presumably the hand-built structure from Assignment 1 -- confirm).
assignment_1 = gum.BayesNet("Precipitation Network")

# Variable name -> ordered list of labels; insertion order fixes the node ids.
reference_domains = {
    "season": ["Spring", "Summer", "Autumn", "Winter"],
    "cloud_coverage": ["low", "high"],
    "humidity": ["low", "medium", "high"],
    "pressure": ["low", "high"],
    "temp": ["low", "high"],
    "rain": ["no_rain", "rain"],
}
for var_name, var_labels in reference_domains.items():
    assignment_1.add(gum.LabelizedVariable(var_name, var_name, var_labels))

# Directed arcs (parent, child) of the reference structure.
assignment_1.addArcs([
    ("season", "temp"),
    ("temp", "humidity"),
    ("temp", "pressure"),
    ("humidity", "cloud_coverage"),
    ("humidity", "rain"),
    ("cloud_coverage", "rain"),
    ("pressure", "rain"),
])

assignment_1

In [None]:
# Arc-free network that only declares the variables and their labels.
# It is handed to gum.BNLearner as its second argument in the learning cells.
bn = gum.BayesNet("Precipitation Network")

template_domains = [
    ("season", ["Spring", "Summer", "Autumn", "Winter"]),
    ("cloud_coverage", ["low", "high"]),
    ("humidity", ["low", "medium", "high"]),
    ("pressure", ["low", "high"]),
    ("temp", ["low", "high"]),
    ("rain", ["no_rain", "rain"]),
]
for variable, labels in template_domains:
    bn.add(gum.LabelizedVariable(variable, variable, labels))

# NOTE(review): "tempalte" looks like a typo for "template"; the path is left
# unchanged because the image may be referenced under this name elsewhere.
gimg.export(bn, "./images/tempalte.png")

bn

## Constraint-based algorithm: MIIC

In [None]:
# Number of rows drawn from `data` for each learning run; the last entry is the full dataset.
sample_sizes = [1, 10, 100, *range(500, 3001, 500), len(data)]

In [None]:
# Learn a network with MIIC on random subsets of increasing size and record
# the structural Hamming distance between each learned network and `assignment_1`.
miic_structural_hamming_distances = []

for size in sample_sizes:
    # Fixed seed: the same rows are drawn for a given size on every run, so the
    # saved networks, images and distances are reproducible after a kernel restart.
    sampled_data = data.sample(size, random_state=42)

    # `bn` (variables and labels, no arcs) is passed alongside the sampled rows.
    miic = gum.BNLearner(sampled_data, bn)
    miic.useMIIC()
    net = miic.learnBN()

    # Persist the learned network (reloaded later for display) and a picture of it.
    gum.saveBN(net, f'./networks/miic_{size}.bif')
    gimg.export(net, f"./images/miic_{size}.png")

    # hamming() returns a dict; only its "structural hamming" entry is kept for plotting.
    difference = bn_vs_bn.GraphicalBNComparator(net, assignment_1)
    hamming_distance = difference.hamming()
    miic_structural_hamming_distances.append(hamming_distance["structural hamming"])
    print(hamming_distance)

## Score-based algorithm: Greedy Hill Climbing

In [None]:
# Learn a network with greedy hill climbing on random subsets of increasing size
# and record the structural Hamming distance between each learned network and `assignment_1`.
ghc_structural_hamming_distances = []

for size in sample_sizes:
    # Fixed seed: the same rows are drawn for a given size on every run, so the
    # saved networks, images and distances are reproducible after a kernel restart.
    sampled_data = data.sample(size, random_state=42)

    # `bn` (variables and labels, no arcs) is passed alongside the sampled rows.
    ghc = gum.BNLearner(sampled_data, bn)
    ghc.useGreedyHillClimbing()
    net = ghc.learnBN()

    # Persist the learned network (reloaded later for display) and a picture of it.
    gum.saveBN(net, f'./networks/ghc_{size}.bif')
    gimg.export(net, f"./images/ghc_{size}.png")

    # hamming() returns a dict; only its "structural hamming" entry is kept for plotting.
    difference = bn_vs_bn.GraphicalBNComparator(net, assignment_1)
    hamming_distance = difference.hamming()
    ghc_structural_hamming_distances.append(hamming_distance["structural hamming"])
    print(hamming_distance)

## Networks

### MIIC

In [None]:
# Reload a selection of the MIIC networks saved to ./networks and show them in one row.
miic_display_sizes = [10, 100, 1000, 2000, len(data)]
miic_nets = [gum.loadBN(f"./networks/miic_{n}.bif") for n in miic_display_sizes]

gnb.flow.row(
    *miic_nets,
    captions=[f"miic net for sample size: {n}" for n in miic_display_sizes],
)

### GHC

In [None]:
# Reload a selection of the GHC networks saved to ./networks and show them in one row.
# Stored under their own name so the MIIC list is not overwritten, and captioned
# as GHC so the figures are not mislabelled.
ghc_display_sizes = [10, 100, 1000, 2000, len(data)]
ghc_nets = [gum.loadBN(f"./networks/ghc_{size}.bif") for size in ghc_display_sizes]

gnb.flow.row(
    *ghc_nets,
    captions=[f"ghc net for sample size: {size}" for size in ghc_display_sizes],
)

## Plots

In [None]:
# Side-by-side curves of structural Hamming distance versus sample size,
# one panel per learning algorithm.
fig, axs = plt.subplots(1, 2, figsize=(14, 12))

panels = [
    ("MIIC", miic_structural_hamming_distances),
    ("GHC", ghc_structural_hamming_distances),
]
for ax, (algorithm, distances) in zip(axs, panels):
    ax.plot(sample_sizes, distances, marker='o', label=algorithm)
    ax.set_xlabel('Sample Size')
    ax.set_ylabel('Structural Hamming Distance')
    ax.set_title(f'{algorithm} Structural Hamming Distance per Sample Size')
    ax.grid(True)
    ax.legend()

plt.tight_layout()
# Save before show() so the written file contains the rendered figure.
plt.savefig('./images/constraint_hamming.png')
plt.show()