# Parameter Estimation

### a. [22.5 pts] Perform parameter estimation using the Maximum Likelihood algorithm. Print out CPDs and local independencies of the network

In [22]:
import pandas as pd

# Load dataset
df = pd.read_csv("auto-mpg.csv")

# Discretize continuous values using median
for column in df.columns:
    if df[column].dtype != 'object':  # Check if column is numeric
        median_val = df[column].median()  # Calculate median
        df[column] = pd.cut(df[column], bins=[float('-inf'), median_val, float('inf')], labels=['low', 'high'])

# Display first few rows of the dataset
print(df.head())
data=df

   mpg cylinders displacement horsepower weight acceleration model year  \
0  low      high         high       high   high          low        low   
1  low      high         high       high   high          low        low   
2  low      high         high       high   high          low        low   
3  low      high         high       high   high          low        low   
4  low      high         high       high   high          low        low   

  origin                   car name  
0    low  chevrolet chevelle malibu  
1    low          buick skylark 320  
2    low         plymouth satellite  
3    low              amc rebel sst  
4    low                ford torino  


In [23]:
print("[('mpg', 'displacement'), ('mpg', 'horsepower'), ('cylinders', 'acceleration'), ('displacement', 'origin'), ('displacement', 'model year'), ('horsepower', 'cylinders'), ('weight', 'mpg'), ('weight', 'displacement')]")

[('mpg', 'displacement'), ('mpg', 'horsepower'), ('cylinders', 'acceleration'), ('displacement', 'origin'), ('displacement', 'model year'), ('horsepower', 'cylinders'), ('weight', 'mpg'), ('weight', 'displacement')]


In [24]:
from pgmpy.models import BayesianNetwork

model = BayesianNetwork([('mpg', 'displacement'), ('mpg', 'horsepower'), ('cylinders', 'acceleration'), ('displacement', 'origin'), ('displacement', 'model year'), ('horsepower', 'cylinders'), ('weight', 'mpg'), ('weight', 'displacement')])

In [25]:
from pgmpy.estimators import ParameterEstimator
pe = ParameterEstimator(model, data)

In [26]:
from pgmpy.estimators import MaximumLikelihoodEstimator
mle = MaximumLikelihoodEstimator(model, data)

In [27]:
print(mle.estimate_cpd('mpg'))  # unconditional

+-----------+---------------------+-------------+
| weight    | weight(high)        | weight(low) |
+-----------+---------------------+-------------+
| mpg(high) | 0.04081632653061224 | 0.94        |
+-----------+---------------------+-------------+
| mpg(low)  | 0.9591836734693877  | 0.06        |
+-----------+---------------------+-------------+


In [28]:
# Calibrate all CPDs of `model` using MLE:
model.fit(data, estimator=MaximumLikelihoodEstimator)

In [29]:
from pgmpy.estimators import BayesianEstimator
est = BayesianEstimator(model, data)


In [30]:
print(est.estimate_cpd('acceleration', prior_type='BDeu', equivalent_sample_size=10))

+--------------------+---------------------+---------------------+
| cylinders          | cylinders(high)     | cylinders(low)      |
+--------------------+---------------------+---------------------+
| acceleration(high) | 0.12790697674418605 | 0.7045454545454546  |
+--------------------+---------------------+---------------------+
| acceleration(low)  | 0.872093023255814   | 0.29545454545454547 |
+--------------------+---------------------+---------------------+


In [31]:
import numpy as np
import pandas as pd
from pgmpy.models import BayesianNetwork
from pgmpy.estimators import BayesianEstimator
model = BayesianNetwork([('mpg', 'displacement'), ('mpg', 'horsepower'), ('cylinders', 'acceleration'), ('displacement', 'origin'), ('displacement', 'model year'), ('horsepower', 'cylinders'), ('weight', 'mpg'), ('weight', 'displacement')])
model.fit(data, estimator=BayesianEstimator, prior_type="BDeu") # default equivalent_sample_size=5
for cpd in model.get_cpds():
    print(cpd)

+-----------+---------------------+---------------------+
| weight    | weight(high)        | weight(low)         |
+-----------+---------------------+---------------------+
| mpg(high) | 0.06310679611650485 | 0.919047619047619   |
+-----------+---------------------+---------------------+
| mpg(low)  | 0.9368932038834952  | 0.08095238095238096 |
+-----------+---------------------+---------------------+
+--------------------+---------------------+----------------------+---------------------+---------------------+
| mpg                | mpg(high)           | mpg(high)            | mpg(low)            | mpg(low)            |
+--------------------+---------------------+----------------------+---------------------+---------------------+
| weight             | weight(high)        | weight(low)          | weight(high)        | weight(low)         |
+--------------------+---------------------+----------------------+---------------------+---------------------+
| displacement(high) | 0.192307692

# b. [22.5 pts] Perform parameter estimation using the Expectation Maximization algorithm. Print out CPDs and local independencies of the network.

In [32]:
import pandas as pd


In [33]:

# Load dataset
df = pd.read_csv("auto-mpg.csv")

# Discretize continuous values using median
for column in df.columns:
    if df[column].dtype != 'object':  # Check if column is numeric
        median_val = df[column].median()  # Calculate median
        df[column] = pd.cut(df[column], bins=[float('-inf'), median_val, float('inf')], labels=['low', 'high'])

# Display first few rows of the dataset
print(df.head())
data=df

   mpg cylinders displacement horsepower weight acceleration model year  \
0  low      high         high       high   high          low        low   
1  low      high         high       high   high          low        low   
2  low      high         high       high   high          low        low   
3  low      high         high       high   high          low        low   
4  low      high         high       high   high          low        low   

  origin                   car name  
0    low  chevrolet chevelle malibu  
1    low          buick skylark 320  
2    low         plymouth satellite  
3    low              amc rebel sst  
4    low                ford torino  


In [34]:
import numpy as np
import pandas as pd
from pgmpy.models import BayesianNetwork
from pgmpy.estimators import ExpectationMaximization

model = BayesianNetwork([('mpg', 'displacement'), ('mpg', 'horsepower'), ('cylinders', 'acceleration'), ('displacement', 'origin'), ('displacement', 'model year'), ('horsepower', 'cylinders'), ('weight', 'mpg'), ('weight', 'displacement')])
estimator = ExpectationMaximization(model, data)

In [35]:
from pgmpy.estimators import ExpectationMaximization as EM

model = BayesianNetwork([('mpg', 'displacement'), ('mpg', 'horsepower'), ('cylinders', 'acceleration'), ('displacement', 'origin'), ('displacement', 'model year'), ('horsepower', 'cylinders'), ('weight', 'mpg'), ('weight', 'displacement')])

estimator = EM(model, data)
estimator.get_parameters(latent_card={'mpg': 2})

  0%|          | 0/100 [00:00<?, ?it/s]

[<TabularCPD representing P(mpg:2 | weight:2) at 0x216f847e9d0>,
 <TabularCPD representing P(displacement:2 | mpg:2, weight:2) at 0x216f847ef70>,
 <TabularCPD representing P(horsepower:2 | mpg:2) at 0x216f847e940>,
 <TabularCPD representing P(cylinders:2 | horsepower:2) at 0x216f8486d00>,
 <TabularCPD representing P(acceleration:2 | cylinders:2) at 0x216eb477700>,
 <TabularCPD representing P(origin:2 | displacement:2) at 0x216f8407c10>,
 <TabularCPD representing P(model year:2 | displacement:2) at 0x216f8407610>,
 <TabularCPD representing P(weight:2) at 0x216f8407910>]

In [36]:
from pgmpy.models import BayesianNetwork
from pgmpy.independencies import IndependenceAssertion, Independencies

# Define the structure of the Bayesian network
model = BayesianNetwork([('mpg', 'displacement'), ('mpg', 'horsepower'), ('cylinders', 'acceleration'),
                         ('displacement', 'origin'), ('displacement', 'model year'), ('horsepower', 'cylinders'),
                         ('weight', 'mpg'), ('weight', 'displacement')])

# Define the independencies based on the structure
independencies = Independencies()

# Add independencies based on the structure of the Bayesian network
independencies.add_assertions(
    IndependenceAssertion('cylinders', 'displacement', ['mpg', 'horsepower']),
    IndependenceAssertion('origin', 'mpg', ['displacement', 'model year']),
    IndependenceAssertion('origin', 'mpg', ['displacement', 'model year', 'horsepower']),
    IndependenceAssertion('model year', 'mpg', ['displacement']),
    IndependenceAssertion('acceleration', 'mpg', ['cylinders', 'displacement', 'horsepower']),
    IndependenceAssertion('acceleration', 'mpg', ['cylinders', 'displacement', 'horsepower', 'weight'])
)

# Print the local independencies
print("Local independencies:")
print(independencies)


Local independencies:
(cylinders ⟂ displacement | horsepower, mpg)
(origin ⟂ mpg | displacement, model year)
(origin ⟂ mpg | displacement, horsepower, model year)
(model year ⟂ mpg | displacement)
(acceleration ⟂ mpg | displacement, cylinders, horsepower)
(acceleration ⟂ mpg | displacement, cylinders, weight, horsepower)


In [37]:
from pgmpy.base import DAG
chain = DAG([('mpg', 'displacement'), ('mpg', 'horsepower'), ('cylinders', 'acceleration'),
                         ('displacement', 'origin'), ('displacement', 'model year'), ('horsepower', 'cylinders'),
                         ('weight', 'mpg'), ('weight', 'displacement')])
chain.get_independencies()

(weight ⟂ horsepower, cylinders, acceleration | mpg)
(weight ⟂ origin, model year | displacement)
(weight ⟂ cylinders, acceleration | horsepower)
(weight ⟂ acceleration | cylinders)
(weight ⟂ horsepower, cylinders, acceleration | model year, mpg)
(weight ⟂ origin | displacement, model year)
(weight ⟂ cylinders, acceleration | horsepower, model year)
(weight ⟂ acceleration | cylinders, model year)
(weight ⟂ model year, origin, horsepower, cylinders, acceleration | displacement, mpg)
(weight ⟂ cylinders, acceleration | horsepower, mpg)
(weight ⟂ horsepower, cylinders, acceleration | origin, mpg)
(weight ⟂ horsepower, acceleration | cylinders, mpg)
(weight ⟂ horsepower, cylinders | acceleration, mpg)
(weight ⟂ origin, cylinders, model year, acceleration | displacement, horsepower)
(weight ⟂ model year | displacement, origin)
(weight ⟂ origin, model year, acceleration | displacement, cylinders)
(weight ⟂ origin, model year | displacement, acceleration)
(weight ⟂ cylinders, acceleration | o