### The mutual information (MI) between two quantities is a measure of the extent to which knowledge of one quantity reduces uncertainty about the other.

In [9]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.feature_selection import mutual_info_regression

df = pd.read_csv('../CSVs/autos.csv')

## Scikit-learn has two mutual information metrics

In [5]:
# The scikit-learn algorithm for MI treats discrete features differently from continuous features.
# Consequently, you need to tell it which are which. As a rule of thumb, anything that must have a float dtype is not discrete.

X = df.copy()
y = X.pop("price")

# Label encoding for categoricals
for colname in X.select_dtypes("object"):
    X[colname], _ = X[colname].factorize()

# All discrete features should now have integer dtypes (double-check this before using MI!)
discrete_features = X.dtypes == int

In [8]:
# Computes the MI scores for our features and wraps them up in a nice dataframe.
def make_mi_scores(X, y, discrete_features):
    mi_scores = mutual_info_regression(X, y, discrete_features=discrete_features)
    mi_scores = pd.Series(mi_scores, name="MI Scores", index=X.columns)
    mi_scores = mi_scores.sort_values(ascending=False)
    return mi_scores

mi_scores = make_mi_scores(X, y, discrete_features)
mi_scores[::3]  # show a few features with their MI scores

curb_weight          0.918322
highway_mpg          0.870642
length               0.612999
fuel_system          0.471325
num_of_cylinders     0.332440
symboling            0.234179
compression_ratio    0.096908
fuel_type            0.047279
Name: MI Scores, dtype: float64

In [10]:
def plot_mi_scores(scores):
    scores = scores.sort_values(ascending=True)
    width = np.arange(len(scores))
    ticks = list(scores.index)
    plt.barh(width, scores)
    plt.yticks(width, ticks)
    plt.title("Mutual Information Scores")


plt.figure(dpi=100, figsize=(8, 5))
plot_mi_scores(mi_scores)

NameError: name 'np' is not defined

<Figure size 800x500 with 0 Axes>