Beer-Liking Prediction Pipeline (Downloadable .ipynb)

In [None]:

*This notebook follows the Milestone 2 structure: Data Prep, Modeling, Evaluation.*

```python


%matplotlib inline

In [None]:
```

*This notebook follows the Milestone 2 structure: Data Prep, Modeling, Evaluation.*

```python


%matplotlib inline

In [None]:
```
 (Colab Script)



1. Setup & Imports

------------------

Install pgmpy if needed

!pip install pgmpy

In [None]:

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from pgmpy.models import BayesianModel
from pgmpy.estimators import MaximumLikelihoodEstimator
beer_path = "beer_updated.csv"



2. Data Loading & Preprocessing

--------------------------------

In [None]:
from sklearn.preprocessing import LabelEncoder


def load_and_clean_data(beers_path, ratings_path=None, like_threshold=3.5):
    """
    Load the beer CSV and replace its numeric ``rating`` column with a binary label.

    Parameters
    ----------
    beers_path : str or path-like
        CSV file to read; it must contain a ``rating`` column.
    ratings_path : optional
        Not read by this function. Kept (now optional) so existing two-argument
        call sites keep working.
    like_threshold : float, default 3.5
        Ratings greater than or equal to this value are labelled 1.

    Returns
    -------
    pandas.DataFrame
        The CSV contents without ``rating`` and with an integer ``bin_rating``
        column: 1 where ``rating >= like_threshold``, otherwise 0 (missing
        ratings also map to 0).

    Raises
    ------
    KeyError
        If the CSV has no ``rating`` column.
    """
    og_df = pd.read_csv(beers_path)
    # drop() addresses row labels unless told otherwise, so the column has to be
    # named explicitly; drop() already returns a new frame, no copy() needed.
    cleaned_df = og_df.drop(columns="rating")
    # Vectorised comparison instead of a per-row loop. NaN compares False, so
    # rows without a rating are labelled 0.
    cleaned_df["bin_rating"] = (og_df["rating"] >= like_threshold).astype(int)
    return cleaned_df



Load CSVs

In [None]:
    # beers = pd.read_csv(beers_path)
    # ratings = pd.read_csv(ratings_path)


Merge on 'beer_id'

In [None]:
    # Inner-join the two frames on 'beer_id', keeping only ids present in both.
    # NOTE(review): `beers` and `ratings` are only assigned in the commented-out
    # read_csv lines above, and this indented statement is not inside any
    # function here -- presumably a leftover of an earlier
    # load_and_clean_data body; confirm before running.
    df = pd.merge(beers, ratings, on='beer_id', how='inner')


Drop missing values in key columns

In [None]:
    # Discard rows with a missing value in any of the listed columns, then
    # renumber the index from 0 (the old index is not kept as a column).
    df = df.dropna(subset=['abv', 'ibu', 'ounces', 'style', 'rating_score']).reset_index(drop=True)


Binarize ratings into 'Like' / 'Dislike'

In [None]:
    # 'Like' is 1 where rating_score >= like_threshold, otherwise 0.
    # NOTE(review): `like_threshold` is not assigned at this level; it only
    # appears as a parameter of load_and_clean_data -- confirm where this
    # statement is meant to live.
    df['Like'] = (df['rating_score'] >= like_threshold).astype(int)


Encode 'style' as numeric

In [None]:
    # Fit a LabelEncoder on the 'style' column and store the resulting integer
    # codes in a new 'Style_enc' column.
    le = LabelEncoder()
    df['Style_enc'] = le.fit_transform(df['style'])
    # NOTE(review): as laid out in this file the `return` is not inside any
    # function body; it yields the frame together with the fitted encoder.
    return df, le



Example usage:

df, style_encoder = load_and_clean_data('beers.csv', 'ratings.csv')

--------------------------------------------------

Preprocessing demonstration

--------------------------------------------------

df.head() shows combined and processed data

In [None]:
# Reuse `df` / `style_encoder` when an earlier cell already created them;
# otherwise load the data now.
try:
    df, style_encoder
except NameError:
    loaded = load_and_clean_data('beers.csv', 'ratings.csv')
    # load_and_clean_data returns a single DataFrame. Unpacking that directly
    # into two names would iterate over its column labels rather than yield
    # (frame, encoder), so handle both return shapes explicitly.
    if isinstance(loaded, tuple):
        df, style_encoder = loaded
    else:
        df, style_encoder = loaded, None  # no encoder is produced in this case
print(df.head())



3. Exploratory Analysis

-----------------------

In [None]:
def explore_variables(df):
    """
    Print a structural summary and descriptive statistics, then draw basic plots.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'abv', 'ibu', 'ounces' and 'style'.

    Returns
    -------
    None
        Output is printed text plus two figures: histograms of the three
        numeric columns and a bar chart of the ten most frequent styles.
    """
    # DataFrame.info() writes its report itself and returns None; wrapping it
    # in print() would add a stray "None" line to the output.
    df.info()
    print(df.describe())
    df[['abv','ibu','ounces']].hist(bins=30, figsize=(12,4))
    top_styles = df['style'].value_counts().nlargest(10)
    # Series.plot draws on the current matplotlib axes when a figure is already
    # open, which here would be the histogram figure (and figsize would be
    # ignored). Plotting through a one-column DataFrame always opens a new
    # figure; legend=False keeps the look of the Series plot.
    top_styles.to_frame().plot.bar(figsize=(8,4), title='Top 10 Beer Styles', legend=False)




4. Train/Test Split

-------------------

In [None]:
def split_data(df, test_size=0.2, random_state=42):
    """
    Split the frame into train/test features and labels.

    Features are the 'abv', 'ibu', 'ounces' and 'style' columns; the label is
    the 'Like' column. Returns whatever train_test_split returns for those two
    inputs with the given test_size and random_state.
    """
    feature_columns = ['abv', 'ibu', 'ounces', 'style']
    features = df[feature_columns]
    labels = df['Like']
    return train_test_split(
        features,
        labels,
        test_size=test_size,
        random_state=random_state,
    )




5. Model Structure & Parameter Learning

---------------------------------------

In [None]:
# Network structure: each of the four feature nodes has a directed edge into
# the 'Like' node.
# NOTE(review): confirm the installed pgmpy release still exports
# BayesianModel under this name.
model = BayesianModel(
    [(feature, 'Like') for feature in ('abv', 'ibu', 'ounces', 'style')]
)

def learn_parameters(model, df):
    """
    Estimate the model's conditional probability tables with add-one (Laplace) smoothing.

    Parameters
    ----------
    model : pgmpy Bayesian network
        Network whose structure is already defined; fitted in place.
    df : pandas.DataFrame
        Training data with one column per node in the network.

    Returns
    -------
    The same ``model`` object, after fitting.

    Notes
    -----
    MaximumLikelihoodEstimator takes no ``prior_type`` argument, so smoothing
    cannot be requested through it. BayesianEstimator with the 'K2' prior adds
    a pseudo-count of 1 to every state combination, which is Laplace smoothing.
    """
    # Local import so this cell is self-contained and the notebook's import
    # cell does not need to change.
    from pgmpy.estimators import BayesianEstimator

    model.fit(df, estimator=BayesianEstimator, prior_type='K2')
    return model




6. Training

-----------

Example usage:

df = load_and_clean_data('beers.csv', 'ratings.csv')

X_train, X_test, y_train, y_test = split_data(df)

train_df = X_train.copy(); train_df['Like'] = y_train

trained_model = learn_parameters(model, train_df)

7. Evaluation

-------------

In [None]:
def evaluate_model(model, X_test, y_test):
    """
    Predict on X_test and score the predictions against y_test.

    Returns a 5-tuple in this order: accuracy, precision, recall, F1 score,
    confusion matrix.
    """
    predictions = model.predict(X_test)
    scorers = (
        accuracy_score,
        precision_score,
        recall_score,
        f1_score,
        confusion_matrix,
    )
    # Every scorer is called as scorer(y_true, y_pred), in the order listed.
    return tuple(scorer(y_test, predictions) for scorer in scorers)




8. Future Work

--------------

- Compare to scikit-learn's CategoricalNB/GaussianNB

- Structure learning: Tree-Augmented NB

- Add contextual features (weather, mood)

- Build a simple UI in Streamlit

To download this as a .ipynb file, run the following Python code in a Colab cell:

```python

import nbformat as nbf

# Read this script's text
with open('/content/beer_pipeline_colab.py') as f:
    script = f.read().splitlines()

nb = nbf.v4.new_notebook()

cells = []

# Walk the script by position: looking lines up with script.index(line) would
# always land on the first line with that text, not the current one.
for start, line in enumerate(script):
    if line.startswith('# %% [markdown]'):
        # A cell's body runs from the line after its marker up to the next
        # '# %%' marker, or to the end of the script if there is none.
        end = next(
            (pos for pos in range(start + 1, len(script))
             if script[pos].startswith('# %%')),
            len(script),
        )
        cells.append(nbf.v4.new_markdown_cell('\n'.join(script[start + 1:end])))


# For brevity, assume script is preformatted into notebook cells

# Instead, you can manually save this notebook via File > Download .ipynb

```