In [13]:
# packages
import pandas as pd
from mod02_build_bot_predictor import train_model

### Define a function to extract predictions from the model

In [14]:
def predict_bot(df, model=None):
    """
    Label every row of ``df`` as bot (1) or human (0).

    Parameters:
      df    : feature table handed unchanged to ``model.predict``.
      model : object exposing ``predict(df)``. When omitted, a model is
              obtained by calling ``train_model()`` with no arguments.

    Returns:
      pandas Series holding the predictions, indexed like ``df``.
    """
    # NOTE(review): the fallback calls train_model() without arguments, while
    # the notebook itself calls train_model(X_train, y_train) -- confirm that
    # train_model supplies defaults before relying on model=None.
    fitted = train_model() if model is None else model

    # Reuse df's index so predictions align row-for-row with the inputs.
    return pd.Series(fitted.predict(df), index=df.index)

### Define a function to evaluate model error

In [15]:
def confusion_matrix_and_metrics(y_true, y_pred):
    """
    Computes confusion matrix and common error rates for binary classification.

    Assumes labels:
      0 = negative class
      1 = positive class

    Parameters:
      y_true : iterable of actual labels.
      y_pred : iterable of predicted labels, paired positionally with y_true
               (any pandas index is ignored; only iteration order matters).

    Returns:
      dict with:
        tp, tn, fp, fn          -- raw counts
        misclassification_rate  -- (fp + fn) / total
        false_positive_rate     -- fp / (fp + tn)
        false_negative_rate     -- fn / (fn + tp)
      Any rate whose denominator is zero is reported as 0.0.

    Raises:
      ValueError: if the inputs differ in length, or if any label is not
                  0 or 1.
    """
    # Materialize both inputs so their lengths can be compared; a bare zip()
    # would silently drop the unmatched tail and report metrics on partial data.
    true_labels = list(y_true)
    pred_labels = list(y_pred)
    if len(true_labels) != len(pred_labels):
        raise ValueError(
            "y_true and y_pred must have the same length "
            f"(got {len(true_labels)} and {len(pred_labels)})"
        )

    tn = fp = fn = tp = 0

    for yt, yp in zip(true_labels, pred_labels):
        if yt == 0 and yp == 0:
            tn += 1
        elif yt == 0 and yp == 1:
            fp += 1
        elif yt == 1 and yp == 0:
            fn += 1
        elif yt == 1 and yp == 1:
            tp += 1
        else:
            # Covers any other value, including NaN (which equals nothing).
            raise ValueError("Labels must be 0 or 1")

    total = tn + fp + fn + tp
    actual_negatives = fp + tn
    actual_positives = fn + tp

    # Guard each division so empty input or a missing class yields 0.0
    # instead of ZeroDivisionError.
    misclassification_rate = (fp + fn) / total if total > 0 else 0.0
    false_positive_rate = fp / actual_negatives if actual_negatives > 0 else 0.0
    false_negative_rate = fn / actual_positives if actual_positives > 0 else 0.0

    return {
        "tp": tp,
        "tn": tn,
        "fp": fp,
        "fn": fn,
        "misclassification_rate": misclassification_rate,
        "false_positive_rate": false_positive_rate,
        "false_negative_rate": false_negative_rate,
    }


### Load the data

In [16]:
# Locations of the two CSV files, relative to the notebook's working directory.
TRAIN_PATH = "mod02_data/train.csv"
TEST_PATH = "mod02_data/test.csv"

# Read each file into its own DataFrame.
train = pd.read_csv(TRAIN_PATH)
test = pd.read_csv(TEST_PATH)

### Format the data by independent vs. dependent variables

In [17]:
# "is_bot" is the dependent variable; every other column is kept as a feature.
X_train, y_train = train.drop(columns=["is_bot"]), train["is_bot"]
X_test, y_test = test.drop(columns=["is_bot"]), test["is_bot"]

### Build the model on training data

In [18]:
# Build the model from the training features and labels only.
# train_model is imported from mod02_build_bot_predictor; its internals are not shown here.
model = train_model(X_train, y_train)

### Get the model predictions on training and test data

In [19]:
# Predict with the same fitted model on both splits so the two
# evaluations below are directly comparable.
y_pred_train = predict_bot(X_train, model)
y_pred_test = predict_bot(X_test, model)

### Check results on the training set (data used to build the model)

In [20]:
# Metrics on the data the model was built from; the returned dict is
# displayed as the cell output.
confusion_matrix_and_metrics(y_train, y_pred_train)

{'tp': 164,
 'tn': 2637,
 'fp': 0,
 'fn': 199,
 'misclassification_rate': 0.06633333333333333,
 'false_positive_rate': 0.0,
 'false_negative_rate': 0.5482093663911846}

### Check results on the test set (new data not yet seen by the model)

In [21]:
# Metrics on the held-out test data; compare against the training-set
# output above to see how the error rates change on unseen accounts.
confusion_matrix_and_metrics(y_test, y_pred_test)

{'tp': 14,
 'tn': 863,
 'fp': 11,
 'fn': 112,
 'misclassification_rate': 0.123,
 'false_positive_rate': 0.012585812356979404,
 'false_negative_rate': 0.8888888888888888}

# Discussion Questions

### Based on the misclassification rate of your model, discuss your confidence in the ability to predict a bot. 

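My confidence in the model's ability to predict a bot is uncertain, leaning low. The overall test misclassification rate of 12.3% looks acceptable, but only about 1 in 8 test accounts (126 of 1,000) is a bot, so labeling every account as human would score almost the same. The errors are concentrated in false negatives: the test false negative rate is about 8/9 (0.889), meaning the model caught only 14 of the 126 bots, or roughly 1 in 9. With more practice building and tuning the predictor, I think it could learn to predict bots fairly accurately, but as it stands I would not rely on it.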

### What are potential ramifications of false positives from the model?

The model confidently tells you that the thing you are looking for is present when it is not. For the bot predictor, that means a real person's account is flagged as a bot, which could lead to that person being wrongly restricted or banned. More generally, if a model is used for something like automated driving, a false positive that something is in the road can cause sudden, hard deceleration and problematic consequences such as crashes with the cars behind it, traffic congestion, or swerving.

### What are potential ramifications of false negatives from the model?

The model may fail to tell you crucial information that you are looking for; in effect, it confidently tells you that nothing is there when something is. For the bot predictor, a false negative is a bot that is treated as a human and left free to keep operating. For example, if a model were trained to detect cancer from some data, a false negative would let the cancer go unseen and progress into a more aggressive form. Continuing the self-driving car example, if the car has a false negative about an entity in the road, it may hit the object in front of it, which can kill pedestrians, cause accidents, and cause major property damage if it swerves off the road.