# Model creation

In [10]:
import pandas as pd
import tensorflow as tf
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

## Dataset

In [5]:
df = pd.read_csv(
    "drug_consumption_2.txt",
    header=None,
    names=[
        "idx",
        "N-Score",
        "E-Score",
        "O-Score",
        "A-Score",
        "C-Score",
        "Impusiveness",
        "Sensation seeking",
        "Amphet",
        "Benzo",
        "Cannabis",
        "Heroin",
        "Ketamine",
        "Methadone",
        "Semeron",
    ],
)

df.head()

Unnamed: 0,idx,N-Score,E-Score,O-Score,A-Score,C-Score,Impusiveness,Sensation seeking,Amphet,Benzo,Cannabis,Heroin,Ketamine,Methadone,Semeron
0,1,0.31287,-0.57545,-0.58331,-0.91699,-0.00665,-0.21712,-1.18084,0,0,0,0,0,0,0
1,2,-0.67825,1.93886,1.43533,0.76096,-0.14277,-0.71126,-0.21575,0,0,0,0,0,0,0
2,3,-0.46725,0.80523,-0.84732,-1.6209,-1.0145,-1.37983,0.40148,0,0,0,0,0,0,0
3,4,-0.14882,-0.80615,-0.01928,0.59042,0.58489,-1.37983,-1.18084,0,0,0,0,0,0,0
4,5,0.73545,-1.6334,-0.45174,-0.30172,1.30612,-0.21712,-0.21575,0,0,0,0,0,0,0


## Decision Tree

In [6]:
models = {}
metrics = {}

for target in df.iloc[:, 8:15].columns:
    # Get train and test data splits, stratisfy for target.
    target_train_df, target_test_df = train_test_split(
        df, train_size=0.8, shuffle=True, stratify=df[target], random_state=0
    )

    # Get input and target from the data split.
    target_x_train, target_y_train = (
        target_train_df.iloc[:, 1:8],
        target_train_df.iloc[:, 8:15][target],
    )
    target_x_test, target_y_test = (
        target_test_df.iloc[:, 1:8],
        target_test_df.iloc[:, 8:15][target],
    )

    # Create classifier.
    target_clf = DecisionTreeClassifier()

    # Train model with data specified for target.
    target_clf.fit(target_x_train, target_y_train)
    models[target] = target_clf

    # Evaluate trained classifier.
    target_y_predictions = target_clf.predict(target_x_test)

    # Calculate metrics.
    accuracy = accuracy_score(target_y_test, target_y_predictions)
    precision = precision_score(target_y_test, target_y_predictions)
    recall = recall_score(target_y_test, target_y_predictions)
    f1 = f1_score(target_y_test, target_y_predictions)

    metrics[target] = {
        "accuracy": accuracy,
        "precision": precision,
        "recall": recall,
        "f1_score": f1,
    }

models, metrics

  _warn_prf(average, modifier, msg_start, len(result))


({'Amphet': DecisionTreeClassifier(),
  'Benzo': DecisionTreeClassifier(),
  'Cannabis': DecisionTreeClassifier(),
  'Heroin': DecisionTreeClassifier(),
  'Ketamine': DecisionTreeClassifier(),
  'Methadone': DecisionTreeClassifier(),
  'Semeron': DecisionTreeClassifier()},
 {'Amphet': {'accuracy': 0.8912466843501327,
   'precision': 0.043478260869565216,
   'recall': 0.05,
   'f1_score': 0.046511627906976744},
  'Benzo': {'accuracy': 0.8938992042440318,
   'precision': 0.1111111111111111,
   'recall': 0.15789473684210525,
   'f1_score': 0.13043478260869565},
  'Cannabis': {'accuracy': 0.6312997347480106,
   'precision': 0.28703703703703703,
   'recall': 0.3333333333333333,
   'f1_score': 0.30845771144278605},
  'Heroin': {'accuracy': 0.9655172413793104,
   'precision': 0.0,
   'recall': 0.0,
   'f1_score': 0.0},
  'Ketamine': {'accuracy': 0.9973474801061007,
   'precision': 0.0,
   'recall': 0.0,
   'f1_score': 0.0},
  'Methadone': {'accuracy': 0.9177718832891246,
   'precision': 0.136

## K-NN

In [None]:
models = {}
metrics = {}

for target in df.iloc[:, 8:15].columns:
    # Get train and test data splits, stratisfy for target.
    target_train_df, target_test_df = train_test_split(
        df, train_size=0.8, shuffle=True, stratify=df[target], random_state=0
    )

    # Get input and target from the data split.
    target_x_train, target_y_train = (
        target_train_df.iloc[:, 1:8],
        target_train_df.iloc[:, 8:15][target],
    )
    target_x_test, target_y_test = (
        target_test_df.iloc[:, 1:8],
        target_test_df.iloc[:, 8:15][target],
    )

    # Create classifier.
    target_neigh = KNeighborsClassifier(n_neighbors=5)

    # Train model with data specified for target.
    target_neigh.fit(target_x_train, target_y_train)
    models[target] = target_neigh

    # Evaluate trained classifier.
    target_y_predictions = models[target].predict(target_x_test)

    # Calculate metrics.
    accuracy = accuracy_score(target_y_test, target_y_predictions)
    precision = precision_score(target_y_test, target_y_predictions)
    recall = recall_score(target_y_test, target_y_predictions)
    f1 = f1_score(target_y_test, target_y_predictions)

    metrics[target] = {
        "accuracy": accuracy,
        "precision": precision,
        "recall": recall,
        "f1_score": f1,
    }

models, metrics

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


({'Amphet': KNeighborsClassifier(),
  'Benzo': KNeighborsClassifier(),
  'Cannabis': KNeighborsClassifier(),
  'Heroin': KNeighborsClassifier(),
  'Ketamine': KNeighborsClassifier(),
  'Methadone': KNeighborsClassifier(),
  'Semeron': KNeighborsClassifier()},
 {'Amphet': {'accuracy': 0.9442970822281167,
   'precision': 0.0,
   'recall': 0.0,
   'f1_score': 0.0},
  'Benzo': {'accuracy': 0.9389920424403183,
   'precision': 0.0,
   'recall': 0.0,
   'f1_score': 0.0},
  'Cannabis': {'accuracy': 0.7055702917771883,
   'precision': 0.37142857142857144,
   'recall': 0.27956989247311825,
   'f1_score': 0.31901840490797545},
  'Heroin': {'accuracy': 0.9840848806366048,
   'precision': 0.0,
   'recall': 0.0,
   'f1_score': 0.0},
  'Ketamine': {'accuracy': 0.9973474801061007,
   'precision': 0.0,
   'recall': 0.0,
   'f1_score': 0.0},
  'Methadone': {'accuracy': 0.9602122015915119,
   'precision': 0.0,
   'recall': 0.0,
   'f1_score': 0.0},
  'Semeron': {'accuracy': 0.9946949602122016,
   'precis

## Random Forest

In [None]:
models = {}
metrics = {}

for target in df.iloc[:, 8:15].columns:
    # Get train and test data splits, stratisfy for target.
    target_train_df, target_test_df = train_test_split(
        df, train_size=0.8, shuffle=True, stratify=df[target], random_state=0
    )

    # Get input and target from the data split.
    target_x_train, target_y_train = (
        target_train_df.iloc[:, 1:8],
        target_train_df.iloc[:, 8:15][target],
    )
    target_x_test, target_y_test = (
        target_test_df.iloc[:, 1:8],
        target_test_df.iloc[:, 8:15][target],
    )

    # Create classifier.
    target_clf = RandomForestClassifier(n_estimators=100, random_state=0)

    # Train model with data specified for target.
    target_clf.fit(target_x_train, target_y_train)
    models[target] = target_clf

    # Evaluate trained classifier.
    target_y_predictions = target_clf.predict(target_x_test)

    # Calculate metrics.
    accuracy = accuracy_score(target_y_test, target_y_predictions)
    precision = precision_score(target_y_test, target_y_predictions)
    recall = recall_score(target_y_test, target_y_predictions)
    f1 = f1_score(target_y_test, target_y_predictions)

    metrics[target] = {
        "accuracy": accuracy,
        "precision": precision,
        "recall": recall,
        "f1_score": f1,
    }

models, metrics

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


({'Amphet': RandomForestClassifier(random_state=0),
  'Benzo': RandomForestClassifier(random_state=0),
  'Cannabis': RandomForestClassifier(random_state=0),
  'Heroin': RandomForestClassifier(random_state=0),
  'Ketamine': RandomForestClassifier(random_state=0),
  'Methadone': RandomForestClassifier(random_state=0),
  'Semeron': RandomForestClassifier(random_state=0)},
 {'Amphet': {'accuracy': 0.946949602122016,
   'precision': 0.0,
   'recall': 0.0,
   'f1_score': 0.0},
  'Benzo': {'accuracy': 0.946949602122016,
   'precision': 0.0,
   'recall': 0.0,
   'f1_score': 0.0},
  'Cannabis': {'accuracy': 0.7347480106100795,
   'precision': 0.41025641025641024,
   'recall': 0.17204301075268819,
   'f1_score': 0.24242424242424246},
  'Heroin': {'accuracy': 0.9840848806366048,
   'precision': 0.0,
   'recall': 0.0,
   'f1_score': 0.0},
  'Ketamine': {'accuracy': 0.9973474801061007,
   'precision': 0.0,
   'recall': 0.0,
   'f1_score': 0.0},
  'Methadone': {'accuracy': 0.9602122015915119,
   'pr

## ANN

In [9]:
def create_ann_model():
    model = tf.keras.models.Sequential(
        [
            tf.keras.layers.Dense(64, activation="relu", input_shape=(7,)),
            tf.keras.layers.Dense(32, activation="relu"),
            tf.keras.layers.Dense(1, activation="sigmoid"),
        ]
    )

    model.compile(
        optimizer="adam",
        loss="binary_crossentropy",
        metrics=["accuracy", "precision", "recall"],
    )

    return model

In [None]:
models = {}
metrics = {}

for target in df.iloc[:, 8:15].columns:
    # Get train and test data splits, stratisfy for target.
    target_train_df, target_test_df = train_test_split(
        df, train_size=0.8, shuffle=True, stratify=df[target], random_state=0
    )

    # Get input and target from the data split.
    target_x_train, target_y_train = (
        target_train_df.iloc[:, 1:8],
        target_train_df.iloc[:, 8:15][target],
    )
    target_x_test, target_y_test = (
        target_test_df.iloc[:, 1:8],
        target_test_df.iloc[:, 8:15][target],
    )

    # Create classifier.
    target_model = create_ann_model()

    # Train model with data specified for target.
    target_model.fit(target_x_train, target_y_train)
    models[target] = target_model

    # Evaluate trained classifier.
    target_y_predictions = (target_model.predict(target_x_test) >= 0.5).astype("int32")

    # Calculate metrics.
    accuracy = accuracy_score(target_y_test, target_y_predictions)
    precision = precision_score(target_y_test, target_y_predictions)
    recall = recall_score(target_y_test, target_y_predictions)
    f1 = f1_score(target_y_test, target_y_predictions)

    metrics[target] = {
        "accuracy": accuracy,
        "precision": precision,
        "recall": recall,
        "f1_score": f1,
    }

models, metrics