In [2]:
from mysklearn.mypytable import MyPyTable
import mysklearn.myutils as myutils
from mysklearn.myclassifiers import MyDummyClassifier


# Load the cleaned tracks dataset; the code below reads table.column_names
# (to locate columns by name) and table.data (the rows).
table = MyPyTable().load_from_file("cleaned_tracks.csv")

def categorize_popularity(p):
    """Bucket a popularity score into "Low", "Medium" or "High".

    Args:
        p: Popularity value; anything accepted by int(). The value is
            converted with int() before comparison, so floats are truncated.

    Returns:
        "Low" when int(p) <= 33, "Medium" when int(p) <= 66, otherwise "High".

    Raises:
        ValueError / TypeError: if int(p) cannot convert the value.
    """
    score = int(p)
    # Check from the top bucket down so each test is a simple lower bound.
    if score > 66:
        return "High"
    if score > 33:
        return "Medium"
    return "Low"

# Target: the popularity column of every row, bucketed into Low / Medium / High.
pop_idx = table.column_names.index("popularity")
y = [categorize_popularity(record[pop_idx]) for record in table.data]

# Column positions of the six features, looked up by header name.
explicit_idx = table.column_names.index("explicit")
duration_idx = table.column_names.index("duration_ms")
dance_idx = table.column_names.index("danceability")
energy_idx = table.column_names.index("energy")
tempo_idx = table.column_names.index("tempo")
loudness_idx = table.column_names.index("loudness")

# The order of this tuple defines the feature order inside each row of X.
feature_columns = (
    explicit_idx,
    duration_idx,
    dance_idx,
    energy_idx,
    tempo_idx,
    loudness_idx,
)

# Feature matrix: one six-element list per table row.
X = [[record[col] for col in feature_columns] for record in table.data]

# Baseline model fitted on the full feature matrix and labels.
# NOTE(review): a dummy classifier presumably ignores the feature values when
# predicting -- confirm against mysklearn.myclassifiers.MyDummyClassifier.
dummyClf = MyDummyClassifier()
dummyClf.fit(X, y)

# Predict an example song. The row must have the same six features, in the
# same order, as the rows of X:
# explicit, duration_ms, danceability, energy, tempo, loudness.
example_song = [
    1,         # explicit
    150000,    # duration_ms
    0.25,      # danceability
    0.50,      # energy
    120.0,     # tempo (illustrative value; X includes tempo, so the example must too)
    -8.0       # loudness
]

print("Prediction:", dummyClf.predict([example_song]))

Prediction: ['Low']


In [3]:
import mysklearn.myevaluation as myevaluation

# Cross-validate the dummy baseline. The helper returns four values:
# accuracy, error rate, and two parallel label sequences (dummy_tp, dummy_p)
# that are reused below for the precision / recall / F1 scores.
cv_outcome = myutils.cross_val_predict(X, y, MyDummyClassifier)
dummy_acc, dummy_err, dummy_tp, dummy_p = cv_outcome
print(f"Dummy acc: {dummy_acc:.2f} Dummy err: {dummy_err:.2f}")

# NOTE(review): these are *binary* scorers applied to a three-class target
# (Low / Medium / High) with no explicit positive label -- presumably the
# helper picks a default positive class; confirm which one in myevaluation.
dummy_prec = myevaluation.binary_precision_score(dummy_tp, dummy_p)
print(f"Dummy precision: {dummy_prec:.2f}")

dummy_recall = myevaluation.binary_recall_score(dummy_tp, dummy_p)
print(f"Dummy recall: {dummy_recall:.2f}")

dummy_f1 = myevaluation.binary_f1_score(dummy_tp, dummy_p)
print(f"Dummy f1: {dummy_f1:.2f}")

Dummy acc: 0.32 Dummy err: 0.68
Dummy precision: 0.32
Dummy recall: 0.38
Dummy f1: 0.35


In [4]:
# Tally the dummy classifier's cross-validation results into a 3x3 count
# matrix: rows are indexed by the label from dummy_tp, columns by the label
# from dummy_p (presumably true vs. predicted labels -- confirm against
# myutils.cross_val_predict).
labels = ["Low", "Medium", "High"]
label_to_idx = {label: i for i, label in enumerate(labels)}
matrix = [[0] * len(labels) for _ in labels]

for row_label, col_label in zip(dummy_tp, dummy_p):
    matrix[label_to_idx[row_label]][label_to_idx[col_label]] += 1

# The counts come from MyDummyClassifier, so the title names that model
# rather than a random forest.
myutils.print_confusion_matrix(labels, matrix, "Dummy Classifier Confusion Matrix")

Random Forest Confusion Matrix
                                   ['Low', 'Medium', 'High']
------  ----  ----  ----  -----  ---------------------------
Low     3276  3440  4651  11367                           29
Medium  3420  3309  4638  11367                           29
High    3535  3481  4351  11367                           38
