In [None]:
import joblib
import numpy as np
import tensorflow as tf

# Read the raw recommendation records from disk
df = pd.read_csv('music_recommendation_dataset_with_separate_ranges1.csv')

## Feature Selection - intentionally minimal to keep accuracy in check ##
# Three categorical context columns plus a single continuous tempo column
target = 'Final Playlist'
features = ['Emotion', 'Time', 'Weather', 'Tempo_Min']

X, y = df[features], df[target]

# Collapse the target into equal-width bins and relabel them 1..num_bins
# (fewer, coarser classes help prevent perfect accuracy)
num_bins = 15  # Adjust this to control accuracy
y = 1 + pd.cut(y, bins=num_bins, labels=False)

# Stratified 80/20 hold-out split with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    test_size=0.2,
    random_state=42,
)

## Preprocessing ##
numeric_features = ['Tempo_Min']
categorical_features = ['Emotion', 'Time', 'Weather']

# Standardise the numeric column and one-hot encode the categorical ones.
# sparse_threshold=0 forces a dense ndarray result: with the default
# threshold the mostly-zero one-hot block can make ColumnTransformer return
# a SciPy sparse matrix, and adding dense noise to that would change the
# container type of the training matrix only, leaving train and test (and
# later inference) inputs inconsistent.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numeric_features),
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features),
    ],
    sparse_threshold=0,
)

# Fit on the training split only, then apply the same mapping to the test split
X_train_processed = preprocessor.fit_transform(X_train)
X_test_processed = preprocessor.transform(X_test)

# Add Gaussian noise to the *training* features only (regularisation);
# the test features stay clean so evaluation is not perturbed.
noise_level = 0.05  # Adjust this to control accuracy
X_train_processed = X_train_processed + np.random.normal(
    0, noise_level, X_train_processed.shape
)

# Determine output classes
# Labels were produced by pd.cut(...) + 1 with `num_bins` bins. The first and
# last bin always contain the minimum and maximum target value, so labels span
# 1..num_bins even when some interior bins are empty. Counting the *observed*
# labels would then undersize the one-hot width and make to_categorical fail,
# so the class count is taken from the bin count itself.
num_classes = num_bins
input_shape = X_train_processed.shape[1]

# Convert labels to one-hot encoding with label smoothing
# (labels are 1-based, hence the "- 1" to obtain 0-based class indices)
smoothing_factor = 0.1  # Adds uncertainty to labels
y_train_encoded = tf.keras.utils.to_categorical(y_train - 1, num_classes=num_classes)
# Smoothed target: (1 - s) on the true class plus s / num_classes on every class
y_train_encoded = y_train_encoded * (1 - smoothing_factor) + (smoothing_factor / num_classes)

# Test labels stay hard (unsmoothed) one-hot vectors
y_test_encoded = tf.keras.utils.to_categorical(y_test - 1, num_classes=num_classes)

## Neural Network Architecture - Simplified with more regularization ##
# The input width is declared with an explicit Input layer. Passing
# `input_shape=` to the first Dense layer is deprecated in Keras 3 and emits
# a UserWarning when the model is built.
model = Sequential([
    tf.keras.Input(shape=(input_shape,)),

    Dense(32, activation='relu', kernel_regularizer=l2(0.01)),  # Increased L2 regularization
    Dropout(0.6),  # Increased dropout
    BatchNormalization(),

    Dense(16, activation='relu', kernel_regularizer=l2(0.01)),
    Dropout(0.5),
    BatchNormalization(),

    # One softmax unit per class
    Dense(num_classes, activation='softmax'),
])

# Compile with higher learning rate
optimizer = Adam(learning_rate=0.005)  # Increased learning rate
model.compile(
    optimizer=optimizer,
    loss='categorical_crossentropy',  # targets are one-hot (smoothed) vectors
    metrics=['accuracy'],
)

# Callbacks - More aggressive early stopping
# Both callbacks watch the validation loss (their default monitor, spelled
# out here for clarity).
callbacks = [
    EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True),
    ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3),
]

# Train with smaller batch size and fewer epochs
# Validation data is carved out of the TRAINING set via validation_split.
# Using the test split here would let early stopping / best-weight restoring
# pick the model on the very data that is later reported as "test accuracy",
# which biases that number upwards. Keras takes the last 20% of the provided
# arrays (before shuffling); the rows were already shuffled by the earlier
# train/test split. Note the validation targets are the smoothed labels.
history = model.fit(
    np.asarray(X_train_processed),  # plain ndarray, as validation_split requires
    y_train_encoded,
    validation_split=0.2,
    epochs=50,
    batch_size=30,  # Smaller batch size
    callbacks=callbacks,
    verbose=1,
)

# Score the test split with the metric configured at compile time
test_loss, test_acc = model.evaluate(
    X_test_processed,
    y_test_encoded,
    verbose=0,
)
print(f"\nTest Accuracy: {test_acc:.4f}")

# Turn class probabilities into 1-based labels (argmax is 0-based)
y_pred = model.predict(X_test_processed)
y_pred_classes = y_pred.argmax(axis=1) + 1

# Cross-check the accuracy against the original (non one-hot) labels
accuracy = accuracy_score(y_test, y_pred_classes)
print(f"Final Accuracy Score: {accuracy:.4f}")

# Tuning knobs if accuracy is still too high:
#   1. noise_level:   0.05 -> 0.1
#   2. num_bins:      15   -> 20
#   3. dropout rates: 0.6  -> 0.7
#   4. layer width:   32   -> 24 units

# Persist both artefacts needed at inference time:
# the trained network and the fitted preprocessor object
model.save('playlist_model.h5')
joblib.dump(preprocessor, 'preprocessor.pkl')

def load_model_and_preprocessor():
    """Load the saved Keras model and the saved preprocessor.

    Both files are read from the current working directory:
    'playlist_model.h5' and 'preprocessor.pkl'.

    Returns:
        tuple: ``(model, preprocessor)`` in that order.
    """
    return (
        tf.keras.models.load_model('playlist_model.h5'),
        joblib.load('preprocessor.pkl'),
    )

def predict_playlist(emotion, weather, time):
    """Predict a playlist class for one emotion/weather/time combination.

    The saved model and preprocessor are loaded on every call via
    ``load_model_and_preprocessor``.

    Args:
        emotion: Value for the 'Emotion' column.
        weather: Value for the 'Weather' column.
        time: Value for the 'Time' column.

    Returns:
        int: 1-based index of the model output with the highest probability.
    """
    model, preprocessor = load_model_and_preprocessor()

    # Tempo_Min is not supplied by the caller, but the preprocessor expects
    # the column. A raw 0 would be standardised to (0 - mean) / scale, which
    # can lie far outside anything seen in training and skew the prediction.
    # The scaler's fitted mean standardises to exactly 0, i.e. a neutral,
    # "average tempo" input.
    try:
        neutral_tempo = float(preprocessor.named_transformers_['num'].mean_[0])
    except (AttributeError, KeyError, IndexError, TypeError):
        # Preprocessor without a fitted 'num' scaler: keep the old placeholder
        neutral_tempo = 0.0

    # Build a one-row DataFrame; the ColumnTransformer selects columns by name
    input_data = pd.DataFrame({
        'Emotion': [emotion],
        'Time': [time],
        'Weather': [weather],
        'Tempo_Min': [neutral_tempo],
    })

    # Apply the same transformation that was fitted at training time
    input_processed = preprocessor.transform(input_data)

    # Highest-probability output; +1 converts the 0-based argmax to the
    # 1-based labelling, and int() yields a plain Python integer
    prediction = model.predict(input_processed)
    predicted_class = int(np.argmax(prediction, axis=1)[0]) + 1

    return predicted_class

# Example usage: one sample query, passed by keyword for readability
example_inputs = {"emotion": "Neutral", "weather": "Sunny", "time": "Morning"}
predicted_playlist = predict_playlist(**example_inputs)
print(f"Recommended Playlist: {predicted_playlist}")

Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m267/267[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - accuracy: 0.2335 - loss: 2.7574 - val_accuracy: 0.3075 - val_loss: 2.0746 - learning_rate: 0.0050
Epoch 2/50
[1m267/267[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.4108 - loss: 2.0193 - val_accuracy: 0.8060 - val_loss: 1.3383 - learning_rate: 0.0050
Epoch 3/50
[1m267/267[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.4500 - loss: 1.9426 - val_accuracy: 0.8355 - val_loss: 1.0898 - learning_rate: 0.0050
Epoch 4/50
[1m267/267[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.4585 - loss: 1.9206 - val_accuracy: 0.9050 - val_loss: 0.8899 - learning_rate: 0.0050
Epoch 5/50
[1m267/267[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.4763 - loss: 1.8972 - val_accuracy: 0.8725 - val_loss: 0.9302 - learning_rate: 0.0050
Epoch 6/50
[1m267/267[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/



Final Accuracy Score: 0.9000




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 115ms/step
Recommended Playlist: 4
