# Configura√ß√µes

## Importa√ß√µes

In [2]:
import os

import sys
sys.path.append('../functions')

import pandas as pd
import numpy as np

from preprocessing import apply_boxcox_transform
from recommendation import recomendar_musicas
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import NearestNeighbors
from sklearn.pipeline import Pipeline
import joblib

## Defini√ß√£o do Dataframe

In [3]:
df_6f = pd.read_csv('../datasets/features_6f.csv')

df_4f = pd.read_csv('../datasets/features_4f.csv')

In [4]:
df_6f.columns

Index(['acousticness', 'danceability', 'energy', 'instrumentalness',
       'speechiness', 'popularity'],
      dtype='object')

In [5]:
df_4f.columns

Index(['acousticness', 'danceability', 'energy', 'popularity'], dtype='object')

# Modelagem

In [6]:
features_6f = ['acousticness', 'danceability', 'energy', 'instrumentalness',
       'speechiness', 'popularity']

features_4f = ['acousticness', 'danceability', 'energy', 'popularity']

## Carregar dados e preprocessors

In [7]:
preprocessor_6f = joblib.load('../models/pipeline_preprocessor_6f.joblib')
predictor_6f = joblib.load('../models/pipeline_predictor_6f.joblib')
preprocessor_4f = joblib.load('../models/pipeline_preprocessor_4f.joblib')
predictor_4f = preprocessor_4f

In [8]:
df_6f_with_id = pd.read_csv('../datasets/pre_processing_6f.csv')
df_4f_with_id = pd.read_csv('../datasets/pre_processing_4f.csv')

## Recomendar

## Divis√£o de treino e teste

In [9]:
X_6f_train, X_6f_test = train_test_split(df_6f[features_6f], test_size=0.3, random_state=42)

X_4f_train, X_4f_test = train_test_split(df_4f[features_4f], test_size=0.3, random_state=42)

In [10]:
model_6f = NearestNeighbors(n_neighbors=20, metric='euclidean', algorithm='kd_tree')
X_6f_normalized = preprocessor_6f.transform(df_6f[features_6f])
model_6f.fit(X_6f_normalized)

model_4f = NearestNeighbors(n_neighbors=20, metric='euclidean', algorithm='kd_tree')
X_4f_normalized = preprocessor_4f.transform(df_4f[features_4f])
model_4f.fit(X_4f_normalized)

0,1,2
,n_neighbors,20
,radius,1.0
,algorithm,'kd_tree'
,leaf_size,30
,metric,'euclidean'
,p,2
,metric_params,
,n_jobs,


# Avalia√ß√£o do modelo

In [11]:
features_6f = ['acousticness', 'danceability', 'energy', 'instrumentalness', 
               'speechiness', 'popularity']
features_4f = ['acousticness', 'danceability', 'energy', 'popularity']

## Teste 1: Modelo de 6 Features

In [12]:
print("\n" + "="*60)
print("üéµ TESTE 1: MODELO COM 6 FEATURES")
print("="*60)

# ‚ö†Ô∏è IMPORTANTE: Usar valores ORIGINAIS (n√£o normalizados!)
# Ranges: acousticness, danceability, energy, instrumentalness, speechiness: 0-1
# Popularity: 0-100
input_exemplo_6f = {
    'acousticness': 0.5,        # 0-1
    'danceability': 0.8,        # 0-1
    'energy': 0.6,              # 0-1
    'instrumentalness': 0.1,    # 0-1
    'speechiness': 0.05,        # 0-1
    'popularity': 50            # 0-100 (ORIGINAL!)
}

print("\nüìä Input (valores originais):")
print(input_exemplo_6f)

top_20_6f = recomendar_musicas(
    input_exemplo_6f, 
    df_6f,
    df_6f_with_id,
    model_6f, 
    predictor_6f, 
    features_6f, 
    top_n=20
)

print("\nüé∂ Top 20 Recomenda√ß√µes (6F):")
print(top_20_6f[['id', 'acousticness', 'danceability', 'energy', 'popularity', 'distancia']].head(10))



üéµ TESTE 1: MODELO COM 6 FEATURES

üìä Input (valores originais):
{'acousticness': 0.5, 'danceability': 0.8, 'energy': 0.6, 'instrumentalness': 0.1, 'speechiness': 0.05, 'popularity': 50}

üé∂ Top 20 Recomenda√ß√µes (6F):
                       id  acousticness  danceability    energy  popularity  \
0  51FH6yUZRNKbGkN3qzyiXK      0.020665      1.481962  0.469011    0.947210   
1  4tyVEkwuguux509geL9VNH     -0.021817      1.390713  0.584946    0.900877   
2  4X99feVnw46VIk2VNqWWL3     -0.013851      1.687273  0.229660    1.086210   
3  421jr3tD4xaF0pjmRlsP1i     -0.197057      1.647352  0.214701    0.900877   
4  5wQPzPYb5vE6OLWFFats8U     -0.181126      1.727195  0.678443    0.715543   
5  05vh32d4pXbYfBrM6S3H2f      0.238387      1.424932  0.682183    1.039877   
6  1n3CDGhOHLq0ov22xLtDn4     -0.072265      1.481962  0.203481    0.437542   
7  5XVb0qfmmkmmsyy7j8oObk     -0.210332      1.613133  0.457791    0.761876   
8  5i3BqMtsZkkXGjETBz3J5c     -0.348400      1.681570  0.44283

## Teste 2: Modelo de 4 Features

In [13]:
print("\n" + "="*60)
print("üéµ TESTE 2: MODELO COM 4 FEATURES")
print("="*60)

# ‚ö†Ô∏è IMPORTANTE: Usar valores ORIGINAIS (n√£o normalizados!)
input_exemplo_4f = {
    'acousticness': 0.3,        # 0-1
    'danceability': 0.7,        # 0-1
    'energy': 0.8,              # 0-1
    'popularity': 75            # 0-100 (ORIGINAL!)
}

print("\nüìä Input (valores originais):")
print(input_exemplo_4f)

top_20_4f = recomendar_musicas(
    input_exemplo_4f, 
    df_4f,
    df_4f_with_id,
    model_4f, 
    preprocessor_4f, 
    features_4f, 
    top_n=20
)

print("\nüé∂ Top 20 Recomenda√ß√µes (4F):")
print(top_20_4f[['id', 'acousticness', 'danceability', 'energy', 'popularity', 'distancia']].head(10))



üéµ TESTE 2: MODELO COM 4 FEATURES

üìä Input (valores originais):
{'acousticness': 0.3, 'danceability': 0.7, 'energy': 0.8, 'popularity': 75}

üé∂ Top 20 Recomenda√ß√µes (4F):
                       id  acousticness  danceability    energy  popularity  \
0  3u1S1OmAUhx5DRlLrXqyp3     -0.451950      0.866031  1.104786    2.012880   
1  1nMYtxDrONcoGnKRvxTwPv     -0.568777      1.025717  1.183323    1.873880   
2  0puf9yIluy9W0vpMEUoAnN     -0.619225      0.957280  1.112266    2.151880   
3  3yk7PJnryiJ8mAPqsrujzf     -0.433364      1.020014  1.127225    1.873880   
4  0RDgqtvOHLwcI6yz9bjsZV     -0.597983      0.985795  1.003810    2.105547   
5  2smpiAZfaN0GFi15MqAq6E     -0.414778      1.054232  1.273079    1.920213   
6  7cWh3ScxjhvasLI0CBRoZk     -0.449295      0.934467  1.071127    1.827546   
7  5ZSl6gDoV6bPPxzmLeneV9     -0.441330      0.848921  1.108526    1.827546   
8  2EEmzqzM70CybVspgM7YRT     -0.682948      0.871734  1.063648    2.105547   
9  4y1LsJpmMti1PfRQV9AWWe    

## M√©tricas

### Teste 1: Cobertura e Qualidade (6F)

In [14]:
print("\n" + "="*60)
print("üéØ TESTE 1: MODELO 6 FEATURES - COBERTURA E QUALIDADE")
print("="*60)

X_6f_test_normalized = preprocessor_6f.transform(X_6f_test)
distances_test_6f, indices_test_6f = model_6f.kneighbors(X_6f_test_normalized)

print(f"\nüìà Estat√≠sticas de Dist√¢ncia (6F):")
print(f"   Dist√¢ncia m√≠nima: {distances_test_6f.min():.4f}")
print(f"   Dist√¢ncia m√°xima: {distances_test_6f.max():.4f}")
print(f"   Dist√¢ncia m√©dia: {distances_test_6f.mean():.4f}")
print(f"   Dist√¢ncia mediana: {np.median(distances_test_6f):.4f}")
print(f"   Desvio padr√£o: {distances_test_6f.std():.4f}")

print(f"\nüìä Distribui√ß√£o de Dist√¢ncias (6F):")
percentis = [10, 25, 50, 75, 90]
for p in percentis:
    val = np.percentile(distances_test_6f, p)
    print(f"   {p}¬∫ percentil: {val:.4f}")


üéØ TESTE 1: MODELO 6 FEATURES - COBERTURA E QUALIDADE

üìà Estat√≠sticas de Dist√¢ncia (6F):
   Dist√¢ncia m√≠nima: 0.0000
   Dist√¢ncia m√°xima: 3.5261
   Dist√¢ncia m√©dia: 0.3412
   Dist√¢ncia mediana: 0.3267
   Desvio padr√£o: 0.1603

üìä Distribui√ß√£o de Dist√¢ncias (6F):
   10¬∫ percentil: 0.1479
   25¬∫ percentil: 0.2405
   50¬∫ percentil: 0.3267
   75¬∫ percentil: 0.4250
   90¬∫ percentil: 0.5318


### Teste 2: Cobertura e Qualidade (4F)

In [15]:
print("\n" + "="*60)
print("üéØ TESTE 2: MODELO 4 FEATURES - COBERTURA E QUALIDADE")
print("="*60)

X_4f_test_normalized = preprocessor_4f.transform(X_4f_test)
distances_test_4f, indices_test_4f = model_4f.kneighbors(X_4f_test_normalized)

print(f"\nüìà Estat√≠sticas de Dist√¢ncia (4F):")
print(f"   Dist√¢ncia m√≠nima: {distances_test_4f.min():.4f}")
print(f"   Dist√¢ncia m√°xima: {distances_test_4f.max():.4f}")
print(f"   Dist√¢ncia m√©dia: {distances_test_4f.mean():.4f}")
print(f"   Dist√¢ncia mediana: {np.median(distances_test_4f):.4f}")
print(f"   Desvio padr√£o: {distances_test_4f.std():.4f}")

print(f"\nüìä Distribui√ß√£o de Dist√¢ncias (4F):")
for p in percentis:
    val = np.percentile(distances_test_4f, p)
    print(f"   {p}¬∫ percentil: {val:.4f}")


üéØ TESTE 2: MODELO 4 FEATURES - COBERTURA E QUALIDADE

üìà Estat√≠sticas de Dist√¢ncia (4F):
   Dist√¢ncia m√≠nima: 0.0000
   Dist√¢ncia m√°xima: 1.2265
   Dist√¢ncia m√©dia: 0.1433
   Dist√¢ncia mediana: 0.1400
   Desvio padr√£o: 0.0847

üìä Distribui√ß√£o de Dist√¢ncias (4F):
   10¬∫ percentil: 0.0413
   25¬∫ percentil: 0.0900
   50¬∫ percentil: 0.1400
   75¬∫ percentil: 0.1862
   90¬∫ percentil: 0.2380


## Compara√ß√£o dos modelos

In [16]:
print("\n" + "="*60)
print("‚öñÔ∏è COMPARA√á√ÉO ENTRE MODELOS")
print("="*60)

print(f"\nüîÑ Modelo 6F:")
print(f"   Dist√¢ncia m√©dia no teste: {distances_test_6f.mean():.4f}")
print(f"   Total de amostras de teste: {len(X_6f_test)}")
print(f"   Features utilizadas: {len(features_6f)}")

print(f"\nüîÑ Modelo 4F:")
print(f"   Dist√¢ncia m√©dia no teste: {distances_test_4f.mean():.4f}")
print(f"   Total de amostras de teste: {len(X_4f_test)}")
print(f"   Features utilizadas: {len(features_4f)}")

# Qual modelo √© melhor?
if distances_test_6f.mean() < distances_test_4f.mean():
    print(f"\n‚úÖ Modelo 6F √© MELHOR (menor dist√¢ncia m√©dia)")
    improvement = ((distances_test_4f.mean() - distances_test_6f.mean()) / distances_test_4f.mean()) * 100
    print(f"   Melhoria: {improvement:.2f}%")
else:
    print(f"\n‚úÖ Modelo 4F √© MELHOR (menor dist√¢ncia m√©dia)")
    improvement = ((distances_test_6f.mean() - distances_test_4f.mean()) / distances_test_6f.mean()) * 100
    print(f"   Melhoria: {improvement:.2f}%")


‚öñÔ∏è COMPARA√á√ÉO ENTRE MODELOS

üîÑ Modelo 6F:
   Dist√¢ncia m√©dia no teste: 0.3412
   Total de amostras de teste: 50973
   Features utilizadas: 6

üîÑ Modelo 4F:
   Dist√¢ncia m√©dia no teste: 0.1433
   Total de amostras de teste: 50973
   Features utilizadas: 4

‚úÖ Modelo 4F √© MELHOR (menor dist√¢ncia m√©dia)
   Melhoria: 58.01%


# Gerar o modelo

In [17]:
joblib.dump(model_6f, '../models/music_recommender_model_6f.joblib')
joblib.dump(model_4f, '../models/music_recommender_model_4f.joblib')
joblib.dump(features_6f, '../models/music_model_features_6f.pkl')
joblib.dump(features_4f, '../models/music_model_features_4f.pkl')

['../models/music_model_features_4f.pkl']