In [5]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import confusion_matrix

In [6]:
class SpotifyDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.FloatTensor(X)
        self.y = torch.FloatTensor(y)

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

class SpotifyRankPredictor(nn.Module):
    def __init__(self, num_categories):
        super(SpotifyRankPredictor, self).__init__()
        self.input_layer = nn.Linear(8, 64)
        self.block1 = nn.Sequential(
            nn.Linear(64, 64),
            nn.ReLU(),
            nn.BatchNorm1d(64),
            nn.Dropout(0.3)
        )
        self.block2 = nn.Sequential(
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.BatchNorm1d(32),
            nn.Dropout(0.3)
        )
        self.output_layer = nn.Linear(32, num_categories)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        x = self.input_layer(x)
        x = nn.ReLU()(x)
        identity = x
        x = self.block1(x) + identity
        x = self.block2(x)
        x = self.output_layer(x)
        return self.softmax(x)

class CustomLoss(nn.Module):
    def __init__(self, weight_for_category_diff=1.0):
        super(CustomLoss, self).__init__()
        self.weight_for_category_diff = weight_for_category_diff

    def forward(self, outputs, targets):
        ce_loss = nn.CrossEntropyLoss()(outputs, torch.max(targets, 1)[1])
        pred_categories = torch.argmax(outputs, dim=1)
        true_categories = torch.argmax(targets, dim=1)
        category_diff = (pred_categories - true_categories).float()
        category_diff_square = torch.sum(category_diff ** 2).float()
        total_loss = ce_loss + self.weight_for_category_diff * category_diff_square
        return total_loss

def calculate_class_weights(categories):
    class_counts = np.bincount(categories)
    weights = 1 + np.log1p(np.max(class_counts) / class_counts)
    return torch.FloatTensor(weights)

def preprocess_data(df):
    feature_names = ['Danceability', 'Energy', 'Loudness', 'Speechiness',
                     'Acousticness', 'Liveness', 'Tempo', 'Duration (ms)']
    X = df[feature_names].values

    def rank_to_category(rank):
        if rank <= 30:
            return 0  # Top 30
        elif rank <= 50:
            return 1  # 31-50
        else:
            return 2  # 50+ or Not Charted

    ranks = df['Highest Charting Position'].values
    categories = np.array([rank_to_category(rank) for rank in ranks])
    num_categories = 3
    y_encoded = np.eye(num_categories)[categories]

    return X, y_encoded, num_categories, feature_names

def augment_features(features, num_samples_needed, feature_names, noise_scales):
    augmented_data = []
    for _ in range(num_samples_needed):
        base_sample = features[np.random.randint(len(features))]
        new_features = []
        for feat_idx, feat_name in enumerate(feature_names):
            feature = base_sample[feat_idx]
            noise = np.random.normal(0, noise_scales[feat_name])
            new_value = feature + noise

            if feat_name == 'Loudness':
                new_value = np.clip(new_value, -60, 0)
            elif feat_name == 'Duration (ms)':
                new_value = max(1000, new_value)
            else:
                new_value = np.clip(new_value, 0, 1)

            new_features.append(new_value)
        augmented_data.append(new_features)

    return np.array(augmented_data)

def augment_and_balance_training_data(X, y, scaler, feature_names):
    categories = np.argmax(y, axis=1)
    category_counts = np.bincount(categories)
    print("Original category counts in train set:", category_counts)

    target_samples = int(max(category_counts) * 0.9)
    print("Target samples per category:", target_samples)

    processed_features = []
    processed_categories = []

    X_scaled = scaler.transform(X)
    noise_scales = {
        feat: np.std(X_scaled[:, i]) * 0.1
        for i, feat in enumerate(feature_names)
    }

    for category in range(3):
        category_mask = categories == category
        category_features = X[category_mask]
        current_samples = len(category_features)

        if current_samples < target_samples:
            num_samples_needed = target_samples - current_samples
            augmented = augment_features(category_features, num_samples_needed, feature_names, noise_scales)
            processed_features.append(np.vstack([category_features, augmented]))
            processed_categories.extend([category] * target_samples)
        else:
            processed_features.append(category_features)
            processed_categories.extend([category] * current_samples)

    X_combined = np.vstack(processed_features)
    categories_combined = np.array(processed_categories)
    X_scaled = scaler.transform(X_combined)
    y_combined = np.eye(3)[categories_combined]

    print("Final category counts in balanced train set:", np.bincount(categories_combined))
    class_weights = calculate_class_weights(categories_combined)

    return X_scaled, y_combined, class_weights

def train_model(model, train_loader, val_loader, criterion, optimizer,
                num_epochs=150, patience=15):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = model.to(device)
    criterion = criterion.to(device)

    best_val_loss = float('inf')
    patience_counter = 0
    train_losses = []
    val_losses = []
    best_val_accuracy = 0

    for epoch in range(num_epochs):
        model.train()
        train_loss = 0
        train_correct = 0
        train_total = 0

        for X_batch, y_batch in train_loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            optimizer.zero_grad()
            outputs = model(X_batch)
            loss = criterion(outputs, y_batch)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()

            train_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            _, actual = torch.max(y_batch.data, 1)
            train_total += y_batch.size(0)
            train_correct += (predicted == actual).sum().item()

        train_loss /= len(train_loader)
        train_accuracy = 100 * train_correct / train_total
        train_losses.append(train_loss)

        model.eval()
        val_loss = 0
        val_correct = 0
        val_total = 0

        with torch.no_grad():
            for X_batch, y_batch in val_loader:
                X_batch, y_batch = X_batch.to(device), y_batch.to(device)
                outputs = model(X_batch)
                loss = criterion(outputs, y_batch)
                val_loss += loss.item()

                _, predicted = torch.max(outputs.data, 1)
                _, actual = torch.max(y_batch.data, 1)
                val_total += y_batch.size(0)
                val_correct += (predicted == actual).sum().item()

        val_loss /= len(val_loader)
        val_accuracy = 100 * val_correct / val_total
        val_losses.append(val_loss)

        print(f'Epoch [{epoch+1}/{num_epochs}], '
              f'Train Loss: {train_loss:.4f}, Train Acc: {train_accuracy:.2f}%, '
              f'Val Loss: {val_loss:.4f}, Val Acc: {val_accuracy:.2f}%')

        if val_accuracy > best_val_accuracy:
            best_val_accuracy = val_accuracy
            patience_counter = 0
            torch.save({
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'val_accuracy': val_accuracy,
            }, 'best_model.pth')
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print(f'Early stopping triggered! Best validation accuracy: {best_val_accuracy:.2f}%')
                break

    return train_losses, val_losses

def analyze_misclassification_patterns(model, test_loader, feature_names, scaler):
    model.eval()
    # 각 오분류 패턴별로 데이터를 저장할 딕셔너리
    misclassification_patterns = {
        (0, 1): {'features': [], 'confidences': [], 'features_orig': []},  # Top 30 -> 31-50
        (0, 2): {'features': [], 'confidences': [], 'features_orig': []},  # Top 30 -> 50+
        (1, 0): {'features': [], 'confidences': [], 'features_orig': []},  # 31-50 -> Top 30
        (1, 2): {'features': [], 'confidences': [], 'features_orig': []},  # 31-50 -> 50+
        (2, 0): {'features': [], 'confidences': [], 'features_orig': []},  # 50+ -> Top 30
        (2, 1): {'features': [], 'confidences': [], 'features_orig': []}   # 50+ -> 31-50
    }

    # 정확히 분류된 케이스도 저장
    correct_classifications = {
        0: {'features': [], 'confidences': [], 'features_orig': []},  # Top 30
        1: {'features': [], 'confidences': [], 'features_orig': []},  # 31-50
        2: {'features': [], 'confidences': [], 'features_orig': []}   # 50+
    }

    with torch.no_grad():
        for X_batch, y_batch in test_loader:
            outputs = model(X_batch)
            probabilities = outputs
            _, predicted = torch.max(outputs.data, 1)
            _, actual = torch.max(y_batch.data, 1)

            # 원래 스케일로 복원
            original_features = scaler.inverse_transform(X_batch)

            for i in range(len(predicted)):
                pred = predicted[i].item()
                act = actual[i].item()
                conf = probabilities[i][pred].item()
                features = X_batch[i].numpy()
                features_orig = original_features[i]

                if pred == act:
                    correct_classifications[act]['features'].append(features)
                    correct_classifications[act]['confidences'].append(conf)
                    correct_classifications[act]['features_orig'].append(features_orig)
                else:
                    pattern = (act, pred)
                    if pattern in misclassification_patterns:
                        misclassification_patterns[pattern]['features'].append(features)
                        misclassification_patterns[pattern]['confidences'].append(conf)
                        misclassification_patterns[pattern]['features_orig'].append(features_orig)

    # 분석 결과를 저장할 딕셔너리
    analysis_results = {}

    # 각 오분류 패턴에 대한 통계 계산
    for pattern, data in misclassification_patterns.items():
        if data['features']:  # 해당 패턴의 케이스가 있는 경우만
            features_array = np.array(data['features'])
            features_orig_array = np.array(data['features_orig'])
            mean_features = np.mean(features_orig_array, axis=0)
            std_features = np.std(features_orig_array, axis=0)
            mean_confidence = np.mean(data['confidences'])

            actual_class = pattern[0]
            if correct_classifications[actual_class]['features']:
                correct_features = np.array(correct_classifications[actual_class]['features_orig'])
                correct_mean = np.mean(correct_features, axis=0)
                feature_differences = mean_features - correct_mean

                analysis_results[pattern] = {
                    'count': len(data['features']),
                    'mean_confidence': mean_confidence,
                    'feature_analysis': {
                        feature: {
                            'mean': mean_features[i],
                            'std': std_features[i],
                            'diff_from_correct': feature_differences[i],
                            'orig_values': features_orig_array[:, i].tolist()  # 개별 케이스들의 값 저장
                        }
                        for i, feature in enumerate(feature_names)
                    }
                }

    return analysis_results

def print_misclassification_analysis(analysis_results, feature_names):
    class_names = ['Top 30', '31-50', '50+ or Not Charted']
    min_diff_threshold = 0.5

    print("\n=== 오분류 패턴 분석 ===")
    pattern_order = [(0,1), (0,2), (1,0), (1,2), (2,0), (2,1)]  # 분석할 패턴 순서 지정

    for pattern in pattern_order:
        if pattern not in analysis_results:
            continue

        results = analysis_results[pattern]
        actual, predicted = pattern
        print(f"\n실제 {class_names[actual]}를 {class_names[predicted]}로 잘못 예측한 케이스:")
        print(f"발생 횟수: {results['count']}")
        print(f"평균 신뢰도: {results['mean_confidence']:.3f}")

        print("\n주요 특성 차이 (올바르게 분류된 케이스와 비교):")
        # 차이가 큰 순서대로 정렬
        sorted_features = sorted(
            feature_names,
            key=lambda x: abs(results['feature_analysis'][x]['diff_from_correct']),
            reverse=True
        )

        for feature in sorted_features[:3]:  # 상위 3개 특성만 표시
            analysis = results['feature_analysis'][feature]
            diff = analysis['diff_from_correct']
            print(f"\n  {feature}:")
            print(f"    평균값: {analysis['mean']:.3f} (±{analysis['std']:.3f})")
            print(f"    정상 케이스와의 차이: {diff:+.3f}")

            # 개별 케이스들의 값 출력 (최대 5개)
            orig_values = analysis['orig_values']
            if len(orig_values) > 0:
                print(f"    오분류된 케이스들의 실제 값 (최대 5개): {', '.join(f'{v:.3f}' for v in orig_values[:5])}")

            if abs(diff) > min_diff_threshold:
                direction = "높음" if diff > 0 else "낮음"
                print(f"    → 잘못 분류된 케이스에서 눈에 띄게 {direction}")

def analyze_feature_importance(model, test_loader, feature_names):
    model.eval()
    feature_impacts = {name: [] for name in feature_names}

    with torch.no_grad():
        for X_batch, y_batch in test_loader:
            base_output = model(X_batch)

            for i, feature in enumerate(feature_names):
                X_modified = X_batch.clone()
                X_modified[:, i] += 0.1
                modified_output = model(X_modified)
                output_change = torch.abs(modified_output - base_output).mean().item()
                feature_impacts[feature].append(output_change)

    for feature in feature_names:
        feature_impacts[feature] = np.mean(feature_impacts[feature])

    return feature_impacts

def analyze_misclassifications(model, test_loader, feature_names, scaler):
    model.eval()
    misclassified_samples = []

    with torch.no_grad():
        for X_batch, y_batch in test_loader:
            outputs = model(X_batch)
            _, predicted = torch.max(outputs.data, 1)
            _, actual = torch.max(y_batch.data, 1)

            # 잘못 예측된 샘플 찾기
            mask = (predicted != actual)
            if mask.any():
                wrong_X = X_batch[mask]
                wrong_pred = predicted[mask]
                wrong_actual = actual[mask]
                wrong_probs = outputs[mask]

                # 원래 스케일로 되돌리기
                original_features = scaler.inverse_transform(wrong_X)

                for i in range(len(wrong_X)):
                    sample = {
                        'actual': wrong_actual[i].item(),
                        'predicted': wrong_pred[i].item(),
                        'confidence': wrong_probs[i].max().item(),
                        'features': dict(zip(feature_names, original_features[i]))
                    }
                    misclassified_samples.append(sample)

    return misclassified_samples

def main():
    # 1. 데이터 로드 및 기본 전처리
    df = pd.read_csv('spotify_dataset.csv')
    X, y_encoded, num_categories, feature_names = preprocess_data(df)

    # 2. 데이터 분할 (증강 전)
    X_train, X_temp, y_train, y_temp = train_test_split(
        X, y_encoded, test_size=0.2, random_state=42,
        stratify=y_encoded.argmax(axis=1)
    )
    X_val, X_test, y_val, y_test = train_test_split(
        X_temp, y_temp, test_size=0.5, random_state=42,
        stratify=y_temp.argmax(axis=1)
    )

    # 3. StandardScaler 학습 (train set으로만)
    scaler = StandardScaler()
    scaler.fit(X_train)

    # 4. train 데이터에만 증강 및 밸런싱 적용
    X_train_processed, y_train_processed, class_weights = augment_and_balance_training_data(
        X_train, y_train, scaler, feature_names
    )

    # 5. validation과 test set은 스케일링만 적용
    X_val_scaled = scaler.transform(X_val)
    X_test_scaled = scaler.transform(X_test)

    # 6. 데이터셋 생성
    train_dataset = SpotifyDataset(X_train_processed, y_train_processed)
    val_dataset = SpotifyDataset(X_val_scaled, y_val)
    test_dataset = SpotifyDataset(X_test_scaled, y_test)

    batch_size = 64
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size)
    test_loader = DataLoader(test_dataset, batch_size=batch_size)

    # 7. 모델, 손실함수, 옵티마이저 설정
    model = SpotifyRankPredictor(num_categories)
    criterion = CustomLoss(weight_for_category_diff=1000)
    optimizer = optim.AdamW(model.parameters(), lr=0.001, weight_decay=0.01)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode='max', factor=0.5, patience=5, verbose=True
    )

    # 8. 모델 학습
    train_losses, val_losses = train_model(
        model=model,
        train_loader=train_loader,
        val_loader=val_loader,
        criterion=criterion,
        optimizer=optimizer,
        num_epochs=150,
        patience=15
    )

    # 9. 최고 성능 모델 불러오기
    checkpoint = torch.load('best_model.pth')
    model.load_state_dict(checkpoint['model_state_dict'])
    model.eval()

    # 10. Test set evaluation
    correct = 0
    total = 0
    y_pred = []
    y_true = []

    with torch.no_grad():
        for X_batch, y_batch in test_loader:
            outputs = model(X_batch)
            _, predicted = torch.max(outputs.data, 1)
            _, actual = torch.max(y_batch.data, 1)
            total += y_batch.size(0)
            correct += (predicted == actual).sum().item()
            y_pred.extend(predicted.numpy())
            y_true.extend(actual.numpy())

    # 11. 성능 평가 및 분석
    # 11-1. 기본 성능 지표
    accuracy = 100 * correct / total
    print(f'\nTest Accuracy: {accuracy:.2f}%')

    # 11-2. 혼동 행렬
    conf_matrix = confusion_matrix(y_true, y_pred)
    print("\nConfusion Matrix:")
    print(conf_matrix)
    print("\nClass weights:", class_weights)

    # 11-3. 오분류 패턴 상세 분석
    analysis_results = analyze_misclassification_patterns(
        model, test_loader, feature_names, scaler
    )
    print_misclassification_analysis(analysis_results, feature_names)

    # 11-4. 클래스별 오분류 분석
    misclassified = analyze_misclassifications(model, test_loader, feature_names, scaler)

    print("\n=== 클래스별 오분류 상세 분석 ===")
    class_names = ['Top 30', '31-50', '50+ or Not Charted']

    for category in range(3):
        wrong_predictions = [s for s in misclassified if s['actual'] == category]
        if wrong_predictions:
            print(f"\n{class_names[category]} 클래스 분석:")
            print(f"총 오분류 개수: {len(wrong_predictions)}")

            # 예측된 클래스별로 그룹화
            predicted_groups = {}
            for pred in wrong_predictions:
                pred_class = pred['predicted']
                if pred_class not in predicted_groups:
                    predicted_groups[pred_class] = []
                predicted_groups[pred_class].append(pred)

            # 각 예측 그룹별 분석
            for pred_class, group in predicted_groups.items():
                print(f"\n- {class_names[pred_class]}(으)로 잘못 예측된 케이스:")
                print(f"  케이스 수: {len(group)}")
                avg_confidence = np.mean([p['confidence'] for p in group])
                print(f"  평균 신뢰도: {avg_confidence:.3f}")

                # 특성별 분석
                print("  주요 특성 차이:")
                # 특성별 차이 계산
                feature_diffs = {}
                for feature in feature_names:
                    values = [p['features'][feature] for p in group]
                    correct_preds = [s for s in misclassified if s['actual'] == category and s['predicted'] == category]
                    if correct_preds:
                        correct_values = [p['features'][feature] for p in correct_preds]
                        correct_mean = np.mean(correct_values)
                        values_mean = np.mean(values)
                        diff = values_mean - correct_mean
                        feature_diffs[feature] = (abs(diff), diff, values)

                # 차이가 큰 상위 3개 특성만 출력
                sorted_features = sorted(feature_diffs.items(), key=lambda x: x[1][0], reverse=True)
                for feature, (_, diff, values) in sorted_features[:3]:
                    print(f"    {feature}:")
                    print(f"      평균값: {np.mean(values):.3f} (±{np.std(values):.3f})")
                    print(f"      정상 케이스와의 차이: {diff:+.3f}")
                    if abs(diff) > 0.5:
                        direction = "높음" if diff > 0 else "낮음"
                        print(f"      → 잘못 분류된 케이스에서 눈에 띄게 {direction}")

    # 11-5. 특성 중요도 분석
    feature_importance = analyze_feature_importance(model, test_loader, feature_names)
    print("\n=== 특성 중요도 분석 ===")
    sorted_features = sorted(feature_importance.items(), key=lambda x: x[1], reverse=True)
    for feature, importance in sorted_features:
        print(f"{feature}: {importance:.4f}")

    print("\n=== 전체 요약 ===")
    print(f"전체 정확도: {accuracy:.2f}%")
    print("\n각 클래스별 성능:")
    for i in range(num_categories):
        class_correct = conf_matrix[i][i]
        class_total = np.sum(conf_matrix[i])
        class_accuracy = 100 * class_correct / class_total if class_total > 0 else 0
        print(f"{class_names[i]}: {class_accuracy:.2f}% ({class_correct}/{class_total})")

if __name__ == "__main__":
    main()

Original category counts in train set: [ 462  188 1102]
Target samples per category: 991
Final category counts in balanced train set: [ 991  991 1102]
Epoch [1/150], Train Loss: 64409.1875, Train Acc: 49.61%, Val Loss: 68250.9824, Val Acc: 58.90%




Epoch [2/150], Train Loss: 53756.0441, Train Acc: 60.41%, Val Loss: 63750.9370, Val Acc: 63.01%
Epoch [3/150], Train Loss: 53021.3450, Train Acc: 60.99%, Val Loss: 63750.9219, Val Acc: 63.01%
Epoch [4/150], Train Loss: 52919.2937, Train Acc: 61.35%, Val Loss: 63750.9185, Val Acc: 63.01%
Epoch [5/150], Train Loss: 52837.6610, Train Acc: 61.28%, Val Loss: 63750.9165, Val Acc: 63.01%
Epoch [6/150], Train Loss: 51858.0662, Train Acc: 62.06%, Val Loss: 63750.9141, Val Acc: 63.01%
Epoch [7/150], Train Loss: 52572.3496, Train Acc: 61.51%, Val Loss: 63750.9146, Val Acc: 63.01%
Epoch [8/150], Train Loss: 52062.1459, Train Acc: 62.13%, Val Loss: 63750.9185, Val Acc: 63.01%
Epoch [9/150], Train Loss: 51776.4305, Train Acc: 62.09%, Val Loss: 63750.9189, Val Acc: 63.01%
Epoch [10/150], Train Loss: 51204.9982, Train Acc: 63.10%, Val Loss: 63750.9136, Val Acc: 63.01%
Epoch [11/150], Train Loss: 52102.9561, Train Acc: 62.06%, Val Loss: 62750.9121, Val Acc: 63.47%
Epoch [12/150], Train Loss: 52184.5870

  checkpoint = torch.load('best_model.pth')
