In [3]:
import mindspore.nn as nn
import mindspore.dataset as ds
import mindspore as ms
from mindspore import Tensor, Model
import mindspore.dataset.vision as vision
import mindspore.dataset.transforms as transforms
from mindspore.train.callback import LossMonitor
import numpy as np
import pandas as pd
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split

# Step 1: 加载并处理数据集
df = pd.read_csv("processed_dataset.csv")

# 将 'explicit' 列转换为布尔值
df['explicit'] = df['explicit'].astype(int)

# 将 'track_genre' 进行 One-Hot 编码
encoder = OneHotEncoder(sparse_output=False)
track_genre_encoded = encoder.fit_transform(df[['track_genre']])

# 将编码后的数据添加回数据框并移除原始的 'track_genre' 列
encoded_columns = encoder.get_feature_names_out(['track_genre'])
df_encoded = pd.DataFrame(track_genre_encoded, columns=encoded_columns)
df = pd.concat([df.drop(columns=['track_genre']), df_encoded], axis=1)

# 提取特征和标签
X = df.drop(columns=encoded_columns).astype(np.float32).values
y = track_genre_encoded
print(X,y)

[[2.30666e+05 0.00000e+00 6.76000e-01 ... 7.15000e-01 8.79170e+01
  4.00000e+00]
 [1.49610e+05 0.00000e+00 4.20000e-01 ... 2.67000e-01 7.74890e+01
  4.00000e+00]
 [2.10826e+05 0.00000e+00 4.38000e-01 ... 1.20000e-01 7.63320e+01
  4.00000e+00]
 ...
 [2.71466e+05 0.00000e+00 6.29000e-01 ... 7.43000e-01 1.32378e+02
  4.00000e+00]
 [2.83893e+05 0.00000e+00 5.87000e-01 ... 4.13000e-01 1.35960e+02
  4.00000e+00]
 [2.41826e+05 0.00000e+00 5.26000e-01 ... 7.08000e-01 7.91980e+01
  4.00000e+00]] [[1. 0. 0. ... 0. 0. 0.]
 [1. 0. 0. ... 0. 0. 0.]
 [1. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 1.]
 [0. 0. 0. ... 0. 0. 1.]
 [0. 0. 0. ... 0. 0. 1.]]


In [1]:

# Step 2: 定义一个简单的 MLP 分类器
class MLPClassifier(nn.Cell):
    def __init__(self, input_size, hidden_sizes, output_size):
        super(MLPClassifier, self).__init__()
        # 输入层到第一层隐藏层的全连接层
        self.fc1 = nn.Dense(input_size, hidden_sizes[0], weight_init="normal")
        # 激活函数 ReLU
        self.relu1 = nn.ReLU()
        # 第一层隐藏层到第二层隐藏层的全连接层
        self.fc2 = nn.Dense(hidden_sizes[0], hidden_sizes[1], weight_init="normal")
        # 激活函数 ReLU
        self.relu2 = nn.ReLU()
        # 第二层隐藏层到输出层的全连接层
        self.fc3 = nn.Dense(hidden_sizes[1], output_size, weight_init="normal")

    def construct(self, x):
        # 数据通过输入层进入第一层隐藏层
        x = self.fc1(x)
        # 使用 ReLU 激活函数
        x = self.relu1(x)
        # 数据通过第一层隐藏层进入第二层隐藏层
        x = self.fc2(x)
        # 使用 ReLU 激活函数
        x = self.relu2(x)
        # 数据通过第二层隐藏层进入输出层
        x = self.fc3(x)
        return x

# Step 3: 创建模型实例
input_size = X.shape[1]  # 输入层的特征数量
hidden_sizes = [100, 50]  # 隐藏层中的神经元数量
output_size = y.shape[1]  # 输出层的神经元数量（类别数量）
model = MLPClassifier(input_size, hidden_sizes, output_size)

# Step 4: 定义超参数、损失函数和优化器
epochs = 3
batch_size = 32
learning_rate = 0.01

loss_fn = nn.SoftmaxCrossEntropyWithLogits(sparse=False, reduction='mean')  # 损失函数
optimizer = nn.Adam(params=model.trainable_params(), learning_rate=learning_rate)  # 优化器

# Step 5: 划分训练集和测试集
# 将数据集划分为训练集和测试集
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# train_dataset = ds.NumpySlicesDataset(data=(X_train, y_train), column_names=['features', 'labels'], shuffle=True).batch(batch_size)
# test_dataset = ds.NumpySlicesDataset(data=(X_test, y_test), column_names=['features', 'labels'], shuffle=False).batch(batch_size)
train_dataset = ds.NumpySlicesDataset(data=(X_train, y_train.astype(np.float32)), column_names=['features', 'labels'], shuffle=True).batch(batch_size)
test_dataset = ds.NumpySlicesDataset(data=(X_test, y_test.astype(np.float32)), column_names=['features', 'labels'], shuffle=False).batch(batch_size)


# Step 6: 定义训练和测试函数
def forward_fn(data, label):
    logits = model(data)
    loss = loss_fn(logits, label)
    return loss, logits

grad_fn = ms.value_and_grad(forward_fn, None, optimizer.parameters, has_aux=True)

def train_step(data, label):
    (loss, _), grads = grad_fn(data, label)
    optimizer(grads)
    return loss

def train_loop(model, dataset, loss_fn):
    size = dataset.get_dataset_size()
    model.set_train()
    for batch, (data, label) in enumerate(dataset.create_tuple_iterator()):
        loss = train_step(data, label)
        if batch % 100 == 0:
            loss_value = loss.asnumpy()
            print(f"loss: {loss_value:>7f}  [{batch:>3d}/{size:>3d}]")

def test_loop(model, dataset, loss_fn):
    num_batches = dataset.get_dataset_size()
    model.set_train(False)
    total, test_loss, correct = 0, 0, 0
    for data, label in dataset.create_tuple_iterator():
        pred = model(data)
        total += len(data)
        test_loss += loss_fn(pred, label).asnumpy()
        correct += (pred.argmax(1) == label.argmax(1)).asnumpy().sum()
    test_loss /= num_batches
    correct /= total
    print(f"Test: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

# Step 7: 开始训练与测试
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train_loop(model, train_dataset, loss_fn)
    test_loop(model, test_dataset, loss_fn)
print("Done!")

# Step 8: 使用模型进行推理
# 创建一个形状为 (1, input_size) 的全为 1 的张量作为输入
X_infer = Tensor(np.ones((1, input_size)), ms.float32)
# 使用模型进行前向传播，获取输出 logits
y_pred = model(X_infer)
# 打印预测结果
print(f"Predicted class probabilities: {y_pred}")


Epoch 1
-------------------------------
loss: 21.395741  [  0/2850]
loss: 4.753157  [100/2850]
loss: 4.756130  [200/2850]
loss: 4.735509  [300/2850]
loss: 4.757111  [400/2850]
loss: 4.730098  [500/2850]
loss: 4.748776  [600/2850]
loss: 4.732485  [700/2850]
loss: 4.742818  [800/2850]
loss: 4.789379  [900/2850]
loss: 4.735844  [1000/2850]
loss: 4.731573  [1100/2850]
loss: 4.727586  [1200/2850]
loss: 4.745066  [1300/2850]
loss: 4.736310  [1400/2850]
loss: 4.723031  [1500/2850]
loss: 4.751857  [1600/2850]
loss: 4.781862  [1700/2850]
loss: 4.738662  [1800/2850]
loss: 4.735434  [1900/2850]
loss: 4.732615  [2000/2850]
loss: 4.750091  [2100/2850]
loss: 4.730134  [2200/2850]
loss: 4.740123  [2300/2850]
loss: 4.745975  [2400/2850]
loss: 4.762704  [2500/2850]
loss: 4.757313  [2600/2850]
loss: 4.740844  [2700/2850]
loss: 4.719240  [2800/2850]
Test: 
 Accuracy: 0.9%, Avg loss: 4.741728 

Epoch 2
-------------------------------
loss: 4.739935  [  0/2850]
loss: 4.765605  [100/2850]
loss: 4.746161  [2

In [2]:
import mindspore.nn as nn
import mindspore.dataset as ds
import mindspore as ms
from mindspore import Tensor, Model
import mindspore.dataset.vision as vision
import mindspore.dataset.transforms as transforms
from mindspore.train.callback import LossMonitor
import numpy as np
import pandas as pd
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.model_selection import train_test_split

# Step 1: 加载并处理数据集
df = pd.read_csv("processed_dataset.csv")

# 将 'explicit' 列转换为数值型 (True/False 转换为 1/0)
df['explicit'] = df['explicit'].astype(int)

# 将类别特征 'track_genre' 做 One-Hot 编码
encoder = OneHotEncoder(sparse_output=False)
track_genre_encoded = encoder.fit_transform(df[['track_genre']])

# 将编码后的数据添加回数据框并移除原始的 'track_genre' 列
encoded_columns = encoder.get_feature_names_out(['track_genre'])
df_encoded = pd.DataFrame(track_genre_encoded, columns=encoded_columns)
df = pd.concat([df.drop(columns=['track_genre']), df_encoded], axis=1)

# 对类别特征 'key', 'mode', 'time_signature' 进行 One-Hot 编码
categorical_features = ['key', 'mode', 'time_signature']
categorical_encoder = OneHotEncoder(sparse_output=False, drop='first')
categorical_encoded = categorical_encoder.fit_transform(df[categorical_features])
categorical_encoded_df = pd.DataFrame(categorical_encoded, columns=categorical_encoder.get_feature_names_out(categorical_features))

# 合并编码后的数据和数值特征，并移除原始的类别特征
df = pd.concat([df.drop(columns=categorical_features).reset_index(drop=True), categorical_encoded_df], axis=1)

# 标准化数值特征
numerical_features = df.select_dtypes(include=['float64', 'int64'])
scaler = StandardScaler()
df[numerical_features.columns] = scaler.fit_transform(numerical_features)

# 将标签转换为 float32 类型
df_encoded = df_encoded.astype(np.float32)

# 提取特征和标签
X = df.drop(columns=encoded_columns).astype(np.float32).values
y = df_encoded.values

# Step 2: 定义一个简单的 MLP 分类器
class MLPClassifier(nn.Cell):
    def __init__(self, input_size, hidden_sizes, output_size):
        super(MLPClassifier, self).__init__()
        # 输入层到第一层隐藏层的全连接层
        self.fc1 = nn.Dense(input_size, hidden_sizes[0], weight_init="normal")
        # 激活函数 ReLU
        self.relu1 = nn.ReLU()
        # 第一层隐藏层到第二层隐藏层的全连接层
        self.fc2 = nn.Dense(hidden_sizes[0], hidden_sizes[1], weight_init="normal")
        # 激活函数 ReLU
        self.relu2 = nn.ReLU()
        # 第二层隐藏层到输出层的全连接层
        self.fc3 = nn.Dense(hidden_sizes[1], output_size, weight_init="normal")

    def construct(self, x):
        # 数据通过输入层进入第一层隐藏层
        x = self.fc1(x)
        # 使用 ReLU 激活函数
        x = self.relu1(x)
        # 数据通过第一层隐藏层进入第二层隐藏层
        x = self.fc2(x)
        # 使用 ReLU 激活函数
        x = self.relu2(x)
        # 数据通过第二层隐藏层进入输出层
        x = self.fc3(x)
        return x

# Step 3: 创建模型实例
input_size = X.shape[1]  # 输入层的特征数量
hidden_sizes = [100, 50]  # 隐藏层中的神经元数量
output_size = y.shape[1]  # 输出层的神经元数量（类别数量）
model = MLPClassifier(input_size, hidden_sizes, output_size)

# Step 4: 定义超参数、损失函数和优化器
epochs = 10
batch_size = 32
learning_rate = 0.01

loss_fn = nn.SoftmaxCrossEntropyWithLogits(sparse=False, reduction='mean')  # 损失函数
optimizer = nn.Adam(params=model.trainable_params(), learning_rate=learning_rate)  # 优化器

# Step 5: 割分训练集和测试集
# 将数据集割分为训练集和测试集
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
train_dataset = ds.NumpySlicesDataset(data=(X_train, y_train), column_names=['features', 'labels'], shuffle=True).batch(batch_size)
test_dataset = ds.NumpySlicesDataset(data=(X_test, y_test), column_names=['features', 'labels'], shuffle=False).batch(batch_size)

# Step 6: 定义训练和测试函数
def forward_fn(data, label):
    logits = model(data)
    loss = loss_fn(logits, label)
    return loss, logits

grad_fn = ms.value_and_grad(forward_fn, None, optimizer.parameters, has_aux=True)

def train_step(data, label):
    (loss, _), grads = grad_fn(data, label)
    optimizer(grads)
    return loss

def train_loop(model, dataset, loss_fn):
    size = dataset.get_dataset_size()
    model.set_train()
    for batch, (data, label) in enumerate(dataset.create_tuple_iterator()):
        loss = train_step(data, label)
        if batch % 100 == 0:
            loss_value = loss.asnumpy()
            print(f"loss: {loss_value:>7f}  [{batch:>3d}/{size:>3d}]")

def test_loop(model, dataset, loss_fn):
    num_batches = dataset.get_dataset_size()
    model.set_train(False)
    total, test_loss, correct = 0, 0, 0
    for data, label in dataset.create_tuple_iterator():
        pred = model(data)
        total += len(data)
        test_loss += loss_fn(pred, label).asnumpy()
        correct += (pred.argmax(1) == label.argmax(1)).asnumpy().sum()
    test_loss /= num_batches
    correct /= total
    print(f"Test: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

# Step 7: 开始训练与测试
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train_loop(model, train_dataset, loss_fn)
    test_loop(model, test_dataset, loss_fn)
print("Done!")

# Step 8: 使用模型进行推理
# 创建一个形状为 (1, input_size) 的全为 1 的形状作为输入
X_infer = Tensor(np.ones((1, input_size)), ms.float32)
# 使用模型进行前向传播，获取输出 logits
y_pred = model(X_infer)
# 打印预测结果
print(f"Predicted class probabilities: {y_pred}")

Epoch 1
-------------------------------
loss: 4.759555  [  0/2850]
loss: 3.973511  [100/2850]
loss: 4.104115  [200/2850]
loss: 3.984339  [300/2850]
loss: 3.739089  [400/2850]
loss: 4.028446  [500/2850]
loss: 3.910594  [600/2850]
loss: 3.568999  [700/2850]
loss: 4.055288  [800/2850]
loss: 4.027919  [900/2850]
loss: 3.271993  [1000/2850]
loss: 3.691371  [1100/2850]
loss: 3.728622  [1200/2850]
loss: 3.668241  [1300/2850]
loss: 3.562929  [1400/2850]
loss: 3.645488  [1500/2850]
loss: 3.602694  [1600/2850]
loss: 3.399765  [1700/2850]
loss: 3.629832  [1800/2850]
loss: 3.382724  [1900/2850]
loss: 3.915841  [2000/2850]
loss: 3.472497  [2100/2850]
loss: 3.343732  [2200/2850]
loss: 3.776298  [2300/2850]
loss: 3.547395  [2400/2850]
loss: 3.523124  [2500/2850]
loss: 3.872051  [2600/2850]
loss: 3.494771  [2700/2850]
loss: 3.804004  [2800/2850]
Test: 
 Accuracy: 14.2%, Avg loss: 3.597808 

Epoch 2
-------------------------------
loss: 3.848297  [  0/2850]
loss: 3.436370  [100/2850]
loss: 3.921732  [2

In [2]:
import mindspore.nn as nn
import mindspore.dataset as ds
import mindspore as ms
from mindspore import Tensor, Model,context
import mindspore.dataset.vision as vision
import mindspore.dataset.transforms as transforms
from mindspore.train.callback import LossMonitor
import numpy as np
import pandas as pd
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.model_selection import train_test_split
# 设置运行环境为 GPU
context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
# Step 1: 加载并处理数据集
df = pd.read_csv("processed_dataset.csv")

# 将 'explicit' 列转换为数值型 (True/False 转换为 1/0)
df['explicit'] = df['explicit'].astype(int)

# 将类别特征 'track_genre' 做 One-Hot 编码
encoder = OneHotEncoder(sparse_output=False)
track_genre_encoded = encoder.fit_transform(df[['track_genre']])

# 将编码后的数据添加回数据框并移除原始的 'track_genre' 列
encoded_columns = encoder.get_feature_names_out(['track_genre'])
df_encoded = pd.DataFrame(track_genre_encoded, columns=encoded_columns)
df = pd.concat([df.drop(columns=['track_genre']), df_encoded], axis=1)

# 对类别特征 'key', 'mode', 'time_signature' 进行 One-Hot 编码
categorical_features = ['key', 'mode', 'time_signature']
categorical_encoder = OneHotEncoder(sparse_output=False, drop='first')
categorical_encoded = categorical_encoder.fit_transform(df[categorical_features])
categorical_encoded_df = pd.DataFrame(categorical_encoded, columns=categorical_encoder.get_feature_names_out(categorical_features))

# 合并编码后的数据和数值特征，并移除原始的类别特征
df = pd.concat([df.drop(columns=categorical_features).reset_index(drop=True), categorical_encoded_df], axis=1)

# 标准化数值特征
numerical_features = df.select_dtypes(include=['float64', 'int64'])
scaler = StandardScaler()
df[numerical_features.columns] = scaler.fit_transform(numerical_features)

# 将标签转换为 float32 类型
df_encoded = df_encoded.astype(np.float32)

# 提取特征和标签
X = df.drop(columns=encoded_columns).astype(np.float32).values
y = df_encoded.values

# Step 2: 定义一个简单的 MLP 分类器
class MLPClassifier(nn.Cell):
    def __init__(self, input_size, hidden_sizes, output_size):
        super(MLPClassifier, self).__init__()
        # 输入层到第一层隐藏层的全连接层
        self.fc1 = nn.Dense(input_size, hidden_sizes[0], weight_init="normal")
        # 激活函数 ReLU
        self.relu1 = nn.ReLU()
        # 第一层隐藏层到第二层隐藏层的全连接层
        self.fc2 = nn.Dense(hidden_sizes[0], hidden_sizes[1], weight_init="normal")
        # 激活函数 ReLU
        self.relu2 = nn.ReLU()
        # 第二层隐藏层到输出层的全连接层
        self.fc3 = nn.Dense(hidden_sizes[1], output_size, weight_init="normal")

    def construct(self, x):
        # 数据通过输入层进入第一层隐藏层
        x = self.fc1(x)
        # 使用 ReLU 激活函数
        x = self.relu1(x)
        # 数据通过第一层隐藏层进入第二层隐藏层
        x = self.fc2(x)
        # 使用 ReLU 激活函数
        x = self.relu2(x)
        # 数据通过第二层隐藏层进入输出层
        x = self.fc3(x)
        return x

# Step 3: 创建模型实例
input_size = X.shape[1]  # 输入层的特征数量
hidden_sizes = [50, 100]  # 隐藏层中的神经元数量
output_size = y.shape[1]  # 输出层的神经元数量（类别数量）
model = MLPClassifier(input_size, hidden_sizes, output_size)

# Step 4: 定义超参数、损失函数和优化器
epochs = 8
batch_size = 32
learning_rate = 0.01

loss_fn = nn.SoftmaxCrossEntropyWithLogits(sparse=False, reduction='mean')  # 损失函数
optimizer = nn.Adam(params=model.trainable_params(), learning_rate=learning_rate)  # 优化器

# Step 5: 割分训练集和测试集
# 将数据集割分为训练集和测试集
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
train_dataset = ds.NumpySlicesDataset(data=(X_train, y_train), column_names=['features', 'labels'], shuffle=True).batch(batch_size)
test_dataset = ds.NumpySlicesDataset(data=(X_test, y_test), column_names=['features', 'labels'], shuffle=False).batch(batch_size)

# Step 6: 定义训练和测试函数
def forward_fn(data, label):
    logits = model(data)
    loss = loss_fn(logits, label)
    return loss, logits

grad_fn = ms.value_and_grad(forward_fn, None, optimizer.parameters, has_aux=True)

def train_step(data, label):
    (loss, _), grads = grad_fn(data, label)
    optimizer(grads)
    return loss

def train_loop(model, dataset, loss_fn):
    size = dataset.get_dataset_size()
    model.set_train()
    for batch, (data, label) in enumerate(dataset.create_tuple_iterator()):
        loss = train_step(data, label)
        if batch % 100 == 0:
            loss_value = loss.asnumpy()
            print(f"loss: {loss_value:>7f}  [{batch:>3d}/{size:>3d}]")

def test_loop(model, dataset, loss_fn):
    num_batches = dataset.get_dataset_size()
    model.set_train(False)
    total, test_loss, correct = 0, 0, 0
    for data, label in dataset.create_tuple_iterator():
        pred = model(data)
        total += len(data)
        test_loss += loss_fn(pred, label).asnumpy()
        correct += (pred.argmax(1) == label.argmax(1)).asnumpy().sum()
    test_loss /= num_batches
    correct /= total
    print(f"Test: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

# Step 7: 开始训练与测试
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train_loop(model, train_dataset, loss_fn)
    test_loop(model, test_dataset, loss_fn)
print("Done!")

# Step 8: 使用模型进行推理
# 创建一个形状为 (1, input_size) 的全为 1 的形状作为输入
X_infer = Tensor(np.ones((1, input_size)), ms.float32)
# 使用模型进行前向传播，获取输出 logits
y_pred = model(X_infer)
# 打印预测结果
print(f"Predicted class probabilities: {y_pred}")

Epoch 1
-------------------------------


[ERROR] CORE(439,7f76396f14c0,python):2024-10-14-13:37:41.497.224 [mindspore/core/utils/file_utils.cc:253] GetRealPath] Get realpath failed, path[/tmp/ipykernel_439/2310715098.py]
[ERROR] CORE(439,7f76396f14c0,python):2024-10-14-13:37:41.497.269 [mindspore/core/utils/file_utils.cc:253] GetRealPath] Get realpath failed, path[/tmp/ipykernel_439/2310715098.py]
[ERROR] CORE(439,7f76396f14c0,python):2024-10-14-13:37:41.497.288 [mindspore/core/utils/file_utils.cc:253] GetRealPath] Get realpath failed, path[/tmp/ipykernel_439/2310715098.py]
[ERROR] CORE(439,7f76396f14c0,python):2024-10-14-13:37:41.497.308 [mindspore/core/utils/file_utils.cc:253] GetRealPath] Get realpath failed, path[/tmp/ipykernel_439/2310715098.py]


loss: 4.751053  [  0/2850]
loss: 4.381146  [100/2850]
loss: 3.770947  [200/2850]
loss: 3.517884  [300/2850]
loss: 4.052758  [400/2850]
loss: 3.977578  [500/2850]
loss: 3.172975  [600/2850]
loss: 3.694036  [700/2850]
loss: 4.385149  [800/2850]
loss: 3.965744  [900/2850]
loss: 3.912257  [1000/2850]
loss: 3.987021  [1100/2850]
loss: 4.232032  [1200/2850]
loss: 3.177134  [1300/2850]
loss: 3.458052  [1400/2850]
loss: 4.127619  [1500/2850]
loss: 3.224372  [1600/2850]
loss: 3.719158  [1700/2850]
loss: 3.849706  [1800/2850]
loss: 3.815284  [1900/2850]
loss: 3.405531  [2000/2850]
loss: 3.632096  [2100/2850]
loss: 3.390835  [2200/2850]
loss: 3.478814  [2300/2850]
loss: 3.842928  [2400/2850]
loss: 3.734197  [2500/2850]
loss: 3.250527  [2600/2850]
loss: 3.887791  [2700/2850]
loss: 3.505733  [2800/2850]


[ERROR] CORE(439,7f76396f14c0,python):2024-10-14-13:37:49.653.123 [mindspore/core/utils/file_utils.cc:253] GetRealPath] Get realpath failed, path[/tmp/ipykernel_439/2310715098.py]
[ERROR] CORE(439,7f76396f14c0,python):2024-10-14-13:37:49.653.197 [mindspore/core/utils/file_utils.cc:253] GetRealPath] Get realpath failed, path[/tmp/ipykernel_439/2310715098.py]
[ERROR] CORE(439,7f76396f14c0,python):2024-10-14-13:37:51.649.432 [mindspore/core/utils/file_utils.cc:253] GetRealPath] Get realpath failed, path[/tmp/ipykernel_439/2310715098.py]
[ERROR] CORE(439,7f76396f14c0,python):2024-10-14-13:37:51.649.471 [mindspore/core/utils/file_utils.cc:253] GetRealPath] Get realpath failed, path[/tmp/ipykernel_439/2310715098.py]


Test: 
 Accuracy: 14.5%, Avg loss: 3.557605 

Epoch 2
-------------------------------
loss: 4.062263  [  0/2850]
loss: 3.758794  [100/2850]
loss: 3.526545  [200/2850]
loss: 3.984953  [300/2850]
loss: 3.641108  [400/2850]
loss: 4.017857  [500/2850]
loss: 3.696941  [600/2850]
loss: 3.560792  [700/2850]
loss: 3.940561  [800/2850]
loss: 3.793410  [900/2850]
loss: 3.546626  [1000/2850]
loss: 3.499299  [1100/2850]
loss: 3.584493  [1200/2850]
loss: 3.412812  [1300/2850]
loss: 3.321845  [1400/2850]
loss: 3.223374  [1500/2850]
loss: 3.533934  [1600/2850]
loss: 3.495475  [1700/2850]
loss: 3.511067  [1800/2850]
loss: 3.160665  [1900/2850]
loss: 3.463381  [2000/2850]
loss: 3.674786  [2100/2850]
loss: 3.719467  [2200/2850]
loss: 3.383220  [2300/2850]
loss: 3.703263  [2400/2850]
loss: 3.462914  [2500/2850]
loss: 3.441289  [2600/2850]
loss: 3.415045  [2700/2850]
loss: 3.958128  [2800/2850]
Test: 
 Accuracy: 14.7%, Avg loss: 3.551239 

Epoch 3
-------------------------------
loss: 2.895353  [  0/2850]

[ERROR] CORE(439,7f76396f14c0,python):2024-10-14-13:39:04.771.351 [mindspore/core/utils/file_utils.cc:253] GetRealPath] Get realpath failed, path[/tmp/ipykernel_439/2310715098.py]
[ERROR] CORE(439,7f76396f14c0,python):2024-10-14-13:39:04.771.393 [mindspore/core/utils/file_utils.cc:253] GetRealPath] Get realpath failed, path[/tmp/ipykernel_439/2310715098.py]


In [3]:
import mindspore.nn as nn
import mindspore.dataset as ds
import mindspore as ms
from mindspore import Tensor, Model,context
import mindspore.dataset.vision as vision
import mindspore.dataset.transforms as transforms
from mindspore.train.callback import LossMonitor
import numpy as np
import pandas as pd
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.model_selection import train_test_split
# 设置运行环境为 GPU
context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
# Step 1: 加载并处理数据集
df = pd.read_csv("processed_dataset.csv")

# 将 'explicit' 列转换为数值型 (True/False 转换为 1/0)
df['explicit'] = df['explicit'].astype(int)

# 将类别特征 'track_genre' 做 One-Hot 编码
encoder = OneHotEncoder(sparse_output=False)
track_genre_encoded = encoder.fit_transform(df[['track_genre']])

# 将编码后的数据添加回数据框并移除原始的 'track_genre' 列
encoded_columns = encoder.get_feature_names_out(['track_genre'])
df_encoded = pd.DataFrame(track_genre_encoded, columns=encoded_columns)
df = pd.concat([df.drop(columns=['track_genre']), df_encoded], axis=1)

# 对类别特征 'key', 'mode', 'time_signature' 进行 One-Hot 编码
categorical_features = ['key', 'mode', 'time_signature']
categorical_encoder = OneHotEncoder(sparse_output=False, drop='first')
categorical_encoded = categorical_encoder.fit_transform(df[categorical_features])
categorical_encoded_df = pd.DataFrame(categorical_encoded, columns=categorical_encoder.get_feature_names_out(categorical_features))

# 合并编码后的数据和数值特征，并移除原始的类别特征
df = pd.concat([df.drop(columns=categorical_features).reset_index(drop=True), categorical_encoded_df], axis=1)

# 标准化数值特征
numerical_features = df.select_dtypes(include=['float64', 'int64'])
scaler = StandardScaler()
df[numerical_features.columns] = scaler.fit_transform(numerical_features)

# 将标签转换为 float32 类型
df_encoded = df_encoded.astype(np.float32)

# 提取特征和标签
X = df.drop(columns=encoded_columns).astype(np.float32).values
y = df_encoded.values

# Step 2: 定义一个简单的 MLP 分类器
class MLPClassifier(nn.Cell):
    def __init__(self, input_size, hidden_sizes, output_size):
        super(MLPClassifier, self).__init__()
        # 输入层到第一层隐藏层的全连接层
        self.fc1 = nn.Dense(input_size, hidden_sizes[0], weight_init="normal")
        # 激活函数 ReLU
        self.relu1 = nn.ReLU()
        # 第一层隐藏层到第二层隐藏层的全连接层
        self.fc2 = nn.Dense(hidden_sizes[0], hidden_sizes[1], weight_init="normal")
        # 激活函数 ReLU
        self.relu2 = nn.ReLU()
        # 第二层隐藏层到输出层的全连接层
        self.fc3 = nn.Dense(hidden_sizes[1], output_size, weight_init="normal")

    def construct(self, x):
        # 数据通过输入层进入第一层隐藏层
        x = self.fc1(x)
        # 使用 ReLU 激活函数
        x = self.relu1(x)
        # 数据通过第一层隐藏层进入第二层隐藏层
        x = self.fc2(x)
        # 使用 ReLU 激活函数
        x = self.relu2(x)
        # 数据通过第二层隐藏层进入输出层
        x = self.fc3(x)
        return x

# Step 3: 创建模型实例
input_size = X.shape[1]  # 输入层的特征数量
hidden_sizes = [100, 100]  # 隐藏层中的神经元数量
output_size = y.shape[1]  # 输出层的神经元数量（类别数量）
model = MLPClassifier(input_size, hidden_sizes, output_size)

# Step 4: 定义超参数、损失函数和优化器
epochs = 8
batch_size = 64
learning_rate = 0.01

loss_fn = nn.SoftmaxCrossEntropyWithLogits(sparse=False, reduction='mean')  # 损失函数
optimizer = nn.Adam(params=model.trainable_params(), learning_rate=learning_rate)  # 优化器

# Step 5: 割分训练集和测试集
# 将数据集割分为训练集和测试集
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
train_dataset = ds.NumpySlicesDataset(data=(X_train, y_train), column_names=['features', 'labels'], shuffle=True).batch(batch_size)
test_dataset = ds.NumpySlicesDataset(data=(X_test, y_test), column_names=['features', 'labels'], shuffle=False).batch(batch_size)

# Step 6: 定义训练和测试函数
def forward_fn(data, label):
    logits = model(data)
    loss = loss_fn(logits, label)
    return loss, logits

grad_fn = ms.value_and_grad(forward_fn, None, optimizer.parameters, has_aux=True)

def train_step(data, label):
    (loss, _), grads = grad_fn(data, label)
    optimizer(grads)
    return loss

def train_loop(model, dataset, loss_fn):
    size = dataset.get_dataset_size()
    model.set_train()
    for batch, (data, label) in enumerate(dataset.create_tuple_iterator()):
        loss = train_step(data, label)
        if batch % 100 == 0:
            loss_value = loss.asnumpy()
            print(f"loss: {loss_value:>7f}  [{batch:>3d}/{size:>3d}]")

def test_loop(model, dataset, loss_fn):
    num_batches = dataset.get_dataset_size()
    model.set_train(False)
    total, test_loss, correct = 0, 0, 0
    for data, label in dataset.create_tuple_iterator():
        pred = model(data)
        total += len(data)
        test_loss += loss_fn(pred, label).asnumpy()
        correct += (pred.argmax(1) == label.argmax(1)).asnumpy().sum()
    test_loss /= num_batches
    correct /= total
    print(f"Test: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

# Step 7: 开始训练与测试
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train_loop(model, train_dataset, loss_fn)
    test_loop(model, test_dataset, loss_fn)
print("Done!")

# Step 8: 使用模型进行推理
# 创建一个形状为 (1, input_size) 的全为 1 的形状作为输入
X_infer = Tensor(np.ones((1, input_size)), ms.float32)
# 使用模型进行前向传播，获取输出 logits
y_pred = model(X_infer)
# 打印预测结果
print(f"Predicted class probabilities: {y_pred}")

Epoch 1
-------------------------------


[ERROR] CORE(439,7f76396f14c0,python):2024-10-14-13:40:15.432.774 [mindspore/core/utils/file_utils.cc:253] GetRealPath] Get realpath failed, path[/tmp/ipykernel_439/2521907368.py]
[ERROR] CORE(439,7f76396f14c0,python):2024-10-14-13:40:15.432.811 [mindspore/core/utils/file_utils.cc:253] GetRealPath] Get realpath failed, path[/tmp/ipykernel_439/2521907368.py]
[ERROR] CORE(439,7f76396f14c0,python):2024-10-14-13:40:15.432.828 [mindspore/core/utils/file_utils.cc:253] GetRealPath] Get realpath failed, path[/tmp/ipykernel_439/2521907368.py]
[ERROR] CORE(439,7f76396f14c0,python):2024-10-14-13:40:15.432.857 [mindspore/core/utils/file_utils.cc:253] GetRealPath] Get realpath failed, path[/tmp/ipykernel_439/2521907368.py]


loss: 4.739250  [  0/1425]
loss: 3.901616  [100/1425]
loss: 3.844621  [200/1425]
loss: 3.843393  [300/1425]
loss: 3.574919  [400/1425]
loss: 3.427258  [500/1425]
loss: 3.609534  [600/1425]
loss: 3.589840  [700/1425]
loss: 3.652241  [800/1425]
loss: 3.453996  [900/1425]
loss: 3.672565  [1000/1425]
loss: 3.564564  [1100/1425]
loss: 3.459152  [1200/1425]
loss: 3.507923  [1300/1425]
loss: 3.547889  [1400/1425]


[ERROR] CORE(439,7f76396f14c0,python):2024-10-14-13:40:22.991.193 [mindspore/core/utils/file_utils.cc:253] GetRealPath] Get realpath failed, path[/tmp/ipykernel_439/2521907368.py]
[ERROR] CORE(439,7f76396f14c0,python):2024-10-14-13:40:22.991.238 [mindspore/core/utils/file_utils.cc:253] GetRealPath] Get realpath failed, path[/tmp/ipykernel_439/2521907368.py]
[ERROR] CORE(439,7f76396f14c0,python):2024-10-14-13:40:25.208.620 [mindspore/core/utils/file_utils.cc:253] GetRealPath] Get realpath failed, path[/tmp/ipykernel_439/2521907368.py]
[ERROR] CORE(439,7f76396f14c0,python):2024-10-14-13:40:25.208.663 [mindspore/core/utils/file_utils.cc:253] GetRealPath] Get realpath failed, path[/tmp/ipykernel_439/2521907368.py]


Test: 
 Accuracy: 15.3%, Avg loss: 3.515040 

Epoch 2
-------------------------------
loss: 3.212141  [  0/1425]
loss: 3.419340  [100/1425]
loss: 3.615645  [200/1425]
loss: 3.557674  [300/1425]
loss: 3.624898  [400/1425]
loss: 3.495601  [500/1425]
loss: 3.933043  [600/1425]
loss: 3.223706  [700/1425]
loss: 3.419834  [800/1425]
loss: 3.515543  [900/1425]
loss: 3.309186  [1000/1425]
loss: 3.384953  [1100/1425]
loss: 3.563389  [1200/1425]
loss: 3.347105  [1300/1425]
loss: 3.410980  [1400/1425]
Test: 
 Accuracy: 16.8%, Avg loss: 3.448343 

Epoch 3
-------------------------------
loss: 3.180415  [  0/1425]
loss: 3.396326  [100/1425]
loss: 3.157975  [200/1425]
loss: 3.159078  [300/1425]
loss: 3.218615  [400/1425]
loss: 3.291227  [500/1425]
loss: 3.338558  [600/1425]
loss: 3.184371  [700/1425]
loss: 3.424456  [800/1425]
loss: 3.255927  [900/1425]
loss: 3.534780  [1000/1425]
loss: 3.312005  [1100/1425]
loss: 3.347055  [1200/1425]
loss: 3.405071  [1300/1425]
loss: 3.374568  [1400/1425]
Test: 
 

[ERROR] CORE(439,7f76396f14c0,python):2024-10-14-13:41:05.694.153 [mindspore/core/utils/file_utils.cc:253] GetRealPath] Get realpath failed, path[/tmp/ipykernel_439/2521907368.py]
[ERROR] CORE(439,7f76396f14c0,python):2024-10-14-13:41:05.694.196 [mindspore/core/utils/file_utils.cc:253] GetRealPath] Get realpath failed, path[/tmp/ipykernel_439/2521907368.py]
