In [14]:
import pandas as pd
import numpy as np
import os
import re

# 尝试导入 sklearn，如果失败则使用手动实现
try:
    from sklearn.model_selection import train_test_split
    USE_SKLEARN = True
except ImportError as e:
    print(f"警告: sklearn 导入失败 ({e})，将使用手动实现的 train_test_split")
    USE_SKLEARN = False
    
    def train_test_split(X, y, test_size=0.2, random_state=None):
        """手动实现的 train_test_split，不依赖 sklearn"""
        if random_state is not None:
            np.random.seed(random_state)
        
        n_samples = len(X)
        n_test = int(n_samples * test_size)
        indices = np.random.permutation(n_samples)
        
        test_indices = indices[:n_test]
        train_indices = indices[n_test:]
        
        X_train, X_test = X[train_indices], X[test_indices]
        y_train, y_test = y[train_indices], y[test_indices]
        
        return X_train, X_test, y_train, y_test

def extract_numeric_features(data):
    feature_columns = ['充放电次数', '环境温度', '当前电量']
    numeric_data = pd.DataFrame()

    for col in feature_columns:
        if col in data.columns:
            extracted_values = []
            for value in data[col]:
                if pd.isna(value):
                    extracted_values.append(np.nan)
                else:
                    matches = re.findall(r'\d+\.?\d*', str(value))
                    if matches:
                        try:
                            extracted_values.append(float(matches[0]))
                        except ValueError:
                            extracted_values.append(np.nan)
                    else:
                        extracted_values.append(np.nan)
            numeric_data[col] = extracted_values
    return numeric_data


def load_battery_data(file_path, test_ratio=0.2, random_state=None):
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"Excel文件 {file_path} 不存在")

    try:
        data = pd.read_excel(file_path)
    except Exception as e:
        raise ValueError(f"读取Excel文件失败: {str(e)}")


    numeric_features = extract_numeric_features(data)

    label_column = '电池量'
    y = data[label_column].values

    X = numeric_features.values
    
    # 使用已导入的 train_test_split（可能是 sklearn 或手动实现）
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_ratio, random_state=random_state
    )
    
    return X_train, X_test, y_train, y_test


警告: sklearn 导入失败 (The `scipy` install you are using seems to be broken, (extension modules cannot be imported), please try reinstalling.)，将使用手动实现的 train_test_split


In [15]:
from collections import OrderedDict

class Affine:
    def __init__(self,W,b):
        self.W = W
        self.b = b
        self.x = None
        self.dW = None
        self.db = None
    
    def forward(self,x):
        self.x = x
        out = np.dot(x,self.W) + self.b
        return out
    
    def backward(self,dout):
        dx = np.dot(dout,self.W.T)
        self.dW = np.dot(self.x.T,dout)
        self.db = np.sum(dout,axis=0)
        return dx
    
class AddPower:
    def __init__(self,exponent):
        self.exponent = exponent
        self.x = None
        self.dExponent = None
    
    def forward(self,x):
        x += 1e-7
        self.x = x
        out = np.power(x,self.exponent)
        return out
    
    def backward(self,dout):
        dx = np.power(self.x,self.exponent-1) * self.exponent * dout
        dExponent_full = self.x ** self.exponent * np.log(self.x) * dout
        self.dExponent = np.sum(dExponent_full, axis=0)
        return dx

def mse(y,t):
    return np.mean((y-t)**2) * 0.5

class MSE:
    def __init__(self):
        self.loss = None
        self.y = None
        self.t = None
    
    def forward(self,y,t):
        self.y = y
        self.t = t
        self.loss = mse(y,t)
        return self.loss
    
    def backward(self,dout=1):
        batch_size = self.t.shape[0]
        if len(self.t.shape) == 1:
            t_reshaped = self.t.reshape(-1, 1)
        else:
            t_reshaped = self.t
        dx = (self.y - t_reshaped) / batch_size
        return dx

class battery_model:
    def __init__(self,input_size=3,output_size=1,weight_init=0.1):
        self.params = {}
        self.params['exponent'] = np.ones(input_size)
        self.params['W1'] = np.random.randn(input_size,output_size) * weight_init
        self.params['b1'] = np.zeros(output_size)
        
        self.layers = OrderedDict()
        self.layers['AddPower'] = AddPower(self.params['exponent'])
        self.layers['Affine1'] = Affine(self.params['W1'], self.params['b1'])
        
        self.loss_layer = MSE()
        
        
    def predict(self,x):
        for layer in self.layers.values():
            x = layer.forward(x)
        return x
    
    def loss(self,x,t):
        y = self.predict(x)
        if len(t.shape) == 1:
            t = t.reshape(-1, 1)
        return self.loss_layer.forward(y,t)
    
    def gradient(self,x,t):
        if len(t.shape) == 1:
            t = t.reshape(-1, 1)
        self.loss(x,t)
        dout = self.loss_layer.backward()
        layers = list(self.layers.values())
        layers.reverse()
        for layer in layers:
            dout = layer.backward(dout)
        
        gradient = {
                'exponent': self.layers['AddPower'].dExponent,
                'W1': self.layers['Affine1'].dW,
                'b1': self.layers['Affine1'].db
        }
        
        return gradient
    
    def accuracy(self,x,t,threshold=0.05):
        y = self.predict(x)
        
        if len(t.shape) == 1:
            t = t.reshape(-1, 1)

        relative_error = np.abs((y - t) / (t + 1e-7))

        correct = relative_error <= threshold
        return np.mean(correct)

In [16]:
def batch_train(model, X_train, y_train, X_test=None, y_test=None, epochs=10, batch_size=100, learning_rate=0.01, verbose=True):

    num_samples = X_train.shape[0]
    num_batches = num_samples // batch_size
    
    if num_samples % batch_size != 0:
        num_batches += 1
        
    history = {
        'train_loss': [],
        'test_loss': [],
        'train_accuracy': [],
        'test_accuracy': []
    }
    
    for epoch in range(epochs):
        indices = np.random.permutation(num_samples)
        X_train_shuffled = X_train[indices]
        y_train_shuffled = y_train[indices]
        
        epoch_loss = 0
        
        for i in range(num_batches):
            start_idx = i * batch_size
            end_idx = min((i + 1) * batch_size, num_samples)
            
            X_batch = X_train_shuffled[start_idx:end_idx]
            y_batch = y_train_shuffled[start_idx:end_idx]
            
            grads = model.gradient(X_batch, y_batch)

            model.params['W1'] -= learning_rate * grads['W1']
            model.params['b1'] -= learning_rate * grads['b1']
            model.params['exponent'] -= learning_rate * grads['exponent']
            
            batch_loss = model.loss(X_batch, y_batch)
            epoch_loss += batch_loss
            
        avg_loss = epoch_loss / num_batches
        history['train_loss'].append(avg_loss)
        
        train_acc = model.accuracy(X_train, y_train)
        history['train_accuracy'].append(train_acc)
        
        if X_test is not None and y_test is not None:
            test_loss = model.loss(X_test, y_test)
            test_acc = model.accuracy(X_test, y_test)
            history['test_loss'].append(test_loss)
            history['test_accuracy'].append(test_acc)
            
            if verbose and (epoch + 1) % 10 == 0:
                print(f"Epoch {epoch+1}/{epochs} - Loss: {avg_loss:.4f} - Test Loss: {test_loss:.4f} - Train Acc: {train_acc:.4f} - Test Acc: {test_acc:.4f}")
        else:
            if verbose and (epoch + 1) % 10 == 0:
                print(f"Epoch {epoch+1}/{epochs} - Loss: {avg_loss:.4f} - Train Acc: {train_acc:.4f}")
                
    return history


In [25]:
X_train, X_test, y_train, y_test = load_battery_data('BatteryData.xlsx')

print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

(800, 1) (200, 1) (800,) (200,)


In [30]:
model = battery_model()

history = batch_train(model, X_train, y_train, X_test, y_test,epochs=100, batch_size=100, learning_rate=0.01)

Epoch 10/100 - Loss: 0.1715 - Test Loss: 0.1471 - Train Acc: 0.0000 - Test Acc: 0.0000
Epoch 20/100 - Loss: 0.0285 - Test Loss: 0.0238 - Train Acc: 0.0312 - Test Acc: 0.0500
Epoch 30/100 - Loss: 0.0103 - Test Loss: 0.0090 - Train Acc: 0.4800 - Test Acc: 0.4600
Epoch 40/100 - Loss: 0.0076 - Test Loss: 0.0071 - Train Acc: 0.3950 - Test Acc: 0.3400
Epoch 50/100 - Loss: 0.0071 - Test Loss: 0.0069 - Train Acc: 0.3525 - Test Acc: 0.3250
Epoch 60/100 - Loss: 0.0069 - Test Loss: 0.0068 - Train Acc: 0.3250 - Test Acc: 0.3200
Epoch 70/100 - Loss: 0.0068 - Test Loss: 0.0067 - Train Acc: 0.3250 - Test Acc: 0.3200
Epoch 80/100 - Loss: 0.0067 - Test Loss: 0.0066 - Train Acc: 0.3412 - Test Acc: 0.3350
Epoch 90/100 - Loss: 0.0066 - Test Loss: 0.0065 - Train Acc: 0.3412 - Test Acc: 0.3350
Epoch 100/100 - Loss: 0.0065 - Test Loss: 0.0064 - Train Acc: 0.3412 - Test Acc: 0.3350


In [19]:
print(history['train_loss'])
print(history['test_loss'])

[np.float64(22.568895448712816), np.float64(0.6790408526633146), np.float64(0.5586616922833635), np.float64(0.46262839494792724), np.float64(0.3848225322561779), np.float64(0.32113573089083297), np.float64(0.26864897958360073), np.float64(0.22517253261312264), np.float64(0.18902346329090497), np.float64(0.15888696408745911), np.float64(0.1337039127013272), np.float64(0.11262192138542665), np.float64(0.09494918517929249), np.float64(0.08011702182848826), np.float64(0.06765555788848308), np.float64(0.057177833222968696), np.float64(0.048361736179516786), np.float64(0.04093957013375974), np.float64(0.034686810648417984), np.float64(0.029418032853844377), np.float64(0.024976802635592057), np.float64(0.021231094766459548), np.float64(0.0180711087357286), np.float64(0.015404653537827434), np.float64(0.01315387191641959), np.float64(0.011254424169583891), np.float64(0.009649820311402385), np.float64(0.008295106584511021), np.float64(0.0071509296587408695), np.float64(0.006184132495349241), np