In [19]:
import pandas as pd
# Load the dataset
file_path = 'training_data.csv'
data = pd.read_csv(file_path)
# Display the first few rows of the dataset to understand its structure
data.head()

Unnamed: 0,hour_of_day,day_of_week,month,holiday,weekday,summertime,temp,dew,humidity,precip,snow,snowdepth,windspeed,cloudcover,visibility,increase_stock
0,5,5,1,0,0,0,-7.2,-15.0,53.68,0.0,0,0.0,16.3,31.6,16.0,low_bike_demand
1,21,4,1,0,1,0,-1.3,-12.8,40.97,0.0,0,0.0,23.9,85.7,16.0,low_bike_demand
2,21,3,8,0,1,1,26.9,21.8,73.39,0.0,0,0.0,0.0,81.1,16.0,low_bike_demand
3,1,6,1,0,0,0,3.1,-4.0,59.74,0.0,0,0.0,19.2,0.0,16.0,low_bike_demand
4,17,0,3,0,1,0,11.7,-11.4,18.71,0.0,0,0.0,10.5,44.6,16.0,low_bike_demand


In [20]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
# 编码目标变量
le = LabelEncoder()
data['increase_stock'] = le.fit_transform(data['increase_stock'])

# 将数据集分割为特征和目标
X = data.drop('increase_stock', axis=1)
y = data['increase_stock']

# 将数据分割为训练集和测试集
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=0)


In [21]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import StandardScaler
import numpy as np

# 定义一个简单的深度神经网络
class SimpleDNN(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(SimpleDNN, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size) 
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, num_classes)  
    
    def forward(self, x):
        out = self.fc1(x)
        out = self.relu(out)
        out = self.fc2(out)
        return out

# 神经网络参数
input_size = X_train.shape[1]
hidden_size = 64
num_classes = len(np.unique(y_train))
num_epochs = 100
batch_size = 64
learning_rate = 0.001

# 数据标准化
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# 转换为 PyTorch tensors
X_train_torch = torch.tensor(X_train_scaled.astype(np.float32))
y_train_torch = torch.tensor(y_train.values.astype(np.int64))
X_test_torch = torch.tensor(X_test_scaled.astype(np.float32))
y_test_torch = torch.tensor(y_test.values.astype(np.int64))

# 加载数据
train_dataset = TensorDataset(X_train_torch, y_train_torch)
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)

# 初始化网络和优化器
model = SimpleDNN(input_size, hidden_size, num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# 训练模型
model.train()
for epoch in range(num_epochs):
    for i, (inputs, labels) in enumerate(train_loader):
        # 前向传播
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # 反向传播和优化
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

# 评估模型性能
model.eval()
outputs = model(X_test_torch)
_, predicted = torch.max(outputs.data, 1)
accuracy_dnn = (predicted == y_test_torch).sum().item() / y_test_torch.size(0)

# 将 DNN 模型的预测结果与其他模型的结果结合起来进行软投票
# 注意: 这需要手动实现，因为 sklearn 的 VotingClassifier 不能直接集成 PyTorch 模型

In [22]:
accuracy_dnn

0.875

In [26]:
lr = LogisticRegression(max_iter=1200)
lda = LinearDiscriminantAnalysis()
qda = QuadraticDiscriminantAnalysis()
knn = KNeighborsClassifier(n_neighbors=2)
random_forest = RandomForestClassifier()

In [30]:
from sklearn.preprocessing import OneHotEncoder
from scipy.special import softmax
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
# 获取 sklearn 模型的预测概率
lr.fit(X_train_scaled, y_train)
lda.fit(X_train_scaled, y_train)
qda.fit(X_train_scaled, y_train)
knn.fit(X_train_scaled, y_train)
random_forest.fit(X_train, y_train)
prob_lr = lr.predict_proba(X_test_scaled)
prob_lda = lda.predict_proba(X_test_scaled)
prob_qda = qda.predict_proba(X_test_scaled)
prob_knn = knn.predict_proba(X_test_scaled)
prob_rf = random_forest.predict_proba(X_test_scaled)

# 获取 DNN 模型的预测概率
dnn_outputs = model(X_test_torch)
prob_dnn = softmax(dnn_outputs.detach().numpy(), axis=1)

# 计算所有模型预测概率的平均值
average_probs = (prob_knn + prob_rf + prob_dnn)/3 #(prob_lr + prob_lda + prob_qda + prob_knn + prob_rf + prob_dnn) / 6

# 选择具有最高平均概率的类别作为最终预测
final_predictions = np.argmax(average_probs, axis=1)

# 计算最终的准确率
final_accuracy = np.mean(final_predictions == y_test)

final_accuracy




0.85

In [29]:
qda.score(X_test_scaled,y_test)

0.67

In [31]:
random_forest.score(X_test_scaled,y_test)



0.81