In [5]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.metrics import classification_report

# 讀取資料
df = pd.read_excel("數據整理到8月版20250529.xlsx", sheet_name="芝山到天母校區")

# U-Bike 樣本標註
df['transport_mode'] = 1

# 建立對照組：模擬選擇公車的樣本
df_bus = df.sample(frac=0.5, random_state=42).copy()
df_bus['transport_mode'] = 0
np.random.seed(42)
df_bus['simulated_bus_time_min'] = np.random.randint(15, 26, size=len(df_bus))

# 整合資料
df_combined = pd.concat([df, df_bus], ignore_index=True)

# 時間處理：轉換租借時數為分鐘
def time_to_minutes(t):
    return t.hour * 60 + t.minute + t.second / 60

df_combined['rent_duration_min'] = df_combined['租借時數'].apply(time_to_minutes)
df_combined['hour'] = pd.to_datetime(df_combined['借車時間']).dt.hour
df_combined['is_peak'] = df_combined['hour'].apply(lambda x: 1 if 7 <= x <= 9 or 17 <= x <= 19 else 0)
df_combined['借車日期'] = pd.to_datetime(df_combined['借車日期'])
df_combined['is_weekend'] = df_combined['借車日期'].dt.weekday.apply(lambda x: 1 if x >= 5 else 0)

# 選擇模型變數
df_model = df_combined[[
    'rent_duration_min', 'simulated_bus_time_min', 'is_peak', 'is_weekend', 'transport_mode'
]].dropna()

# 特徵與標籤
X = df_model.drop('transport_mode', axis=1)
y = df_model['transport_mode']

# 切分資料
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# 建立模型
models = {
    "Logistic Regression": LogisticRegression(),
    "Decision Tree": DecisionTreeClassifier(),
    "Random Forest": RandomForestClassifier(),
    "XGBoost": XGBClassifier(use_label_encoder=False, eval_metric='logloss')
}

# 訓練與預測
for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    print(f"\\n{name} Results:")
    print(classification_report(y_test, y_pred))


ValueError: This solver needs samples of at least 2 classes in the data, but the data contains only one class: np.int64(0)