# 🚲 Прогнозирование типа пользователя Divvy Bike
Цель: сравнить эффективность персептрона, случайного леса и XGBoost.

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.linear_model import Perceptron
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score

## 📂 Загрузка данных

In [16]:
df = pd.read_csv('../data/trips.csv', encoding="utf_16_le")
df = df.sample(n=10000, random_state=42).copy()

## 🔧 Предобработка данных

In [17]:
# Удаляем лишние колонки
df.drop(['start_station_name', 'end_station_name'], axis=1, inplace=True)

# Преобразуем время в удобные признаки
df['started_at'] = pd.to_datetime(df['started_at'])
df['ended_at'] = pd.to_datetime(df['ended_at'])
df['duration_sec'] = (df['ended_at'] - df['started_at']).dt.total_seconds()
df['hour_of_day'] = df['started_at'].dt.hour

# Кодируем категориальные признаки
le = LabelEncoder()
df['member_casual'] = le.fit_transform(df['member_casual'])  # member -> 0, casual -> 1
df['rideable_type'] = LabelEncoder().fit_transform(df['rideable_type'])

# Выбираем фичи и таргет
X = df[['hour_of_day', 'rideable_type', 'duration_sec']]
y = df['member_casual']

# Нормализация
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Разделение на train/test
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.3, random_state=42)

## 🤖 Обучение моделей

In [18]:
# Персептрон
perceptron = Perceptron(max_iter=1000, tol=1e-3)
perceptron.fit(X_train, y_train)
y_pred_p = perceptron.predict(X_test)

# Случайный лес
rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)
y_pred_rf = rf.predict(X_test)

# XGBoost
xgb = XGBClassifier(use_label_encoder=False, eval_metric='logloss')
xgb.fit(X_train, y_train)
y_pred_xgb = xgb.predict(X_test)

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


## 📊 Сравнение моделей

In [20]:
acc_p = accuracy_score(y_test, y_pred_p)
acc_rf = accuracy_score(y_test, y_pred_rf)
acc_xgb = accuracy_score(y_test, y_pred_xgb)

print(f'Персептрон: {acc_p:.4f}')
print(f'Случайный лес: {acc_rf:.4f}')
print(f'XGBoost: {acc_xgb:.4f}')

Персептрон: 0.7420
Случайный лес: 0.7460
XGBoost: 0.8177
