In [19]:
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.metrics import (
    accuracy_score, f1_score, confusion_matrix,
    mean_absolute_error, mean_squared_error, r2_score
)

import gdown


In [20]:
# Загрузка датасетов
bike_url = "https://drive.google.com/uc?id=13XkpwUaAWwGSwUJ1WXn-0HfT2xPXtVQ5"
gdown.download(bike_url, "seoul_bike.csv", quiet=False)
bike_df = pd.read_csv("seoul_bike.csv", encoding="cp1252")

details_url = "https://drive.google.com/uc?id=1wslcaUNHmRHH3wF4x4X1M9CgsFIz26wk"
gdown.download(details_url, "details.csv", quiet=False)
details_df = pd.read_csv("details.csv")

bike_df.head()




Downloading...
From: https://drive.google.com/uc?id=13XkpwUaAWwGSwUJ1WXn-0HfT2xPXtVQ5
To: /content/seoul_bike.csv
100%|██████████| 604k/604k [00:00<00:00, 78.5MB/s]
Downloading...
From: https://drive.google.com/uc?id=1wslcaUNHmRHH3wF4x4X1M9CgsFIz26wk
To: /content/details.csv
100%|██████████| 760k/760k [00:00<00:00, 73.1MB/s]


Unnamed: 0,Date,Rented Bike Count,Hour,Temperature(°C),Humidity(%),Wind speed (m/s),Visibility (10m),Dew point temperature(°C),Solar Radiation (MJ/m2),Rainfall(mm),Snowfall (cm),Seasons,Holiday,Functioning Day
0,01/12/2017,254,0,-5.2,37,2.2,2000,-17.6,0.0,0.0,0.0,Winter,No Holiday,Yes
1,01/12/2017,204,1,-5.5,38,0.8,2000,-17.6,0.0,0.0,0.0,Winter,No Holiday,Yes
2,01/12/2017,173,2,-6.0,39,1.0,2000,-17.7,0.0,0.0,0.0,Winter,No Holiday,Yes
3,01/12/2017,107,3,-6.2,40,0.9,2000,-17.6,0.0,0.0,0.0,Winter,No Holiday,Yes
4,01/12/2017,78,4,-6.0,36,2.3,2000,-18.6,0.0,0.0,0.0,Winter,No Holiday,Yes


In [21]:
details_df.head()

Unnamed: 0,ProductionVolume,ProductionCost,SupplierQuality,DeliveryDelay,DefectRate,QualityScore,MaintenanceHours,DowntimePercentage,InventoryTurnover,StockoutRate,WorkerProductivity,SafetyIncidents,EnergyConsumption,EnergyEfficiency,AdditiveProcessTime,AdditiveMaterialCost,DefectStatus
0,202,13175.403783,86.648534,1,3.121492,63.463494,9,0.052343,8.630515,0.081322,85.042379,0,2419.616785,0.468947,5.551639,236.439301,1
1,535,19770.046093,86.310664,4,0.819531,83.697818,20,4.908328,9.296598,0.038486,99.657443,7,3915.566713,0.119485,9.080754,353.957631,1
2,960,19060.820997,82.132472,0,4.514504,90.35055,1,2.464923,5.097486,0.002887,92.819264,2,3392.385362,0.496392,6.562827,396.189402,1
3,370,5647.606037,87.335966,5,0.638524,67.62869,8,4.692476,3.577616,0.055331,96.887013,8,4652.400275,0.183125,8.097496,164.13587,1
4,206,7472.222236,81.989893,3,3.867784,82.728334,9,2.746726,6.851709,0.068047,88.315554,7,1581.630332,0.263507,6.406154,365.708964,1


In [22]:
# ПОДГОТОВКА БЕЙЗЛАЙНА

#  BIKE (регрессия)
bike = bike_df.copy()
bike = bike.drop(columns=["Date"])

# Label Encoding для категорий
for col in ["Seasons", "Holiday", "Functioning Day"]:
    bike[col] = LabelEncoder().fit_transform(bike[col])

X_bike = bike.drop(columns=["Rented Bike Count"])
y_bike = bike["Rented Bike Count"]

#  DETAILS (классификация)
details = details_df.copy()

X_det = details.drop(columns=["DefectStatus"])
y_det = details["DefectStatus"]

In [23]:
# TRAIN
X_train_b, X_test_b, y_train_b, y_test_b = train_test_split(
    X_bike, y_bike, test_size=0.8, random_state=42
)

linreg = LinearRegression()
linreg.fit(X_train_b, y_train_b)
bike_pred = linreg.predict(X_test_b)

# train
X_train_d, X_test_d, y_train_d, y_test_d = train_test_split(
    X_det, y_det, test_size=0.8, random_state=42
)

logreg = LogisticRegression(max_iter=10000)
logreg.fit(X_train_d, y_train_d)
det_pred = logreg.predict(X_test_d)

In [24]:
# ОЦЕНКА МЕТРИК

# Регрессия
mae = mean_absolute_error(y_test_b, bike_pred)
mse = mean_squared_error(y_test_b, bike_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test_b, bike_pred)

print("\n=== Baseline: Linear Regression (Bike Sharing) ===")
print(f"MAE:  {mae:.3f}")
print(f"MSE:  {mse:.3f}")
print(f"RMSE: {rmse:.3f}")
print(f"R²:   {r2:.3f}")

# Классификация
acc = accuracy_score(y_test_d, det_pred)
f1 = f1_score(y_test_d, det_pred)
cm = confusion_matrix(y_test_d, det_pred)

print("\n=== Baseline: Logistic Regression (Manufacturing Defects) ===")
print(f"Accuracy: {acc:.3f}")
print(f"F1-score: {f1:.3f}")
print("Confusion matrix:\n", cm)


=== Baseline: Linear Regression (Bike Sharing) ===
MAE:  325.352
MSE:  189139.875
RMSE: 434.902
R²:   0.545

=== Baseline: Logistic Regression (Manufacturing Defects) ===
Accuracy: 0.875
F1-score: 0.929
Confusion matrix:
 [[ 164  242]
 [  82 2104]]
