In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
from typing import Any
import matplotlib.pyplot as plt
import pandas as pd
import re
import seaborn as sns


def show_plt(data: Any, title: str, xlabel: str) -> None:
    """Draw a histogram of ``data`` with a KDE curve overlaid and display it.

    Args:
        data: Values to plot; passed unchanged to ``seaborn.histplot``.
        title: Text used as the plot title.
        xlabel: Text used as the x-axis label.
    """
    # histplot draws on the current axes and returns them, so decorating the
    # returned object targets the same axes the pyplot helpers would.
    axes = sns.histplot(data, kde=True)
    axes.set_title(title)
    axes.set_xlabel(xlabel)
    axes.grid(True)
    plt.show()


def show_result_plt(y_test: Any, y_pred: Any, r2: Any) -> None:
    """Plot predicted values against actual values and display the figure.

    Draws a scatter of ``y_pred`` over ``y_test``, a dashed identity line
    (the points where a prediction equals the actual value) and a text box
    showing ``r2``.

    Args:
        y_test: Iterable of actual target values.
        y_pred: Iterable of predicted values, paired element-wise with
            ``y_test``.
        r2: Score shown in the corner box, formatted to three decimals.
    """
    # Scope the seaborn style to this figure. A global ``sns.set_style`` call
    # would leak into every plot drawn afterwards in the same kernel, making
    # the look of other cells depend on execution order. Figure creation and
    # rendering both happen inside the context so the style applies fully.
    with sns.axes_style("whitegrid"):
        fig, ax = plt.subplots(figsize=(10, 8))
        ax.scatter(y_test, y_pred, alpha=0.6, color='blue', label='Предсказания')

        # The identity line spans the combined range of both inputs, padded
        # by 2 on each side so the extreme points do not sit on the border.
        upper = max(max(y_test), max(y_pred)) + 2
        lower = min(min(y_test), min(y_pred)) - 2
        ax.plot([lower, upper], [lower, upper],
                '--', color='red', linewidth=2, label='Идеальное соответствие')

        ax.set_xlabel('Фактические значения Y1', fontsize=12)
        ax.set_ylabel('Предсказанные значения Y1', fontsize=12)
        ax.set_title('Сравнение фактических и предсказанных значений Y1', fontsize=14)
        ax.legend(fontsize=12)
        ax.grid(True, alpha=0.3)

        # Axes-relative coordinates keep the score box in the top-left corner
        # regardless of the data range.
        ax.text(0.05, 0.95, f'R2 = {r2:.3f}', transform=ax.transAxes,
                fontsize=12, verticalalignment='top',
                bbox=dict(boxstyle='round', facecolor='white', alpha=0.8))

        fig.tight_layout()
        plt.show()

In [None]:
# Load the dataset from 'out.csv' (the path is relative to the notebook's
# working directory) and print a summary of its columns, dtypes and
# non-null counts.
df = pd.read_csv('out.csv')
df.info()

In [None]:
# Derive four difference columns from the five start/end columns and then
# remove the originals. The whole transformation is a single expression bound
# to ``df`` once, so a failure (e.g. a missing column) leaves ``df`` untouched
# instead of half-modified, and no ``inplace=True`` mutation is needed.
df = df.assign(
    beforeWeighing=df['weighingStartTime'] - df['startTime'],
    weighingTime=df['weighingEndTime'] - df['weighingStartTime'],
    beforeCheckout=df['checkoutStartTime'] - df['weighingEndTime'],
    checkoutTime=df['checkoutEndTime'] - df['checkoutStartTime'],
).drop(columns=[
    'startTime',
    'weighingStartTime',
    'weighingEndTime',
    'checkoutStartTime',
    'checkoutEndTime',
])
df.info()

In [None]:
# Build a readable title for each column from its camelCase name, e.g.
# 'weighingStartTime' -> 'Weighing start time'. A space is inserted only
# before capitals that are not the first character: otherwise a name that
# starts with a capital would get a leading space, and ``capitalize`` would
# then leave its first letter lower-case.
plt_names = [re.sub(r'(?<!^)([A-Z])', r' \1', column).capitalize() for column in df.columns]

# One histogram per column.
# NOTE(review): every plot is labelled 'Seconds' — this assumes all columns
# hold durations in seconds; confirm against the data.
for plt_name, column_name in zip(plt_names, df.columns):
    show_plt(df[column_name], plt_name, 'Seconds')

In [None]:
# Separate the regression target ('totalTime') from the remaining columns,
# which are used as features.
# NOTE(review): if 'totalTime' is simply the sum of the feature columns, a
# linear model will fit it almost exactly — confirm the target definition.
df_y = df['totalTime']
df_x = df.drop(columns=['totalTime'])

# Hold out 30% of the rows for evaluation; the fixed random_state makes the
# split reproducible across runs.
x_train, x_test, y_train, y_test = train_test_split(
    df_x,
    df_y,
    test_size=0.30,
    random_state=42,
)

In [None]:
# Fit a linear regression on the training split; ``fit`` returns the
# estimator itself, so construction and fitting can be chained.
model = LinearRegression().fit(x_train, y_train)

# Predict the held-out rows and measure the fit with the R^2 score.
y_pred = model.predict(x_test)
r2 = r2_score(y_true=y_test, y_pred=y_pred)

In [None]:
# Visualise predictions against the actual held-out values, annotated with R^2.
show_result_plt(y_test=y_test, y_pred=y_pred, r2=r2)