In [1]:

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt


from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import make_pipeline


from sklearn.linear_model import LinearRegression
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.neural_network import MLPRegressor

from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error, r2_score

In [10]:
df = pd.read_csv('/content/drive/MyDrive/cargoes_boxes_1.csv')

In [20]:
df = df.drop(columns = ['Unnamed: 0', 'boxes'])

In [34]:
df.describe()

Unnamed: 0,cargo_width,cargo_height,cargo_length,density_percent,filling_space_percent,box_count,box_unique
count,619.0,619.0,619.0,619.0,619.0,620.0,620.0
mean,1946.200323,2430.851373,8241.630048,76.07004,52.694318,125.453226,9.522581
std,717.359642,380.918143,5348.366591,18.261031,23.564131,309.655908,12.677731
min,220.0,135.0,270.0,11.97505,5.32294,0.0,0.0
25%,1000.0,2300.0,1300.0,64.21483,35.14257,16.0,3.0
50%,2352.0,2590.0,12022.0,78.38664,54.08501,30.0,5.0
75%,2480.0,2698.0,13490.0,91.70113,72.270175,69.0,9.0
max,2600.0,3000.0,13620.0,100.0,98.4375,2849.0,96.0


In [35]:
df = df.drop(index=233)

In [45]:
df = df.drop(index=470)

In [46]:
df

Unnamed: 0,cargo_width,cargo_height,cargo_length,density_percent,filling_space_percent,box_count,box_unique
0,2400.0,2400.0,13300.0,91.90237,76.74275,28,7
1,2400.0,2400.0,13300.0,92.94660,80.69541,28,8
2,2400.0,2400.0,13300.0,97.69497,83.26620,58,9
3,2490.0,2590.0,13490.0,94.50057,73.31180,87,9
4,2400.0,2400.0,13300.0,97.69497,83.26620,58,9
...,...,...,...,...,...,...,...
615,2490.0,2590.0,13490.0,100.00000,58.26317,33,2
616,800.0,1650.0,1200.0,74.02778,67.29798,33,8
617,2500.0,2100.0,7500.0,94.44444,64.24381,17,1
618,800.0,2000.0,1200.0,82.50000,51.56250,33,2


In [47]:
X = df.drop('density_percent', axis=1)
y = df.density_percent

In [48]:
X_train, X_test, y_train, y_test = train_test_split(X, y , test_size=0.3, random_state=12345)

In [39]:
# Создадим функцию
def try_model(model, X_train, y_train, X_test, y_test, model_params=None):
    if model_params is None:
        model_params = dict()
    print(model.__name__)
    pipeline = make_pipeline(StandardScaler(), model(**model_params))
    pipeline.fit(X_train, y_train)
    y_pred = pipeline.predict(X_test)
    print(f'MSE: {mean_squared_error(y_test, y_pred):.4f}')
    print(f'MAE: {mean_absolute_error(y_test, y_pred):.4f}')
    print(f'MAPE: {mean_absolute_percentage_error(y_test, y_pred):.4f}')
    print(f'R^2: {r2_score(y_test, y_pred):.4f}')   

    return pipeline

In [49]:
try_model(LinearRegression, X_train, y_train, X_test, y_test)

LinearRegression
MSE: 197.7173
MAE: 11.1404
MAPE: 0.1805
R^2: 0.4140


In [55]:
try_model(GradientBoostingRegressor, X_train, y_train, X_test, y_test, model_params=dict(learning_rate=0.3, n_estimators=10, verbose=1, subsample=0.5))

GradientBoostingRegressor
      Iter       Train Loss      OOB Improve   Remaining Time 
         1         215.7753          80.3549            0.01s
         2         159.1108          42.3174            0.01s
         3         129.7255          27.5638            0.02s
         4         127.9797          -1.8709            0.01s
         5         115.5469           8.2948            0.01s
         6         117.3270          -0.4739            0.01s
         7         102.9185           1.1728            0.01s
         8         102.7319          -7.9885            0.00s
         9          96.2095          -0.0722            0.00s
        10          88.1429           2.1402            0.00s
MSE: 146.9568
MAE: 9.1528
MAPE: 0.1538
R^2: 0.5644


In [58]:
try_model(MLPRegressor, X_train, y_train, X_test, y_test, model_params=dict(max_iter=10, random_state=0))

MLPRegressor
MSE: 5948.4265
MAE: 74.9083
MAPE: 0.9884
R^2: -16.6312




In [59]:
data = pd.read_csv('/content/drive/MyDrive/cargoes_boxes_1.csv')

In [60]:
data = data.drop(columns = ['Unnamed: 0'])

In [61]:
data = data.drop(index=233)

In [62]:
data = data.drop(index=470)

In [63]:
X = data.drop('density_percent', axis=1)
y = data.density_percent

In [64]:
X_train, X_test, y_train, y_test = train_test_split(X, y , test_size=0.3, random_state=12345)

In [65]:
try_model(LinearRegression, X_train, y_train, X_test, y_test)

LinearRegression


ValueError: ignored