In [43]:
# Импортируем необходимые библиотеки
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LinearRegression

import pickle

In [44]:
# Read the two source Excel workbooks; they are joined on their shared
# 'Unnamed: 0' column in the following cell.
df_bp = pd.read_excel("dataset/X_bp.xlsx")
df_nup = pd.read_excel("dataset/X_nup.xlsx")

In [45]:
# Inner-join the two frames on the 'Unnamed: 0' column, so only keys present
# in both frames are kept.
df_merge = pd.merge(df_bp, df_nup, how='inner', on='Unnamed: 0')

In [46]:
# Drop the 'Unnamed: 0' numbering column (it only duplicates the row order)
# and store the stitching-angle column as float64.
df = (
    df_merge
    .drop(columns=['Unnamed: 0'])
    .astype({'Угол нашивки, град': 'float64'})
)

In [47]:
# Replace the original Russian column headers with short English identifiers.
# Keys are the headers as they appear in the source data.
COLUMN_ALIASES = {
    'Соотношение матрица-наполнитель': 'matrix_filler',
    'Плотность, кг/м3': 'density',
    'модуль упругости, ГПа': 'elastic_modulus',
    'Количество отвердителя, м.%': 'hardener',
    'Содержание эпоксидных групп,%_2': 'epoxies',
    'Температура вспышки, С_2': 'temp',
    'Поверхностная плотность, г/м2': 'surface_density',
    'Модуль упругости при растяжении, ГПа': 'tensile_modulus',
    'Прочность при растяжении, МПа': 'tensile_strength',
    'Потребление смолы, г/м2': 'resin_consumption',
    'Угол нашивки, град': 'patch_angle',
    'Шаг нашивки': 'patch_step',
    'Плотность нашивки': 'patch_density',
}
df.rename(columns=COLUMN_ALIASES, inplace=True)

In [48]:
def _replace_outliers_with_median(frame):
    """Overwrite IQR outliers in every column of ``frame`` with the column median.

    A value counts as an outlier when it lies more than 1.5 * IQR above the
    75th percentile or more than 1.5 * IQR below the 25th percentile of its
    column. ``frame`` is modified in place and nothing is returned.
    """
    for column in frame.columns:
        values = frame[column]
        q1 = np.quantile(values, 0.25)
        q3 = np.quantile(values, 0.75)
        whisker = 1.5 * (q3 - q1)
        is_outlier = (values > q3 + whisker) | (values < q1 - whisker)
        # The median is evaluated before the assignment, so it is the median
        # of the column with the outliers still present.
        frame.loc[is_outlier, column] = np.median(values)


_replace_outliers_with_median(df)

# Per-column summary statistics, with the median appended as an extra column.
describe = df.describe().T
describe['median'] = df.median()
describe

Unnamed: 0,count,mean,std,min,25%,50%,75%,max,median
matrix_filler,1023.0,2.925149,0.892296,0.547391,2.321061,2.906878,3.548775,5.314144,2.906878
density,1023.0,1974.780371,70.84306,1784.482245,1924.53502,1977.621657,2020.628746,2161.565216,1977.621657
elastic_modulus,1023.0,737.725703,326.453983,2.436909,500.047452,739.664328,957.386959,1649.415706,739.664328
hardener,1023.0,110.558141,26.661661,38.6685,92.883002,110.56484,129.233303,181.828448,110.56484
epoxies,1023.0,22.241659,2.369521,15.695894,20.615227,22.230744,23.961468,28.955094,22.230744
temp,1023.0,285.949096,39.355511,179.374391,259.372264,285.896812,312.586724,386.067992,285.896812
surface_density,1023.0,480.887409,278.40028,0.60374,266.816645,451.864365,691.775979,1291.340115,451.864365
tensile_modulus,1023.0,73.310802,3.042662,65.553336,71.287231,73.268805,75.322176,81.417126,73.268805
tensile_strength,1023.0,2465.702548,465.48004,1250.392802,2146.295983,2459.524526,2755.823091,3705.672523,2459.524526
resin_consumption,1023.0,218.08992,57.681967,63.685698,179.93241,219.198882,256.760915,359.05222,219.198882


In [49]:
# Separate the target column ('tensile_strength') from the feature columns.
# Both results are independent of `df`, which is left unchanged.
y_strength = df['tensile_strength'].copy()
X_strength = df.drop(columns=['tensile_strength'])
X_strength.shape, y_strength.shape

((1023, 12), (1023,))

In [50]:
# Scale every feature column to the [0, 1] range.
#
# The fitted feature scaler is kept under its own name (`x_scaler`) because
# `minmax_scaler` is rebound to the target scaler later in the notebook;
# without a separate reference the feature scaling could not be reproduced
# for new inputs at prediction time.
#
# NOTE(review): the scaler is fitted on the full dataset, before the
# train/test split performed further down, so test-set min/max values leak
# into the training features. Fixing that requires reordering cells.
x_scaler = MinMaxScaler()
minmax_scaler = x_scaler  # original name kept as an alias for compatibility
X_strength_norm = pd.DataFrame(
    data=x_scaler.fit_transform(X_strength),
    # Labels come from the input frame rather than a hand-maintained list,
    # so they cannot drift out of sync with the actual column order.
    columns=X_strength.columns,
)
X_strength_norm.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
matrix_filler,1023.0,0.498821,0.187192,0.0,0.372092,0.494988,0.62965,1.0
density,1023.0,0.504658,0.187871,0.0,0.371411,0.512193,0.626245,1.0
elastic_modulus,1023.0,0.446447,0.198214,0.0,0.302135,0.447624,0.579819,1.0
hardener,1023.0,0.502163,0.186237,0.0,0.378699,0.50221,0.632613,1.0
epoxies,1023.0,0.493677,0.178708,0.0,0.371013,0.492854,0.623384,1.0
temp,1023.0,0.515617,0.190405,0.0,0.387036,0.515364,0.644492,1.0
surface_density,1023.0,0.372101,0.215691,0.0,0.206249,0.349615,0.535487,1.0
tensile_modulus,1023.0,0.489005,0.191799,0.0,0.361445,0.486357,0.615795,1.0
resin_consumption,1023.0,0.522755,0.195289,0.0,0.393568,0.526509,0.65368,1.0
patch_angle,1023.0,0.491691,0.500175,0.0,0.0,0.0,1.0,1.0


In [51]:
# Wrap the target Series in a one-column DataFrame named 'tensile_strength';
# this frame is what gets passed to the scaler in the next cell.
y_strength = pd.DataFrame(y_strength, columns=['tensile_strength'])

In [52]:
# Scale the target to the [0, 1] range. This rebinds `minmax_scaler`, so from
# here on the name refers to the scaler fitted on the target.
minmax_scaler = MinMaxScaler().fit(y_strength)
y_strength_norm = pd.DataFrame(
    minmax_scaler.transform(y_strength),
    columns=['tensile_strength'],
)
y_strength_norm

Unnamed: 0,tensile_strength
0,0.712590
1,0.712590
2,0.712590
3,0.712590
4,0.712590
...,...
1018,0.463043
1019,0.452087
1020,0.575296
1021,0.334513


In [53]:
# Hold out 30% of the normalised rows for testing; the seed is fixed so the
# split is reproducible.
(
    X_strength_orig_train,
    X_strength_orig_test,
    y_strength_orig_train,
    y_strength_orig_test,
) = train_test_split(
    X_strength_norm,
    y_strength_norm,
    test_size=0.3,
    random_state=42,
)

# Report the resulting shapes as a quick sanity check.
print('X_strength_orig_train:', X_strength_orig_train.shape, 'y_strength_orig_train:', y_strength_orig_train.shape)
print('X_strength_orig_test:', X_strength_orig_test.shape, 'y_strength_orig_test:', y_strength_orig_test.shape)

X_strength_orig_train: (716, 12) y_strength_orig_train: (716, 1)
X_strength_orig_test: (307, 12) y_strength_orig_test: (307, 1)


In [54]:
# Linear regression on the normalised training data. The target is
# 'tensile_strength' (tensile strength, MPa according to the source column name).
model_LinearRegression = LinearRegression().fit(
    X_strength_orig_train,
    y_strength_orig_train,
)

# Predictions for the held-out rows, still on the normalised target scale.
preds_strenght_orig_lin_test = model_LinearRegression.predict(X_strength_orig_test)
preds_strenght_orig_lin_test

array([[0.52049746],
       [0.48054873],
       [0.50358175],
       [0.51224067],
       [0.51179089],
       [0.47773232],
       [0.47893131],
       [0.49457768],
       [0.47748915],
       [0.50613063],
       [0.46992842],
       [0.49085764],
       [0.41474143],
       [0.49140436],
       [0.49032119],
       [0.49374085],
       [0.45934747],
       [0.48259975],
       [0.53037109],
       [0.52437309],
       [0.47001194],
       [0.49693948],
       [0.49537329],
       [0.50926215],
       [0.4466394 ],
       [0.50552831],
       [0.49386712],
       [0.48529874],
       [0.47491846],
       [0.53504525],
       [0.46154462],
       [0.48212524],
       [0.52309677],
       [0.47125197],
       [0.4792633 ],
       [0.48436598],
       [0.51825306],
       [0.45015236],
       [0.50429574],
       [0.48729292],
       [0.50719715],
       [0.53786423],
       [0.50062071],
       [0.47062502],
       [0.50680548],
       [0.51271609],
       [0.53432783],
       [0.463

In [55]:
# Map the normalised test predictions back to the original target scale.
# At this point `minmax_scaler` is the scaler that was fitted on the target.
preds = minmax_scaler.inverse_transform(X=preds_strenght_orig_lin_test)
preds

array([[2528.35965871],
       [2430.2743564 ],
       [2486.8268607 ],
       [2508.08694224],
       [2506.98258619],
       [2423.35927298],
       [2426.30314369],
       [2464.71935387],
       [2422.76223018],
       [2493.08507209],
       [2404.19852405],
       [2455.58562204],
       [2268.69902387],
       [2456.92796772],
       [2454.26846699],
       [2462.664687  ],
       [2378.21932124],
       [2435.31017507],
       [2552.60218929],
       [2537.87541592],
       [2404.40359911],
       [2470.51823504],
       [2466.67278663],
       [2500.77384089],
       [2347.01746497],
       [2491.60621017],
       [2462.97473659],
       [2441.93695918],
       [2416.45045997],
       [2564.07855873],
       [2383.61395026],
       [2434.14511615],
       [2534.74168851],
       [2407.44819586],
       [2427.11825647],
       [2439.64677199],
       [2522.84904148],
       [2355.64277524],
       [2488.57990954],
       [2446.83323629],
       [2495.70368276],
       [2570.999

In [56]:
# Persist the fitted model to 'LR_flask.pkl'. The context manager guarantees
# the file is flushed and closed before any later cell reads it back, instead
# of relying on garbage collection of an anonymous file handle.
with open('LR_flask.pkl', 'wb') as model_file:
    pickle.dump(model_LinearRegression, model_file)

In [57]:
# Read the pickled model back from 'LR_flask.pkl', closing the file handle
# deterministically once loading is done.
# NOTE: unpickling can execute arbitrary code, so only load pickle files that
# come from a trusted source.
with open('LR_flask.pkl', 'rb') as model_file:
    model_load = pickle.load(model_file)

In [58]:
# Predict with the reloaded model on the same test features, so the output
# can be compared with the predictions of the in-memory model.
model_load.predict(X=X_strength_orig_test)

array([[0.52049746],
       [0.48054873],
       [0.50358175],
       [0.51224067],
       [0.51179089],
       [0.47773232],
       [0.47893131],
       [0.49457768],
       [0.47748915],
       [0.50613063],
       [0.46992842],
       [0.49085764],
       [0.41474143],
       [0.49140436],
       [0.49032119],
       [0.49374085],
       [0.45934747],
       [0.48259975],
       [0.53037109],
       [0.52437309],
       [0.47001194],
       [0.49693948],
       [0.49537329],
       [0.50926215],
       [0.4466394 ],
       [0.50552831],
       [0.49386712],
       [0.48529874],
       [0.47491846],
       [0.53504525],
       [0.46154462],
       [0.48212524],
       [0.52309677],
       [0.47125197],
       [0.4792633 ],
       [0.48436598],
       [0.51825306],
       [0.45015236],
       [0.50429574],
       [0.48729292],
       [0.50719715],
       [0.53786423],
       [0.50062071],
       [0.47062502],
       [0.50680548],
       [0.51271609],
       [0.53432783],
       [0.463