# Этап 1 - Импорты

In [1]:
import os

import pandas as pd
from graphviz import Source
from sklearn import tree
from sklearn.model_selection import train_test_split
from sqlalchemy import create_engine, text

# Этап 2 - Выгрузка датасета из БД

In [5]:
# The connection URL is taken from the REAL_TIME_SYSTEM_DB_URL environment
# variable so that real credentials do not have to be stored in the notebook.
# The fallback is the original local development DSN, kept so the notebook
# still runs unchanged on the machine it was written on.
DEFAULT_DB_URL = 'postgresql+psycopg2://postgres:admin@localhost/real_time_system'
engine = create_engine(os.environ.get('REAL_TIME_SYSTEM_DB_URL', DEFAULT_DB_URL))

# Read the whole final_table into a DataFrame, using the HeatNo column as the index.
with engine.begin() as conn:
    dataset = pd.read_sql_query(text('SELECT * FROM final_table;'), conn, index_col="HeatNo")

# Show the first ten rows as the cell output.
dataset.head(10)

Unnamed: 0_level_0,TotalIngotsWeight,PouringScrap,OtherScrap,Last_EOP,Cr_Last_EOP,Cr_Final_x,LFVD_FeCrA_x,LFVD_FeCrC_x,Ni_Last_EOP,Ni_Final_x,...,PV_OCR12VM_OSTRUŽKI,PV_POSEBNA_JEKLA_OSTRUŽKI,PV_BRM2_OSTRUŽKI,PV_E1,PV_E3,PV_E6,PV_E8,PV_Kore,PV_GRODELJ,PV_E40
HeatNo,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
71601,52230.0,800.0,1000.0,,,11.48,0.0,1900.0,,0.11,...,0,0,0,0,43700,0,0,0,0,0
71602,49290.0,1000.0,2500.0,,,11.58,0.0,2000.0,,0.1,...,0,0,0,0,42100,0,0,0,0,0
71609,45210.0,800.0,1500.0,,,11.51,0.0,200.0,,0.29,...,0,0,0,0,0,0,1300,0,0,0
71610,46880.0,600.0,400.0,,,11.56,0.0,0.0,,0.18,...,0,0,0,0,0,16600,0,0,0,0
71616,42000.0,600.0,100.0,,,11.6,0.0,0.0,,0.21,...,0,0,0,0,0,0,1500,0,0,0
71638,47380.0,400.0,2200.0,,,11.58,0.0,200.0,,0.16,...,0,0,0,0,0,16600,0,0,0,0
71641,45830.0,900.0,2000.0,,,11.86,0.0,0.0,,0.15,...,0,0,0,0,0,13800,0,0,0,0
71642,48100.0,1000.0,800.0,,,11.61,0.0,400.0,,0.16,...,0,0,0,0,0,13200,0,0,0,0
71643,49400.0,1000.0,500.0,,,11.75,0.0,450.0,,0.16,...,0,0,0,0,0,13200,0,0,0,0
71644,49440.0,800.0,400.0,,,11.59,0.0,900.0,,0.17,...,0,0,0,0,0,13200,0,0,0,0


# Предикторы



In [None]:
# Rows whose final Cr value lies outside the [Cr_LowerLimit, Cr_UpperLimit] band.
cr_below_limit = dataset['Cr_Final'] < dataset['Cr_LowerLimit']
cr_above_limit = dataset['Cr_Final'] > dataset['Cr_UpperLimit']
cr_out_of_limits = cr_below_limit | cr_above_limit

# Sum of TotalIngotsWeight over those rows, divided by 1000
# (presumably kg -> tons, matching the 'tons' label below; confirm the source unit).
error_weight_cr = dataset.loc[cr_out_of_limits, 'TotalIngotsWeight'].sum() / 1000
cr_total_weight = dataset['TotalIngotsWeight'].sum() / 1000

print('Ingots with errors:', error_weight_cr, 'tons')
# The cost figure multiplies the weight by 1000, i.e. it assumes 1000 euros per ton.
print('Costs of  errors:', error_weight_cr * 1000, 'euros')
print('Percent of errors:', error_weight_cr / cr_total_weight * 100, '%')

Ingots with errors: 1121.89 tons
Costs of  errors: 1121890.0 euros
Percent of errors: 0.6649597889559206 %


In [None]:
# Rows whose final Ni value lies outside the [Ni_LowerLimit, Ni_UpperLimit] band.
ni_below_limit = dataset['Ni_Final'] < dataset['Ni_LowerLimit']
ni_above_limit = dataset['Ni_Final'] > dataset['Ni_UpperLimit']
ni_out_of_limits = ni_below_limit | ni_above_limit

# Sum of TotalIngotsWeight over those rows, divided by 1000
# (presumably kg -> tons, matching the 'tons' label below; confirm the source unit).
error_weight_ni = dataset.loc[ni_out_of_limits, 'TotalIngotsWeight'].sum() / 1000
ni_total_weight = dataset['TotalIngotsWeight'].sum() / 1000

print('Ingots with errors:', error_weight_ni, 'tons')
# The cost figure multiplies the weight by 1000, i.e. it assumes 1000 euros per ton.
print('Costs of  errors:', error_weight_ni * 1000, 'euros')
print('Percent of errors:', error_weight_ni / ni_total_weight * 100, '%')

Ingots with errors: 3513.54 tons
Costs of  errors: 3513540.0 euros
Percent of errors: 2.082523970164798 %


In [None]:
# Rows where the final Cr value is above the Cr target.
cr_above_target = dataset['Cr_Final'] > dataset['Cr_Target']
cr_more_than_tagret = dataset[cr_above_target]

# Share of such rows in the whole dataset, in percent.
cr_overshoot_share = cr_more_than_tagret.shape[0] / dataset.shape[0] * 100
print('Cr more than tagret:', cr_overshoot_share, '%')

# Mean difference (Cr_Final - Cr_Target) over the overshooting rows only.
cr_mean_overshoot = (cr_more_than_tagret['Cr_Final'] - cr_more_than_tagret['Cr_Target']).mean()
print('На сколько мы ошибаемся по хрому в среднем:', cr_mean_overshoot)

# Column names that mention FeCr but not LFVD.
columns_with_fecr = [
    name for name in dataset.columns
    if 'FeCr' in name and 'LFVD' not in name
]

columns_with_fecr

Cr more than tagret: 41.52684563758389 %
На сколько мы ошибаемся по хрому в среднем: 0.08628956228956236


['FeCrA', 'FeCrC', 'FeCrC Si', 'FeCrC51', 'FeCrCSi']

# Этап 3

Делим датасет на x и y, удаляем ненужные столбцы

In [None]:
# Columns used as prediction targets (y); everything else in the dataset is a feature (x).
unique_adds = ['FeMo', 'FeV', 'FeMnC', 'FeSi', 'CaO', 'BOKSIT beli', 'Al bloki', 'KARBORITmleti', 'FeCrC',
               'SLAGMAG 65B', 'EPZ zlindra', 'CASIfi13', 'Cfi13', 'FeAl', 'FeCrA', 'FeCrC51', 'SiMn', 'POLYMOX',
               'FeCrC Si', 'Al zica', 'CaSi', 'Molyquick', 'Al opl.zica', 'Borax', 'S žica', 'BOKSIT', 'EPŽ žlindra',
               'FeW72', 'Kalcijev karbid', 'Mn met', 'Al gran', 'FeCrCSi', 'Ni gran', 'SINT. ŽLINDRA', 'DUŠIK',
               'karburit-kosi', 'FeV opl. žica', 'FeS', 'Ni katode']

y = dataset[unique_adds].copy()
x = dataset.drop(columns=unique_adds)

# Remove every feature whose name contains 'Last_EOP'.
# The column list is captured once before the loop, so rebinding x inside is safe.
for column in x.keys():
    if 'Last_EOP' in column:
        try:
            x = x.drop(columns=[column])
            print('Дропнул', column)
        except KeyError:
            # Only a missing label is expected here (e.g. a duplicated column
            # name that an earlier iteration already removed); any other
            # error should surface instead of being silently swallowed.
            print(column, 'уже отсутствует!')

# Fill remaining gaps with the per-column mean.
# NOTE(review): the means are computed on the full dataset before the
# train/test split, so test rows influence the imputed values - confirm this is acceptable.
x = x.fillna(x.mean())

x.head(10)

In [None]:
# Drop target columns that are non-zero in fewer than 70% of the rows,
# printing the non-zero share of every column along the way.
for target_name in y.keys():
    nonzero_share = (y[target_name] != 0).sum() / len(y[target_name])
    print(target_name)
    if nonzero_share < 0.7:
        print(nonzero_share, 'Плохой столбец!')
        y = y.drop(columns=[target_name])
        continue
    print(nonzero_share)

# Number of target columns that survived the filter.
print(len(y.keys()))

In [None]:
# Hold out 30% of the rows for testing; random_state=0 keeps the split reproducible.
split_parts = train_test_split(x, y, test_size=0.3, random_state=0)
x_train, x_test, y_train, y_test = split_parts

# Shapes in the order: x_train, x_test, y_train, y_test.
print(*(part.shape for part in split_parts))

In [None]:
# Train one decision-tree regressor per target column and record its score
# (the estimator's .score(), multiplied by 100 and formatted to two decimals)
# on both the training and the test split.
scores_test = {}
scores_train = {}
models = {}

for column in y_train.keys():
    # A fixed random_state makes the fitted tree, and therefore the scores and
    # the exported graphs, reproducible across kernel restarts.
    model = tree.DecisionTreeRegressor(random_state=0)
    model.fit(x_train, y_train[column])
    models[column] = model
    # Scores are stored as strings because they are later embedded in file names.
    scores_train[column] = "%.2f" % (model.score(x_train, y_train[column]) * 100)
    scores_test[column] = "%.2f" % (model.score(x_test, y_test[column]) * 100)

print(scores_train)
print(scores_test)

In [None]:
# Render every fitted tree to a PNG named model_<target>_<test score>.png
# in the current working directory.
for key in models:
    # feature_names is passed as a plain list: some scikit-learn releases
    # validate this parameter and reject a pandas Index.
    dot_source = tree.export_graphviz(models[key], out_file=None, feature_names=list(x.columns))
    graph = Source(dot_source)
    png_bytes = graph.pipe(format='png')
    with open('model_' + key + '_' + scores_test[key] + '.png', 'wb') as f:
        f.write(png_bytes)

In [None]:
# For every target column, list the features whose correlation with it is above 0.5.
# Only positive correlations pass the threshold; strongly negative ones are not printed.
for target in y:
    print(target)
    target_values = y[target]
    for feature in x.columns:
        correlation = target_values.corr(x[feature])
        if correlation > 0.5:
            print(feature, correlation)
    print()

# Метрики

Mean Error
MAE

Узнать важность признаков по мнению модели