In [1]:
import pandas as pd
import numpy as np
import sqlite3

from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.neighbors import NearestNeighbors

In [2]:
# Open the project's SQLite database; the path is relative, so it is resolved
# against the notebook's current working directory.
database_path = '../../database.db'
conn = sqlite3.connect(database_path)

In [3]:
# Separate label encoders for the two text columns (StructureId and TypeId),
# so each one keeps its own class vocabulary, plus one scaler for the whole
# feature matrix.
encoder_struct, encoder_type = LabelEncoder(), LabelEncoder()
scaler = MinMaxScaler()

In [4]:
# Load only the columns that are used later, one DataFrame per source table.
data_Parts = pd.read_sql(
    "select Id, Name, HeadingId, CategoryId from Parts;",
    con=conn,
)
data_StructuresParts = pd.read_sql(
    "select StructureId, PartId from StructuresParts;",
    con=conn,
)
data_Structures = pd.read_sql(
    "select Id, StandardProjectId, TypeId from Structures;",
    con=conn,
)
data_StandardProjects = pd.read_sql(
    "select Id, ImageIndex from StandardProjects;",
    con=conn,
)
data_Conductors = pd.read_sql(
    "select PartId, TypeId, Diameter, CrossSection from Conductors;",
    con=conn,
)

In [5]:
# Conductors are appended as extra rows below, so their key column has to
# carry the same name the part id gets after the merges ('Id_x').
data_Conductors.rename(columns={'PartId': 'Id_x'}, inplace=True)

# Parts <-> structures: outer joins keep parts without a structure as well as
# structures without parts. The second merge has an 'Id' column on both
# sides, which pandas suffixes into 'Id_x' (part) and 'Id_y' (structure).
df = (
    data_Parts
    .merge(data_StructuresParts, how='outer', left_on='Id', right_on='PartId')
    .drop(columns='PartId')
    .merge(data_Structures, how='outer', left_on='StructureId', right_on='Id')
    .drop(columns='Id_y')
)

# Stack the conductor rows under the part rows, then attach the image index
# of the standard project each row belongs to (left join: rows without a
# project are kept).
df = (
    pd.concat([df, data_Conductors], axis=0)
    .merge(data_StandardProjects, how='left', left_on='StandardProjectId', right_on='Id')
    .drop(columns='Id')
)

In [6]:
# Spot-check a few random rows of the combined table.
df.sample(n=5)

Unnamed: 0,Id_x,Name,HeadingId,CategoryId,StructureId,StandardProjectId,TypeId,Diameter,CrossSection,ImageIndex
92164,ПС-1-1,Зажим плашечный,9.0,26.0,ПК2(2хСВ95-2)-1_(25-70мм2),801.0,support,,,2.0
129597,К7ВВнг(А)-FRLS 1х10мк-1,,,,,,cabel,,,
73999,НІК 2303І АРП6 1770,Счетчик электрической энергии нетарифный,17.0,110.0,,,,,,
89860,AZIC 150,Защитный аппарат ПЛЗ,9.0,58.0,"А10-1 (2хСВ105-3,6)-1_(95-120 мм²)",73.0,support10,,,1.0
25410,AV-6 2P 16A (B) 6kA,Выключатель автоматический AV-6 2P 16A (B) 6kA...,17.0,20190823.0,,,,,,


In [7]:
# Keep only the part of StructureId before the first underscore, and shift
# ImageIndex up by one.
# NOTE(review): the +1 presumably frees 0 to mean "no image" once missing
# values are filled with 0 in the next cell - confirm.
df = df.assign(
    StructureId=lambda frame: frame['StructureId'].str.split('_').str.get(0),
    ImageIndex=lambda frame: frame['ImageIndex'].add(1),
)
df.sample(5)

Unnamed: 0,Id_x,Name,HeadingId,CategoryId,StructureId,StandardProjectId,TypeId,Diameter,CrossSection,ImageIndex
72160,МСС-2.3П-9500ГЦ,Молниеприемник стержневой сборный на плите,10.0,20171001.0,,,,,,
10108,101 R-16,Крепеж изолирующих стержней к трубе,15.0,20190823.0,,,,,,
97152,GHSO 16,Крюк для круглых опор,9.0,58.0,ВКА2(1хСК105-8)-1,822.0,support,,,3.0
21768,694615,Многорозеточный блок с 2хUSB 4x2К З - с кабеле...,17.0,20190823.0,,,,,,
62074,V25-B C 1NPE150,"УЗИП для силовых сетей 1 NPE (Класс I II), 150 В",15.0,20190823.0,,,,,,


In [8]:
# Missing values: empty string for the text columns, 0 for everything else.
# Then remove exact duplicate rows, renumber the index from 0, and store the
# id-like columns as 32-bit integers.
text_columns = ['Name', 'StructureId', 'TypeId']
integer_columns = ['HeadingId', 'CategoryId', 'StandardProjectId', 'ImageIndex']

df = (
    df
    .fillna({column: '' for column in text_columns})
    .fillna(0)
    .drop_duplicates()
    .reset_index(drop=True)
    .astype({column: 'int32' for column in integer_columns})
)
df.sample(5)

Unnamed: 0,Id_x,Name,HeadingId,CategoryId,StructureId,StandardProjectId,TypeId,Diameter,CrossSection,ImageIndex
47505,MH25040/DISJ,Трёхфазный шкаф Alpimatic - тип H - 400 В - 25...,17,20190823,,0,,0.0,0.0,0
40324,GRB 90 140 G,Угловая секция 90° 105x400,15,20190823,,0,,0.0,0.0,0
48275,Mod-7-3,Корпус ModBox 1400х800х170 (324 мод.) EKF PROxima,17,20190823,,0,,0.0,0.0,0
107660,"КР02, 231н/3-012",Кронштейн,8,19,ВКА20з,216,support10,0.0,0.0,2
96569,"СВ105-3,6","Стойка железобетонная вибрированная, ТУ 5863-0...",7,114,"КП2(2хСВ105-3,6)-4",787,support,0.0,0.0,3


**NearestNeighbors**

In [9]:
# Feature matrix for the neighbour search: every column except the part id
# and name. The two text columns are label-encoded to integers, then all
# columns are min-max scaled together.
feature_frame = df.drop(columns=['Id_x', 'Name']).assign(
    StructureId=lambda frame: encoder_struct.fit_transform(frame['StructureId']),
    TypeId=lambda frame: encoder_type.fit_transform(frame['TypeId']),
)
features = scaler.fit_transform(feature_frame)
features[:2]

array([[3.33333333e-01, 2.86840325e-06, 3.29069255e-01, 1.47157191e-01,
        8.88888889e-01, 0.00000000e+00, 0.00000000e+00, 5.00000000e-01],
       [3.70370370e-01, 4.94552284e-07, 3.29069255e-01, 1.47157191e-01,
        8.88888889e-01, 0.00000000e+00, 0.00000000e+00, 5.00000000e-01]])

In [10]:
# Cosine-distance neighbour index over the scaled features; each query
# returns the n_neighbors closest rows.
n_neighbors = 100
model = NearestNeighbors(metric='cosine', n_neighbors=n_neighbors).fit(features)

In [11]:
# Example request: every row (part) that belongs to structure 'А11'.
is_requested_structure = df['StructureId'].eq('А11')
request = df.loc[is_requested_structure]
request

Unnamed: 0,Id_x,Name,HeadingId,CategoryId,StructureId,StandardProjectId,TypeId,Diameter,CrossSection,ImageIndex
99765,COT36.2,Бугель (скрепа) для ленты,9,28,А11,47,support,0.0,0.0,3
99766,COT37.2,"Лента бандажная стальная 20 мм x 0,70 мм x 50 ...",9,28,А11,47,support,0.0,0.0,3
99767,PER15,"Ремешок бандажный, L=300 мм, B=4,8 мм, D=80 мм",9,28,А11,47,support,0.0,0.0,3
99768,SH702R,Стяжка ж/б стойки типа СВ110,8,28,А11,47,support,0.0,0.0,3
99769,SL37.2,Зажим соединительный плашечный; магистраль: 6-...,9,28,А11,47,support,0.0,0.0,3
99770,SLIP22.1,Зажим влагозащищенный изолированный прокалываю...,9,28,А11,47,support,0.0,0.0,3
99771,SLIP22.127,Зажим влагозащищенный изолированный прокалываю...,9,28,А11,47,support,0.0,0.0,3
99772,SO250.01,Зажим натяжной клиновой для магистрали (50-70 ...,9,28,А11,47,support,0.0,0.0,3
99773,SOT29.10R,Крюк бандажный ø16 мм,9,72,А11,47,support,0.0,0.0,3
99774,ЗП6,Заземляющий проводник,8,25,А11,47,support,0.0,0.0,3


In [12]:
# Recommend parts for the requested structure.
#
# For every row of `request` the fitted NearestNeighbors model is queried and
# the closest rows of `df` are kept, skipping parts that are already in the
# request or were already recommended for an earlier request row. Each request
# row contributes at most ceil(MAX_RESULTS / number_of_request_rows) parts;
# the pooled candidates are then cut to the MAX_RESULTS smallest distances
# and ordered by part id for display.
MAX_RESULTS = 25

recommended_chunks = []

if len(request) > 0:
    per_row_quota = int(np.ceil(MAX_RESULTS / len(request)))

    # Encode and scale all request rows in one pass with the encoders/scaler
    # that were fitted on the full feature matrix.
    query_frame = request.drop(columns=['Id_x', 'Name'])
    query_frame = query_frame.assign(
        StructureId=encoder_struct.transform(query_frame['StructureId']),
        TypeId=encoder_type.transform(query_frame['TypeId']),
    )
    query_matrix = scaler.transform(query_frame)

    # Part ids that must not be recommended (again): starts with the request's
    # own parts and grows with every accepted recommendation.
    seen_ids = set(request['Id_x'])

    for query_row in query_matrix:
        distances, positions = model.kneighbors(query_row.reshape(1, -1))
        # kneighbors returns row positions in the fitted matrix, so select
        # positionally; assign() builds a new frame instead of writing a
        # column onto a slice of df.
        candidates = df.iloc[positions[0]].assign(distance=distances[0])
        candidates = (
            candidates[~candidates['Id_x'].isin(seen_ids)]
            .drop_duplicates(subset=['Id_x'])
            .head(per_row_quota)
        )
        seen_ids.update(candidates['Id_x'])
        recommended_chunks.append(candidates)

if recommended_chunks:
    results = pd.concat(recommended_chunks, axis=0)
else:
    # Empty request: keep the expected columns so the sorting below still works.
    results = df.iloc[0:0].assign(distance=pd.Series(dtype='float64'))

results = results.sort_values(by='distance').head(MAX_RESULTS).sort_values(by='Id_x')
results

Unnamed: 0,Id_x,Name,HeadingId,CategoryId,StructureId,StandardProjectId,TypeId,Diameter,CrossSection,ImageIndex,distance
87340,CA-2000,Кроншт.ейн анкерный CА2000 EKF PROxima,9,95,А29,44,support,0.0,0.0,3,0.0002092735
94918,CS 10.3,Кронштейн анкерный,9,95,А23,2,support,0.0,0.0,3,0.001084449
94919,E 778,Стяжной хомут для жгута СИП диаметром 10-45 мм,9,95,А23,2,support,0.0,0.0,3,0.001084449
87341,F 20,"Лента металлическая 20х0,7(0,8)х1000",9,26,А29,44,support,0.0,0.0,3,0.0002092735
94920,F 207,"Металлическая лента 20x0,7x1000 мм",9,95,А23,2,support,0.0,0.0,3,0.001084449
87342,KR 1,Кабельный ремешок,9,26,А29,44,support,0.0,0.0,3,0.0002092735
87343,KZP1,Зажим,9,26,А29,44,support,0.0,0.0,3,0.0002092735
94922,P 72,Зажим для подкл. абонента к изолир. магистраль...,9,95,А23,2,support,0.0,0.0,3,0.001084449
87344,PA 1500,Анкерный клиновый зажим. Cечение жилы 50-70 мм2,9,95,А29,44,support,0.0,0.0,3,0.0002092735
106087,PD2.2,Гайка крюкообразная,9,72,А12,48,support,0.0,0.0,3,1.082371e-06
