In [1]:
import pandas as pd
import numpy as np
import sqlite3

from sklearn.preprocessing import LabelEncoder
from sklearn.neighbors import NearestNeighbors

In [2]:
# Open the database read-only through a SQLite URI.
# With a plain path, sqlite3.connect() silently creates a new empty database file
# when the path is wrong, and the mistake only shows up later as "no such table".
# With mode=ro a missing file raises sqlite3.OperationalError right here, and the
# source data cannot be modified by accident (the visible cells only run SELECTs).
conn = sqlite3.connect('file:../database.db?mode=ro', uri=True)

In [14]:
# Two independent label encoders: one is fitted on StructureId and the other on
# TypeId in a later cell, so each keeps its own label -> integer mapping.
encoder_struct, encoder_type = LabelEncoder(), LabelEncoder()

In [4]:
# Load each source table into its own DataFrame, selecting only the columns that
# the following cells join on or use as features.
data_Parts = pd.read_sql(
    sql="select Id, Name, HeadingId, CategoryId from Parts;",
    con=conn,
)
data_StructuresParts = pd.read_sql(
    sql="select StructureId, PartId from StructuresParts;",
    con=conn,
)
data_Structures = pd.read_sql(
    sql="select Id, StandardProjectId, TypeId from Structures;",
    con=conn,
)
data_StandardProjects = pd.read_sql(
    sql="select Id, ImageIndex from StandardProjects;",
    con=conn,
)
data_Conductors = pd.read_sql(
    sql="select PartId, TypeId, Diameter, CrossSection from Conductors;",
    con=conn,
)

In [5]:
# Build the single working table `df` from the raw frames loaded above.
# None of the raw `data_*` frames is modified, so this cell can be re-run at any
# time and always starts from the same inputs.

# Parts joined to the structures that use them. The outer join keeps unmatched
# rows from both sides; `PartId` duplicates `Id` for matched rows and is dropped.
parts_links = data_Parts.merge(
    data_StructuresParts, left_on='Id', right_on='PartId', how='outer'
).drop(columns='PartId')

# Attach structure attributes. Both sides have an `Id` column, so pandas suffixes
# them: `Id_x` is the part id (kept), `Id_y` is the structure id (dropped because
# it repeats `StructureId` for matched rows).
parts_structures = parts_links.merge(
    data_Structures, left_on='StructureId', right_on='Id', how='outer'
).drop(columns='Id_y')

# Conductors are appended as extra rows; their `PartId` is renamed so that it lands
# in the same `Id_x` column. A renamed copy is used instead of `inplace=True` so the
# loaded `data_Conductors` frame stays as it was read from the database.
conductors = data_Conductors.rename(columns={'PartId': 'Id_x'})

df = (
    # ignore_index avoids duplicated row labels in the stacked frame.
    pd.concat([parts_structures, conductors], axis=0, ignore_index=True)
    .merge(data_StandardProjects, left_on='StandardProjectId', right_on='Id', how='left')
    .drop(columns='Id')
)

In [6]:
# Eyeball a handful of random rows of the merged table (unseeded, so the rows
# differ on every run).
df.sample(n=5)

Unnamed: 0,Id_x,Name,HeadingId,CategoryId,StructureId,StandardProjectId,TypeId,Diameter,CrossSection,ImageIndex
112870,ДА4,Плита анкерная деревянная,18.0,57.0,ПОАд7,25.0,support,,,2.0
111778,Бетон В20 W6 F200,Бетон,16.0,54.0,ККМкв вар.2,57.0,support,,,2.0
115859,СК135-10,Стойка,7.0,114.0,К35-1з(2хСК135-10)-1_(120мм2),297.0,pole35,,,0.0
5448,030316,DLPlus Ответвительная коробка (75х75х35),15.0,20190823.0,,,,,,
59135,ULM383,"Лоток лестничный 80х300, лонжерон 1,5 мм, L 3 м",15.0,20190823.0,,,,,,


In [7]:
# StructureId: keep only the text before the first underscore.
# ImageIndex: shift every value up by one (presumably so that the 0 written by the
# later fillna(0) stays distinct from a real index of 0 -- TODO confirm).
# NOTE: this cell is not idempotent -- running it again shifts ImageIndex once more.
df = df.assign(
    StructureId=df['StructureId'].str.split('_').str[0],
    ImageIndex=df['ImageIndex'] + 1,
)
df.sample(5)

Unnamed: 0,Id_x,Name,HeadingId,CategoryId,StructureId,StandardProjectId,TypeId,Diameter,CrossSection,ImageIndex
122295,SO115.9585,Вязка спиральная для провода 70-95 мм²,9.0,36.0,КП20з,215.0,support10,,,2.0
21339,661655,Декоративная рамка для накладного монтажа для ...,15.0,20190823.0,,,,,,
5569,031645,Выдвижной блок - для монтажа на торце мини-пли...,15.0,20190823.0,,,,,,
46884,mb-inox-652,"Щит из нержавеющей стали ""Inox"" AISI 304 (600х...",17.0,20190823.0,,,,,,
22131,752526,Valena LIFE.Переключатель 10АХ 250В с подсветк...,15.0,20190823.0,,,,,,


In [8]:
# Columns that end up as whole numbers once missing values are replaced by 0.
int_columns = ['HeadingId', 'CategoryId', 'StandardProjectId', 'ImageIndex']

df = (
    df.assign(
        # Text columns: missing values become the empty string.
        Name=df['Name'].fillna(''),
        # Categorical ids: missing -> '' and then label-encoded to integers.
        StructureId=encoder_struct.fit_transform(df['StructureId'].fillna('')),
        TypeId=encoder_type.fit_transform(df['TypeId'].fillna('')),
    )
    # Every remaining missing value becomes 0.
    .fillna(0)
    # Remove fully identical rows and renumber the index 0..n-1.
    .drop_duplicates()
    .reset_index(drop=True)
    .astype(dict.fromkeys(int_columns, 'int32'))
)
df.sample(5)

Unnamed: 0,Id_x,Name,HeadingId,CategoryId,StructureId,StandardProjectId,TypeId,Diameter,CrossSection,ImageIndex
14312,37004INOX,Угол CDSD 90 вертикальный внеш переходник прав...,15,20190823,0,0,0,0.0,0.0,0
10770,2058 LW 32,Опорная пластина для U-образных зажимных скоб ...,15,20190823,0,0,0,0.0,0.0,0
93985,GUKo2,Зажим натяжной магистральный,9,58,964,806,15,0.0,0.0,3
112796,АПвЭВ 3х120-10,,0,0,0,0,6,0.0,0.0,0
79015,Труба термостойкая ∅110х10мм. Красная,"Труба полимерная гладкостенная, повышенной тер...",16,20190609,0,0,0,0.0,0.0,0


**NearestNeighbors**

In [24]:
# Feature matrix for the neighbour search: every column except the part
# identifier and its free-text name. Row order matches `df`.
features = df.drop(columns=['Id_x', 'Name'])

In [59]:
# Number of neighbours returned for every query row.
n_neighbors = 35

# NOTE(review): cosine distance is computed on raw numeric codes here (label-encoded
# ids and large CategoryId values), so the largest-magnitude columns likely dominate
# the result -- confirm this is the intended notion of similarity.
model = NearestNeighbors(n_neighbors=n_neighbors, metric='cosine').fit(features)

In [69]:
# Simulated user request: 10 distinct rows of `df`.
# A fixed seed makes the demo reproducible under Restart & Run All, and sampling
# without replacement guarantees that the same row is never picked twice
# (np.random.randint could repeat a position).
request = df.sample(n=10, random_state=42)
request

Unnamed: 0,Id_x,Name,HeadingId,CategoryId,StructureId,StandardProjectId,TypeId,Diameter,CrossSection,ImageIndex
81681,TTDC28401FA,Зажим прокалывающий,9,58,2055,134,11,0.0,0.0,2
85494,CNA ______G28,Зажим аппаратный,9,58,1043,268,11,0.0,0.0,2
2378,010790,Угол плоский 90° - для односекционных кабель-к...,15,20190823,0,0,0,0.0,0.0,0
46234,LT 650 R3 FS,Т-образная секция,15,20190823,0,0,0,0.0,0.0,0
44329,LC8660INOX,"Угол горизонтальный 90º 80x600 R-600, нержавеющий",15,20190823,0,0,0,0.0,0.0,0
30773,CLP1K-400-3,Крышка на лоток осн. 400мм IEK,17,101,0,0,0,0.0,0.0,0
35955,DP1201,Плита из минерал. волокна с огнестойким покрыт...,15,20190823,0,0,0,0.0,0.0,0
70004,"КВХ-ГДР 1.25кВт/2,5м",Электронагреватель стрелочный стержневой КВХ-Г...,17,20180531,0,0,0,0.0,0.0,0
52293,rel-4312-120-150,Реле тепловое РТЭ-4312 120-150А EKF PROxima,17,20190823,0,0,0,0.0,0.0,0
80116,ЩМП-1-1-395х310х150-IP54-УХЛ2,ЩМП Корпуса щитов с монтажной панелью металлич...,15,20200217,0,0,0,0.0,0.0,0


In [70]:
# Build the recommendation list for `request`.
#
# For every requested row, take its nearest neighbours, drop the items that are
# already in the request, keep a small per-request quota, then merge all quotas
# into one list of at most N_RECOMMENDATIONS distinct items.

N_RECOMMENDATIONS = 25  # size of the final recommendation list

# Each requested row may contribute at most this many candidates (its fair share
# of the final list plus one spare).
per_request_quota = int(np.ceil(N_RECOMMENDATIONS / request.shape[0] + 1))

# Query all requested rows in a single call. Dropping the columns from the frame
# (instead of transposing one row at a time) keeps the numeric dtypes intact.
request_features = request.drop(columns=['Id_x', 'Name'])
distances, positions = model.kneighbors(request_features)

candidates = []
for row_distances, row_positions in zip(distances, positions):
    # kneighbors returns positions in the fitted matrix, hence .iloc; assign()
    # returns a new frame, so `df` itself is never written to.
    neighbours = df.iloc[row_positions].assign(distance=row_distances)
    # Never recommend something the user already asked for.
    neighbours = neighbours[~neighbours['Id_x'].isin(request['Id_x'])]
    # Neighbours arrive sorted by distance, so this keeps the closest row per item.
    neighbours = neighbours.drop_duplicates(subset=['Id_x'])
    candidates.append(neighbours.head(per_request_quota))

results = (
    pd.concat(candidates, axis=0)
    # Sort before de-duplicating so an item suggested for several requested rows
    # is kept with its smallest distance, not with whichever came first.
    .sort_values(by='distance', kind='stable')
    .drop_duplicates(subset=['Id_x'])
    .head(N_RECOMMENDATIONS)
    .sort_values(by='Name')
)
results

Unnamed: 0,Id_x,Name,HeadingId,CategoryId,StructureId,StandardProjectId,TypeId,Diameter,CrossSection,ImageIndex,distance
40922,gv2p05-pro,"Автомат пуска двигателя GV2P 0,63-1,0 А EKF P...",17,20190823,0,0,0,0.0,0.0,0,0.0
40923,gv2p06-pro,"Автомат пуска двигателя GV2P 1,0-1,6 А EKF PR...",17,20190823,0,0,0,0.0,0.0,0,0.0
40924,gv2p07-pro,"Автомат пуска двигателя GV2P 1,6-2,5 А EKF PR...",17,20190823,0,0,0,0.0,0.0,0,0.0
50264,OWP/R 418 /595/ IP54/IP54 HFR,Встраиваемый диммируемый светильник с люминице...,17,20190406,0,0,0,0.0,0.0,0,0.0
50263,OWP/R 418 /595/ IP54/IP54 HFD,Встраиваемый диммируемый светильник с люминице...,17,20190609,0,0,0,0.0,0.0,0,0.0
50260,OWP/R 414 /595/ IP54/IP54 HF mat,Встраиваемый светильник с люминицентными лампа...,17,20190406,0,0,0,0.0,0.0,0,0.0
67975,"Гайка М10, ГОСТ ISO 4032-2014",Гайка шестигранная нормальная с резьбой М10,15,20210505,0,0,0,0.0,0.0,0,0.0
67990,"Гайка М30, DIN934 (ГОСТ 5915-70)",Гайка шестигранная оцинкованная,15,20210505,0,0,0,0.0,0.0,0,0.0
81679,PA2870P,Зажим натяжной,9,58,2055,134,11,0.0,0.0,2,0.0
81685,PA28120P,Зажим натяжной,9,58,2055,134,11,0.0,0.0,2,0.0
