<a href="https://colab.research.google.com/github/SedoyChloric/work_in_collab/blob/main/SVR_QSPR.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
from google.colab import auth
auth.authenticate_user()
from sklearn.svm import SVR
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import mean_squared_error, r2_score
import gspread
from gspread.utils import ValueRenderOption
from google.auth import default
creds, _ = default()

gc = gspread.authorize(creds)

# Константы и функции

In [None]:
# Lowercase Latin letters; used as the default digit set of calculate_column when spelling column labels.
alphabet = "abcdefghijklmnopqrstuvwxyz"
# NOTE(review): not referenced anywhere in the visible notebook - presumably a sheet column used for clustering output; confirm before removing.
clusterisation_column = 8
# Message text ("I don't know such digits"); not referenced anywhere in the visible notebook.
idk = "Я не знаю таких цифр"

Обработка данных:

In [None]:
def prepare_clusters(winner_coordinates, learning_rate, sigma, iterations, column, frequency_threshold):
  """Build a single spreadsheet row: a run header followed by one cluster label per sample.

  Args:
    winner_coordinates: array whose rows are grouped by exact equality (np.unique along axis 0).
    learning_rate: value written to the header, rounded to 4 decimals.
    sigma: value written to the header, rounded to 4 decimals.
    iterations: value written to the header as is.
    column: 1-based sheet column number; its letter label is embedded in the two header formulas.
    frequency_threshold: groups with fewer members than this get the label -1.

  Returns:
    Array of shape (1, 5 + n_samples): the five header cells, then the labels.
  """
  letters = calculate_column(column)
  header = np.array([[
      np.round(learning_rate, 4),
      np.round(sigma, 4),
      iterations,
      f'=COUNTUNIQUE({letters}6:{letters})',
      f'="№"&столбец({letters}1)-7',
  ]])
  _, labels, counts = np.unique(winner_coordinates, axis=0, return_inverse=True, return_counts=True)
  # Look up, for every sample, whether its group is too small, and relabel those samples in one step.
  is_rare_group = counts < frequency_threshold
  labels[is_rare_group[labels]] = -1
  return np.append(header, [labels], axis=1)

def to_list(array):
  """Flatten `array` and return it as a nested list with one single-element row per value."""
  flat = array.reshape(-1)
  # Adding a trailing axis turns the flat vector into a column before conversion.
  return flat[:, None].tolist()

def generate_unique_pairs(data):
  """Return every unordered pair (i < j) of entries of `data`.

  Pairs are listed in the order (0,1), (0,2), ..., (1,2), ..., (n-2,n-1).

  Args:
    data: NumPy array. For 1-D input the entries are its scalars; otherwise
      each row ``data[i]`` is one entry (2-D input is expected).

  Returns:
    1-D input: array of shape (n_pairs, 2). The dtype is the input dtype
    promoted against float64, so integer input comes back as floats.
    2-D input: float64 array of shape (n_pairs, 2, data.shape[1]) where
    ``result[p, 0]`` and ``result[p, 1]`` are the two rows of pair ``p``.
  """
  # The strict upper triangle enumerates the i < j index pairs in the same
  # order as two nested loops, without re-allocating an array for every pair.
  first, second = np.triu_indices(data.shape[0], k=1)
  if data.ndim == 1:
    pairs = np.stack((data[first], data[second]), axis=1)
    # Concatenating onto an empty float64 array keeps the dtype promotion that
    # callers have always seen from this function (e.g. ints become floats).
    return np.concatenate((np.empty((0, 2)), pairs), axis=0)
  pairs = np.empty((first.size, 2, data.shape[1]))
  pairs[:, 0, :] = data[first]
  pairs[:, 1, :] = data[second]
  return pairs


In [None]:
# Scratch check: np.array((2, 0)) converts the tuple into the 1-D array [2 0]; the tuple is data here, not a shape.
print(np.array((2, 0)))

[2 0]


Работа с гугл-таблицами

In [None]:
def calculate_column(number, alphabet=alphabet):
  """Convert a 1-based column number into its letter label.

  Uses bijective numeration over `alphabet`, i.e. with the default alphabet
  1 -> 'a', 26 -> 'z', 27 -> 'aa', 28 -> 'ab'.

  Args:
    number: 1-based column index. Values <= 0 produce an empty string.
    alphabet: sequence of symbols to use as digits; defaults to the
      module-level `alphabet`.

  Returns:
    The column label as a string.
  """
  # The base follows the alphabet actually passed in instead of a fixed 26,
  # so a custom alphabet of another length is spelled correctly.
  base = len(alphabet)
  column = ''
  while number > 0:
    # Subtracting 1 first makes the numbering bijective (there is no zero digit).
    number, remainder = divmod(number - 1, base)
    column = alphabet[remainder] + column
  return column

def ccell(column, row): #calculatecell
  """Return the cell address for a 1-based column number and a row, e.g. (2, 5) -> 'b5'."""
  letters = calculate_column(column)
  return letters + str(row)

def ccell_range(start_column, start_row, end_column, end_row):
  """Return a 'start:end' range address built from two (column number, row) corners."""
  top_left = ccell(start_column, start_row)
  bottom_right = ccell(end_column, end_row)
  return f'{top_left}:{bottom_right}'

def get_data_from_googlesheet(google_spreedsheet, sheet_name):
  """Read one worksheet and split it into identifiers, sources, values and value names.

  The first row of the sheet is treated as the header. Column 1 holds the
  identifiers, the last column holds the sources, and columns 3 .. last-1
  hold the numeric values.

  Args:
    google_spreedsheet: object exposing ``worksheet(name).get_all_values()``.
    sheet_name: title of the worksheet to read.

  Returns:
    Tuple ``(ids, sources, values, value_names)`` where ``values`` is a
    float64 array of shape (n_data_rows, n_value_columns).
  """
  cells = np.array(google_spreedsheet.worksheet(sheet_name).get_all_values())  # whole sheet as a 2-D string array
  header_row = cells[0]
  data_rows = cells[1:]
  value_names = header_row[3:-1]  # header cells of the value columns
  ids = data_rows[:, 1]
  sources = data_rows[:, -1]
  values = data_rows[:, 3:-1].astype('float64')
  return ids, sources, values, value_names


# Получение данных по работе и ввод листа экспериментов


In [None]:
# Open the two spreadsheets by key: one is read for input data, the other receives the result worksheets.
data_spreadsheet = gc.open_by_key('199JJj9XfyCkrX9I6I0PKK07vmkfNWT2OPLLX1OSwT8s')
experiment_spreadsheet = gc.open_by_key('1DPg6_La4imPZVX83Jqd-5U9pd7AGN5j_w8GSd9TlmWk')
# Load the index table and the property table from their worksheets.
# source_array is assigned by both calls, so only the value from the second worksheet survives.
indices_id_array, source_array, indices, name_of_indices = get_data_from_googlesheet(data_spreadsheet, "Статья1QSPR")
properties_id_array, source_array, properties, name_of_properties = get_data_from_googlesheet(data_spreadsheet, "Статья1св-ва")

In [None]:
# Ask for the target worksheet title, then reuse that worksheet if it exists or create it otherwise.
sheet_name = input("Результаты будут импортированы в гугл-таблицу. \nИмя листа будет: ")
try:
  worksheet = experiment_spreadsheet.worksheet(sheet_name)
except gspread.exceptions.WorksheetNotFound:
  # Only a missing worksheet triggers creation; other API failures (quota, permissions, network)
  # propagate instead of being mistaken for "worksheet already exists".
  worksheet = experiment_spreadsheet.add_worksheet(title=sheet_name, rows=1000, cols=1000)


Результаты будут импортированы в гугл-таблицу. 
Имя листа будет: 1d variation


In [None]:
# Base estimator and hyper-parameter grid shared by the training cells below.
svr = SVR()
# Only the linear kernel is searched; C and epsilon are the parameters that actually vary.
# NOTE(review): per the scikit-learn SVR docs, gamma applies to the rbf/poly/sigmoid kernels, so it presumably has no effect here - confirm.
param_grid = {
    'kernel': ['linear'],
    'C':[0.001, 0.01, 0.1, 0.5, 1, 10],
    'gamma': ['scale'],
    'epsilon': [0.001, 0.01, 0.1, 0.2]
    }

# 5-fold cross-validated search scored by negative MSE, using all available cores (n_jobs=-1).
grid_search = GridSearchCV(svr, param_grid, cv=5, scoring='neg_mean_squared_error', verbose=0, n_jobs=-1)

# Основной блок реализации


## Версия с минмаксной нормализацией

In [None]:
# Min-max scaling: both scalers are fitted on the training split only and then applied to the test split.
indices_scaler = MinMaxScaler()
properties_scaler = MinMaxScaler()
# shuffle=False keeps the sheet order, so the last 10% of the rows form the test split.
indices_train, indices_test, properties_train, properties_test = train_test_split(indices, properties, test_size=0.1, shuffle=False)
indices_train_scaled = indices_scaler.fit_transform(indices_train)
indices_test_scaled = indices_scaler.transform(indices_test)
properties_train_scaled = properties_scaler.fit_transform(properties_train)
properties_test_scaled = properties_scaler.transform(properties_test)
# Per-column minimum and range of the training data; later cells use them to map
# coefficients fitted on scaled data back to the original units.
min_indices = indices_scaler.data_min_
min_properties = properties_scaler.data_min_
range_indices = indices_scaler.data_range_
range_properties = properties_scaler.data_range_
# All index-column pairs (i < j) as integer positions.
pairs = generate_unique_pairs(np.arange(len(name_of_indices))).astype(int)
# The first report is about the indices, so it must print min_indices (it used to print min_properties).
print(f'Мин-макс для индексов:\n{min_indices}\n')
print(f'Разброс для индексов:\n{range_indices}')
print(f'Мин-макс для свойств:\n{min_properties}\n')
print(f'Разброс для свойств:\n{range_properties}')
#worksheet.update(indices_train_scaled.tolist(), 'j50')
#worksheet.update(properties_train_scaled.tolist(), 'k35')

Мин-макс для индексов:
[247.9  89.  103.7  46.5  36.9  14.6  38.1 102.7]

Разброс для индексов:
[3.191e+01 1.717e+01 4.040e+02 1.296e+03 7.780e+00 1.979e+02 5.401e+03
 5.370e+00]
Мин-макс для свойств:
[247.9  89.  103.7  46.5  36.9  14.6  38.1 102.7]

Разброс для свойств:
[733.9 181.  443.9 115.7 119.3  47.3  65.2 317.4]


## Основной блок


### Совершите выбор:

8d

In [None]:
# "8d" choice: wrap every array in a leading axis of length 1, so that all index columns
# together form one single combination for the cells below.
pairs_of_scaled_indices = np.array([indices_train_scaled.T])
pairs_of_range_indices = np.array([range_indices])
pairs_of_min_indices = np.array([min_indices])
pairs_of_min_properties = np.array([min_properties])
pairs_of_range_properties = np.array([range_properties])

# Unscaled index values (all samples, not only the training split) and the index names, wrapped the same way.
pairs_of_indices = np.array([indices.T])
pairs_of_names_indices = np.array([name_of_indices])

2d

In [None]:
# "2d" choice: every array is expanded into all unordered pairs of index columns,
# so each combination handled by the cells below consists of two indices.
pairs_of_scaled_indices = generate_unique_pairs(indices_train_scaled.T)
pairs_of_range_indices = generate_unique_pairs(range_indices)
pairs_of_min_indices = generate_unique_pairs(min_indices)
pairs_of_min_properties = generate_unique_pairs(min_properties)
pairs_of_range_properties = generate_unique_pairs(range_properties)

# Unscaled index values (all samples, not only the training split) and the index names, paired the same way.
pairs_of_indices = generate_unique_pairs(indices.T)
pairs_of_names_indices = generate_unique_pairs(name_of_indices)

1d


In [None]:
# "1d" choice: every index column is its own combination, i.e. arrays get the shape
# (number of indices, 1, number of samples) or (number of indices, 1).
# The sample axis of the scaled array is taken from the training split itself rather than
# assuming that the test split always holds exactly two samples.
pairs_of_scaled_indices = indices_train_scaled.T.reshape(indices.shape[1], 1, indices_train_scaled.shape[0])
pairs_of_range_indices = range_indices.reshape(-1, 1)
pairs_of_min_indices = min_indices.reshape(-1, 1)
pairs_of_min_properties = min_properties.reshape(-1, 1)
pairs_of_range_properties = range_properties.reshape(-1, 1)

# Unscaled index values (all samples) and the index names, shaped the same way.
pairs_of_indices = indices.T.reshape(indices.shape[1], 1, indices.shape[0])
pairs_of_names_indices = name_of_indices.reshape(-1, 1)

### ВЫВОД (просто нажать кнопку)





Полезные константы

In [None]:
# Sizes derived from the loaded data and from the 1d/2d/8d choice made above.
# The trailing numbers are the values the original author recorded for their data set.
indices_number = name_of_indices.shape[0]    #8
prop_number = name_of_properties.shape[0]    #8
sample_number = properties.shape[0]          #14
# Coefficients per model: one per index in a combination plus the intercept.
coef_number = pairs_of_indices.shape[1] + 1  #2, 3, 9
# Number of index combinations that get their own model.
pairs_number = pairs_of_indices.shape[0]     #8, 28, 1

# Layout parameters of the verification blocks: blank rows after the predictions and
# the number of metric rows below them (the later cells write RMSE and R^2 there).
empty_str_number = 1
param_number = 2

# Top-left cell (1-based row / column numbers) of the coefficient block.
first_block_row = 2
first_block_column = 2

# Anchor of the area to the right of the coefficient block: one row above it,
# first column after the prop_number property columns.
initial_row = first_block_row - 1
initial_column = first_block_column + prop_number

Коэффициенты

In [None]:
# Fit one linear SVR per (index combination, property) on min-max scaled data and write
# the coefficients, converted back to original units, into the coefficient block.
row = first_block_row
column = first_block_column

# Header row with the property names, one row above the block.
worksheet.update([name_of_properties.tolist()], ccell(column, row-1))
for i in range(pairs_number):
  print(f'\nОбучение SVR по индексам {pairs_of_names_indices[i]}\n')
  # Row labels for this combination: "a0" (intercept) followed by the index names.
  title = np.insert(pairs_of_names_indices[i], 0, "a0").reshape(-1, 1)
  # Placeholder first column so that hstack has something to extend; it is deleted after the inner loop.
  iteration = np.empty_like(title)
  print(f'Диапазон индексов: {pairs_of_range_indices[i]}')
  for j in range(prop_number):
    print(f"Обучение SVR по свойству {name_of_properties[j]}...")
    grid_search.fit(pairs_of_scaled_indices[i].T, properties_train_scaled[:, j])
    print(f'Диапазон Свойства: {range_properties[j]}')
    print(f'Коэффициенты: {grid_search.best_estimator_.coef_[0, :]}')
    print(f"Свободный член: {grid_search.best_estimator_.intercept_}")
    # Undo the min-max scaling: slope in original units = scaled slope / index range * property range.
    coefficients = grid_search.best_estimator_.coef_.ravel() / pairs_of_range_indices[i] * range_properties[j]
    print(f'Коэффициенты после обработки: {coefficients}')
    # Intercept in original units: property minimum + scaled intercept * property range
    # minus the contribution of the index minima.
    intercept = min_properties[j]+grid_search.best_estimator_.intercept_[0]*range_properties[j] - np.sum(coefficients * pairs_of_min_indices[i])
    print(f'Свободный член после обработки: {intercept}')
    # One column per property: intercept first, then the coefficients, rounded to 6 decimals.
    subiteration = np.array(np.round(np.array([[intercept]]), 6))
    subiteration = np.append(subiteration, np.round(coefficients.reshape(-1, 1), 6), axis=0)
    iteration = np.hstack((iteration, subiteration))
  # Drop the placeholder column.
  iteration = np.delete(iteration, 0, axis=1)
  # Labels go one column to the left of the values; the next combination starts right below.
  worksheet.update(title.tolist(), ccell(column-1, row))
  worksheet.update(iteration.tolist(), ccell(column, row), value_input_option='USER_ENTERED')
  row += title.shape[0]

пары индексов

In [None]:
# Write the unscaled index values of every combination to the right of the coefficient block,
# stacked vertically: one name row followed by sample_number value rows per combination.
row = initial_row
column = initial_column

# Names as (pairs_number, coef_number - 1, 1) so they can sit in front of the sample values.
pairs_of_titles = pairs_of_names_indices.reshape(pairs_number, coef_number - 1, 1)
# Object-dtype placeholders that reserve the name slot / the sample slots in the two layouts.
empty_title = np.empty((pairs_number, coef_number - 1, 1), dtype=object)
empty_samples = np.empty((pairs_number, coef_number - 1, sample_number), dtype=object)
# Layout 1: placeholder + values. Layout 2: names + placeholders. Both have 1 + sample_number entries per index.
pairs_of_indices_with_empty_titles = np.append(empty_title, pairs_of_indices, axis=2)
pairs_of_names_indices_with_empty_samples = np.append(pairs_of_titles, empty_samples, axis=2)
# Concatenate all combinations along the sample axis, starting from the first one.
first_pair_of_samples = pairs_of_indices_with_empty_titles[0]
first_pair_of_names = pairs_of_names_indices_with_empty_samples[0]
for i in range(1, pairs_number):
  first_pair_of_samples = np.append(first_pair_of_samples, pairs_of_indices_with_empty_titles[i], axis=1)
  first_pair_of_names = np.append(first_pair_of_names, pairs_of_names_indices_with_empty_samples[i], axis=1)
# After transposing, rows are samples/names and columns are the indices of a combination.
first_pair_of_samples = np.transpose(first_pair_of_samples)
first_pair_of_names = np.transpose(first_pair_of_names)
# Both layouts are written to the same anchor cell, values first and names second.
# NOTE(review): presumably the placeholder cells leave the existing sheet content untouched, so the two writes interleave - confirm.
worksheet.update(first_pair_of_samples.tolist(), ccell(column, row), value_input_option='USER_ENTERED')
worksheet.update(first_pair_of_names.tolist(), ccell(column, row), value_input_option='USER_ENTERED')

{'spreadsheetId': '1DPg6_La4imPZVX83Jqd-5U9pd7AGN5j_w8GSd9TlmWk',
 'updatedRange': "'1d variation'!J1:J106",
 'updatedRows': 8,
 'updatedColumns': 1,
 'updatedCells': 8}

Подготовка блока математики

In [None]:
# Read the coefficient block back from the sheet and lay out, for every property,
# a header row (property name + combination names) with the coefficients below it.
row = initial_row
column = initial_column + coef_number - 1
# Last row of the coefficient block: it starts at first_block_row (not first_block_column)
# and holds coef_number rows for each of the pairs_number combinations.
end_row = first_block_row + coef_number * pairs_number - 1
end_col = first_block_column + prop_number - 1
all_coef = np.array(worksheet.get(ccell_range(first_block_column, first_block_row, end_col, end_row))).astype('float64')
# Sheet layout is (combination * coefficient, property); regroup to (property, combination, coefficient).
all_coef = np.transpose(all_coef)
all_coef = all_coef.reshape(prop_number, pairs_number, coef_number)
# One label per combination, e.g. "name1+name2".
pairs_of_names_in_one_cell = np.array(['+'.join(str(name) for name in names) for names in pairs_of_names_indices])
for i in range(prop_number):
  str_for_update = np.append(name_of_properties[i], pairs_of_names_in_one_cell)
  worksheet.update([str_for_update.tolist()], ccell(column, row))
  # Coefficients go below the header, one column per combination.
  worksheet.update(all_coef[i].T.tolist(), ccell(column+1, row+1))
  # Height of one property block: header + coefficients + predictions + blank rows + metric rows.
  row += coef_number + sample_number + param_number + empty_str_number + 1

Перенос свойств

In [None]:
# Copy the unscaled property table to the sheet, to the right of the per-combination columns:
# property names in the anchor row, the values starting one row below.
row = initial_row
column = initial_column + pairs_of_names_indices.shape[1] + pairs_number + 1
worksheet.update([name_of_properties.tolist()], ccell(column, row))
worksheet.update(properties.tolist(), ccell(column, row+1))


{'spreadsheetId': '1DPg6_La4imPZVX83Jqd-5U9pd7AGN5j_w8GSd9TlmWk',
 'updatedRange': "'8d variation'!T2:AA15",
 'updatedRows': 14,
 'updatedColumns': 8,
 'updatedCells': 112}

Проводит все необходимые математические операции в блоке

In [None]:
# Fill every property block with spreadsheet formulas: one predicted value per (sample, combination),
# followed by blank rows, an RMSE row and an R^2 row. The formulas use Russian-locale function names.
row = initial_row + coef_number + 1
column = initial_column + coef_number - 1  + 1
# First data cell of the property table; prop_column advances by one per property.
prop_row = initial_row + 1
prop_column = initial_column + coef_number - 1 + pairs_number + 1
# Columns that hold the index values of one combination.
indices_start_col = initial_column
indices_end_col = initial_column + coef_number - 2

empty_strs = np.empty((empty_str_number, pairs_number), dtype=object)
# Running cell counter: value // pairs_number is the sample number, value % pairs_number the combination.
sup_arr = np.arange(sample_number*pairs_number).reshape(sample_number, pairs_number)

#verification = f'=мумнож({ccell_range(20, (verification // 28 + 15 * (verification % 28)+2), 21, (verification // 28 + 15 * (verification % 28)+2))};{ccell_range(((verification % 28)+23), (row - 2), ((verification % 28)+23), (row-1))})+{ccell(verification % 28)+23, row-3}'
for prop in range(prop_number):
  verification = np.empty_like(sup_arr, dtype=object)
  for r in range(verification.shape[0]):
    for c in range(verification.shape[1]):
      # Sheet row with this sample's index values: combinations are stacked vertically,
      # each occupying sample_number + 1 rows (name row + samples).
      indices_row = sup_arr[r, c] // pairs_number + (sample_number+1) * (sup_arr[r, c] % pairs_number) + initial_row + 1
      indices_range = ccell_range(indices_start_col, indices_row, indices_end_col, indices_row)
      # Column of this combination inside the current property block; the slope
      # coefficients sit in the rows directly above the predictions, the intercept above them.
      coef_column = (sup_arr[r, c] % pairs_number) + column
      coef_start_row = row - coef_number + 1
      coef_end_row = row - 1
      coef_range = ccell_range(coef_column, coef_start_row, coef_column, coef_end_row)
      intercept_cell = ccell((sup_arr[r, c] % pairs_number) + column, row - coef_number)
      # Prediction = matrix product (мумнож = MMULT) of the index row and the coefficient column, plus the intercept.
      verification[r, c] = f'=мумнож({indices_range};{coef_range})+{intercept_cell}'
  rmse = np.empty((pairs_number), dtype=object)
  rsquared = np.empty((pairs_number), dtype=object)
  for i in range(verification.shape[1]):
    # NOTE(review): sup_arr[1, i] % pairs_number evaluates to i, but indexing row 1 requires sample_number >= 2.
    model_column = sup_arr[1, i] % pairs_number + column
    # Predicted column vs. the true property column.
    correspondence = f'{ccell_range(model_column, row, model_column, row+sample_number-1)};{ccell_range(prop_column, prop_row, prop_column, prop_row+sample_number-1)}'
    # RMSE = sqrt(sum of squared differences / n); корень = SQRT, суммквразн = SUMXMY2.
    rmse[i] = f'=корень(суммквразн({correspondence})/{sample_number})'
    # квпирсон = RSQ (squared Pearson correlation).
    rsquared[i] = f'=квпирсон({correspondence})'
  prop_column += 1
  verification = np.append(verification, empty_strs, axis=0)
  verification = np.append(verification, [rmse], axis=0)
  verification = np.append(verification, [rsquared], axis=0)
  worksheet.update(verification.tolist(), ccell(column, row),value_input_option='USER_ENTERED')
  # Move to the same position inside the next property block.
  row += coef_number + sample_number + param_number + empty_str_number + 1

Выводы

In [None]:

# Summary table below the copied property table: for every property, the best and worst
# combination by RMSE and by R^2, each with its metric value.
row = initial_row + sample_number + 1
column = initial_column + coef_number - 1 + pairs_number + 1
#row = initial_row
#column = initial_column + prop_number + pairs_of_names_indices.shape[1] + 1 + pairs_number
# RMSE row of the first property block (the R^2 row is verrow + 1) and first combination column.
verrow = initial_row + coef_number + sample_number + empty_str_number + 1
vercol = initial_column + coef_number - 1 + 1

# 8 result rows per property (param_number*4 == 8 with param_number = 2), matching the 8 titles after "Свойство".
conclusion_table = np.empty((param_number*4, prop_number), dtype=object)
titles = np.array([["Свойство"], ["Лучшая пара по RMSE"], ["Наименьшая RMSE"], ["Худшая пара по RMSE"], ["Наибольшая RMSE"], ["Лучшая пара по R^2"], ["Наивысшая R^2"], ["Худшая пара по R^2"], ["Наименьшая R^2"]])
for i in range(prop_number):
  # Formulas: индекс = INDEX, поискпоз = MATCH, мин/макс = MIN/MAX.
  # The name lookups read the combination names from the row at initial_row.
  # RMSE row: the minimum is the best combination, the maximum the worst.
  search_range = ccell_range(vercol, verrow, vercol+pairs_number-1, verrow)
  conclusion_table[0, i] = f'=индекс({ccell_range(vercol, initial_row, vercol+pairs_number-1, initial_row)}; поискпоз(мин({search_range}); {search_range}; 0))'
  conclusion_table[1, i] = f'=индекс({search_range}; поискпоз(мин({search_range}); {search_range}; 0))'
  conclusion_table[2, i] = f'=индекс({ccell_range(vercol, initial_row, vercol+pairs_number-1, initial_row)}; поискпоз(макс({search_range}); {search_range}; 0))'
  conclusion_table[3, i] = f'=индекс({search_range}; поискпоз(макс({search_range}); {search_range}; 0))'
  # R^2 row: the maximum is the best combination, the minimum the worst.
  search_range = ccell_range(vercol, verrow+1, vercol+pairs_number-1, verrow+1)
  conclusion_table[4, i] = f'=индекс({ccell_range(vercol, initial_row, vercol+pairs_number-1, initial_row)}; поискпоз(макс({search_range}); {search_range}; 0))'
  conclusion_table[5, i] = f'=индекс({search_range}; поискпоз(макс({search_range}); {search_range}; 0))'
  conclusion_table[6, i] = f'=индекс({ccell_range(vercol, initial_row, vercol+pairs_number-1, initial_row)}; поискпоз(мин({search_range}); {search_range}; 0))'
  conclusion_table[7, i] = f'=индекс({search_range}; поискпоз(мин({search_range}); {search_range}; 0))'
  # Jump to the metric rows of the next property block.
  verrow += coef_number + sample_number + param_number + empty_str_number + 1
# Prepend the property names as first row and the titles as first column, then transpose
# so that every property becomes one sheet row.
conclusion_table = np.append([name_of_properties], conclusion_table, axis=0)
conclusion_table = np.append(titles, conclusion_table, axis=1)
conclusion_table = np.transpose(conclusion_table)
worksheet.update(conclusion_table.tolist(), ccell(column, row),value_input_option='USER_ENTERED')

{'spreadsheetId': '1DPg6_La4imPZVX83Jqd-5U9pd7AGN5j_w8GSd9TlmWk',
 'updatedRange': "'8d variation'!T16:AB24",
 'updatedRows': 9,
 'updatedColumns': 9,
 'updatedCells': 81}

## 2D Без преобразования коэффициентов

In [None]:
# For every pair of indices and every property: grid-search a linear SVR on the scaled training
# data and write the best parameters, the CV score and the coefficients in scaled space
# (no conversion back to original units) to the sheet.
# subrow / subcol are set here but not used in this cell.
subrow = 2
subcol = 10
row = 2
column = 2
worksheet.update([name_of_properties.tolist()], ccell(column, row-1))
for i in range(pairs.shape[0]):
  print(f'\nОбучение SVR по индексам {name_of_indices[pairs[i, 0]]} и {name_of_indices[pairs[i, 1]]}\n')
  grid_search = GridSearchCV(svr, param_grid, cv=5, scoring='neg_mean_squared_error', verbose=1, n_jobs=-1)
  # Two scaled index columns as an (n_samples, 2) feature matrix.
  pair_of_indices = np.append([indices_train_scaled[:, pairs[i, 0]]], [indices_train_scaled[:, pairs[i, 1]]], axis=0).T
  title = np.array([["C"], ["epsilon"], ["MSE"], ["a0"], [name_of_indices[pairs[i, 0]]], [name_of_indices[pairs[i, 1]]]])
  # Placeholder first column; removed again after the inner loop.
  iteration = np.empty_like(title)
  for j in range(properties_train.shape[1]):
    print(f"Обучение SVR по свойству {name_of_properties[j]}...")
    grid_search.fit(pair_of_indices, properties_train_scaled[:, j])
    coefficients = grid_search.best_estimator_.coef_.ravel()
    intercept = grid_search.best_estimator_.intercept_
    print(f"Коэффициенты: {coefficients}")
    print(f"Свободный член: {intercept}")
    # First two best_params_ values - presumably C and epsilon, matching the row titles;
    # this relies on the key order of best_params_ (verify).
    subiteration = np.array(np.array(list(grid_search.best_params_.values())[0:2]).reshape(-1, 1))
    # best_score_ is the mean cross-validated negative MSE of the best parameter set.
    subiteration = np.append(subiteration, [[np.round(grid_search.best_score_, 4)]], axis=0)
    subiteration = np.append(subiteration, np.round(np.array([grid_search.best_estimator_.intercept_]), 6), axis=0)
    subiteration = np.append(subiteration, np.round(grid_search.best_estimator_.coef_.reshape(-1, 1), 6), axis=0)
    iteration = np.hstack((iteration, subiteration))
  iteration = np.delete(iteration, 0, axis=1)
  # Row labels one column left of the values; the next pair starts right below.
  worksheet.update(title.tolist(), ccell(column-1, row))
  worksheet.update(iteration.tolist(), ccell(column, row), value_input_option='USER_ENTERED')
  row += title.shape[0]

8D

In [None]:
# One linear SVR per property on all scaled indices at once; results are converted back
# to original units with the min-max statistics (min_* / range_*) and written column by column.
# subrow / subcol are set here but not used in this cell.
subrow = 2
subcol = 10
# NOTE(review): the offset 14 is hard-coded - presumably it places this block below an earlier one; confirm.
row = 2 + 14
column = 2
worksheet.update([name_of_properties.tolist()], ccell(column, row-1))
# Row labels: best parameters, CV score, intercept, then one row per index.
title = np.array([["C"], ["epsilon"], ["MSE"], ["a0"]])
title = np.append(title, name_of_indices.reshape(-1, 1), axis=0)
worksheet.update(title.tolist(), ccell(column-1, row))
for i in range(properties_train.shape[1]):
  print(f"\nОбучение SVR по свойству {name_of_properties[i]}...\n")
  grid_search = GridSearchCV(svr, param_grid, cv=5, scoring='neg_mean_squared_error', verbose=0, n_jobs=-1)
  grid_search.fit(indices_train_scaled, properties_train_scaled[:, i])
  coefficients = grid_search.best_estimator_.coef_.ravel()
  intercept = grid_search.best_estimator_.intercept_
  print(f"Коэффициент: {coefficients}")
  print(f"Свободный член: {intercept}")
  # Undo the min-max scaling of features and target.
  coefficients = (coefficients / range_indices) * range_properties[i]
  intercept = min_properties[i] + grid_search.best_estimator_.intercept_[0] * range_properties[i] - np.sum(coefficients * min_indices)
  print(f'Коэффициент после обработки: {coefficients}')
  print(f'Свободный член после обработки: {intercept}')
  # First two best_params_ values - presumably C and epsilon; relies on the key order (verify).
  iteration = np.array(np.array(list(grid_search.best_params_.values())[0:2]).reshape(-1, 1))
  iteration = np.append(iteration, [[np.round(grid_search.best_score_, 4)]], axis=0)
  iteration = np.append(iteration, np.round(np.array([intercept]), 6).reshape(1, 1), axis=0)
  iteration = np.append(iteration, np.round(coefficients.reshape(-1, 1), 6), axis=0)
  # Each property fills the next sheet column.
  worksheet.update(iteration.tolist(), f'{calculate_column(column)}{row}', value_input_option='USER_ENTERED')
  column += 1

## Версии со стандартной нормализацией


In [None]:
# Standard (z-score) scaling variant. This rebinds the scaler, split and *_scaled names that the
# min-max cell also defines, so later cells see whichever of the two cells ran last.
indices_scaler = StandardScaler()
properties_scaler = StandardScaler()
# shuffle=False keeps the sheet order; here 20% of the rows (taken from the end) form the test split.
indices_train, indices_test, properties_train, properties_test = train_test_split(indices, properties, test_size=0.2, shuffle=False)
indices_train_scaled = indices_scaler.fit_transform(indices_train)
indices_test_scaled =  indices_scaler.transform(indices_test)
properties_train_scaled = properties_scaler.fit_transform(properties_train)
properties_test_scaled =  properties_scaler.transform(properties_test)
# Per-column scale and mean of the training data, used later to convert coefficients back to original units.
std_indices = indices_scaler.scale_
mean_indices = indices_scaler.mean_
std_properties = properties_scaler.scale_
mean_properties = properties_scaler.mean_
# All index-column pairs (i < j) as integer positions.
pairs = generate_unique_pairs(np.arange(len(name_of_indices))).astype(int)
print(f'Среднее по индексам: \n{mean_indices}')
print(f'Стандартное по индексам: \n{std_indices}')
print(f'Среднее по Свойствам: \n{mean_properties}')
print(f'Стандартное по Свойствам: \n{std_properties}')

# Dump the scaled training matrices to fixed cells of the worksheet.
worksheet.update(indices_train_scaled.tolist(), 'k22')
worksheet.update(properties_train_scaled.tolist(), 'k35')

Среднее по индексам: 
[  26.07666667   14.72        302.91666667  910.58333333    5.35416667
  149.27416667 3744.83333333    4.85416667]
Стандартное по индексам: 
[1.22210824e+01 6.33377586e+00 1.54977664e+02 5.04345857e+02
 2.50163100e+00 7.66757092e+01 2.08222160e+03 2.02681383e+00]
Среднее по Свойствам: 
[537.85583333 185.54166667 287.1         88.38333333  90.61666667
  35.91666667  60.14166667 254.325     ]
Стандартное по Свойствам: 
[199.82609554  42.57222213 115.19037865  28.22297626  39.25919921
  15.56833503  18.29072891 103.90360216]


{'spreadsheetId': '1DPg6_La4imPZVX83Jqd-5U9pd7AGN5j_w8GSd9TlmWk',
 'updatedRange': "'test 13\\'!K35:R46",
 'updatedRows': 12,
 'updatedColumns': 8,
 'updatedCells': 96}

In [None]:
# For every pair of indices and every property: grid-search a linear SVR on standard-scaled
# training data, convert intercept and coefficients back to original units and write them to the sheet.
subrow = 2
subcol = 10
row = 2
column = 2

worksheet.update([name_of_properties.tolist()], ccell(column, row-1))
for i in range(pairs.shape[0]):
  print(f'Обучение SVR по индекксам {name_of_indices[pairs[i, 0]]} и {name_of_indices[pairs[i, 1]]}')
  grid_search = GridSearchCV(svr, param_grid, cv=5, scoring='neg_mean_squared_error', verbose=1, n_jobs=-1)
  # Two scaled index columns as an (n_samples, 2) feature matrix.
  pair_of_indices = np.append([indices_train_scaled[:, pairs[i, 0]]], [indices_train_scaled[:, pairs[i, 1]]], axis=0).T
  title = np.array([["C"], ["epsilon"], ["MSE"], ["a0"], [name_of_indices[pairs[i, 0]]], [name_of_indices[pairs[i, 1]]]])
  # Placeholder first column; removed again after the inner loop.
  iteration = np.empty_like(title)
  for j in range(properties_train.shape[1]):
    print(f"Обучение SVR по свойству {name_of_properties[j]}...")
    grid_search.fit(pair_of_indices, properties_train_scaled[:, j])
    # Undo the standard scaling: slope = scaled slope / std(index) * std(property).
    coefficients = grid_search.best_estimator_.coef_.ravel() / std_indices[pairs[i]] * std_properties[j]
    intercept = (grid_search.best_estimator_.intercept_[0] * std_properties[j]) + mean_properties[j] - np.sum(coefficients * mean_indices[pairs[i]])
    subiteration = np.array(np.array(list(grid_search.best_params_.values())[0:2]).reshape(-1, 1))
    subiteration = np.append(subiteration, [[np.round(grid_search.best_score_, 4)]], axis=0)
    # The intercept is written before the coefficients so that the values line up with the
    # row titles ("a0", first index, second index).
    subiteration = np.append(subiteration, np.round(np.array([intercept]), 6).reshape(-1, 1), axis=0)
    subiteration = np.append(subiteration, np.round(coefficients.reshape(-1, 1), 6), axis=0)
    iteration = np.hstack((iteration, subiteration))
  iteration = np.delete(iteration, 0, axis=1)
  # Row labels one column left of the values; the next pair starts right below.
  worksheet.update(title.tolist(), ccell(column-1, row))
  worksheet.update(iteration.tolist(), ccell(column, row), value_input_option='USER_ENTERED')
  row += title.shape[0]

Коэффициенты скалированные

In [None]:
# Same sweep over index pairs and properties, but the intercept and coefficients are written
# exactly as fitted on the scaled data (no conversion back to original units).
# subrow / subcol are set here but not used in this cell.
subrow = 2
subcol = 10
row = 2
column = 2
worksheet.update([name_of_properties.tolist()], ccell(column, row-1))
for i in range(pairs.shape[0]):
  print(f'Обучение SVR по индекксам {name_of_indices[pairs[i, 0]]} и {name_of_indices[pairs[i, 1]]}')
  grid_search = GridSearchCV(svr, param_grid, cv=5, scoring='neg_mean_squared_error', verbose=1, n_jobs=-1)
  # Two scaled index columns as an (n_samples, 2) feature matrix.
  pair_of_indices = np.append([indices_train_scaled[:, pairs[i, 0]]], [indices_train_scaled[:, pairs[i, 1]]], axis=0).T
  title = np.array([["C"], ["epsilon"], ["MSE"], ["a0"], [name_of_indices[pairs[i, 0]]], [name_of_indices[pairs[i, 1]]]])
  # Placeholder first column; removed again after the inner loop.
  iteration = np.empty_like(title)
  for j in range(properties_train.shape[1]):
    print(f"Обучение SVR по свойству {name_of_properties[j]}...")
    grid_search.fit(pair_of_indices, properties_train_scaled[:, j])
    # First two best_params_ values - presumably C and epsilon; relies on the key order (verify).
    subiteration = np.array(np.array(list(grid_search.best_params_.values())[0:2]).reshape(-1, 1))
    subiteration = np.append(subiteration, [[np.round(grid_search.best_score_, 4)]], axis=0)
    subiteration = np.append(subiteration, np.round(np.array([grid_search.best_estimator_.intercept_]), 6), axis=0)
    subiteration = np.append(subiteration, np.round(grid_search.best_estimator_.coef_.reshape(-1, 1), 6), axis=0)
    iteration = np.hstack((iteration, subiteration))
  iteration = np.delete(iteration, 0, axis=1)
  worksheet.update(title.tolist(), ccell(column-1, row))
  worksheet.update(iteration.tolist(), ccell(column, row), value_input_option='USER_ENTERED')
  row += title.shape[0]

1d вариация


In [None]:
# 1d variation: for every property, fit one linear SVR per single index and write one block of
# 7 rows (property name, C, epsilon, gamma, MSE, a0, a1) with one column per index.
# subcol / subrow address a side table that lists, per property, the best index.
subrow = 2
subcol = 10
row = 2
column = 2
for i in range(properties_train.shape[1]):
  print(f"Training SVR for property {name_of_properties[i]}...")
  title = np.array([[f"{name_of_properties[i]}"], ["C"], ["epsilon"], ["gamma"], ["MSE"], ["a0"], ["a1"]])
  worksheet.update(title.tolist(), f'{calculate_column(column-1)}{row}')
  worksheet.update_acell(f'{calculate_column(subcol)}{subrow}', f'{name_of_properties[i]}')
  # Placeholder first column; removed again after the inner loop.
  iteration = np.empty_like(title)
  for j in range(indices_train.shape[1]):
    print(f"By molecular indice {name_of_indices[j]}...")
    grid_search = GridSearchCV(svr, param_grid, cv=5, scoring='neg_mean_squared_error', verbose=0, n_jobs=-1)
    grid_search.fit(indices_train_scaled[:, j].reshape(-1, 1), properties_train_scaled[:, i])
    # Coefficient and intercept converted back to original units with the standard-scaler statistics.
    # NOTE(review): these two values are not written below - the sheet receives the scaled-space
    # intercept_ and coef_ instead; confirm which one is intended.
    coefficients = (grid_search.best_estimator_.coef_[0, 0] / std_indices[j]) * std_properties[i]
    intercept = (grid_search.best_estimator_.intercept_[0] * std_properties[i]) + mean_properties[i] - np.sum(coefficients * mean_indices[j])
    # "X" fills the cell next to the property name in the title column.
    subiteration = np.array([["X"]])
    # First three best_params_ values - presumably C, epsilon and gamma; relies on the key order (verify).
    subiteration = np.append(subiteration, np.array(list(grid_search.best_params_.values())[0:3]).reshape(-1, 1), axis=0)
    subiteration = np.append(subiteration, [[np.round(grid_search.best_score_, 4)]], axis=0)
    subiteration = np.append(subiteration, np.round(np.array([grid_search.best_estimator_.intercept_]), 6), axis=0)
    subiteration = np.append(subiteration, np.round(grid_search.best_estimator_.coef_, 6), axis=0)
    iteration = np.hstack((iteration, subiteration))
  iteration = np.delete(iteration, 0, axis=1)
  # Side table: name of the column with the largest value in the MSE row (row+4); the scores are
  # negative MSE, so the largest is the best. The name range b1:i1 is hard-coded in the formula.
  worksheet.update_acell(f'{calculate_column(subcol+1)}{subrow}', f'=ИНДЕКС(b1:i1, Поискпоз(макс({calculate_column(column)}{row+4}:{calculate_column(column+iteration.shape[1]-1)}{row+4}), {calculate_column(column)}{row+4}:{calculate_column(column+iteration.shape[1]-1)}{row+4}, 0))')
  subrow += 1
  worksheet.update(iteration.tolist(), f'{calculate_column(column)}{row}', value_input_option='USER_ENTERED')
  row += title.shape[0]

# 8d Вариация

In [None]:
# 8d variation with standard scaling: one linear SVR per property on all indices at once.
# Only the header row and the row labels are written; the code that would write the fitted
# values is commented out below, so coefficients / intercept are computed but not used.
# subrow / subcol are set here but not used in this cell.
subrow = 2
subcol = 10
row = 2
column = 2

worksheet.update([name_of_properties.tolist()], ccell(column, row-1))
title = np.array([["C"], ["epsilon"], ["gamma"], ["MSE"], ["a0"]])
title = np.append(title, name_of_indices.reshape(-1, 1), axis=0)
worksheet.update(title.tolist(), ccell(1, 2))
for i in range(properties_train.shape[1]):
  print(f"Обучение SVR по свойству {name_of_properties[i]}...")
  grid_search = GridSearchCV(svr, param_grid, cv=5, scoring='neg_mean_squared_error', verbose=0, n_jobs=-1)
  grid_search.fit(indices_train_scaled, properties_train_scaled[:, i])
  # Undo the standard scaling of features and target.
  coefficients = (grid_search.best_estimator_.coef_ / std_indices) * std_properties[i]
  intercept = (grid_search.best_estimator_.intercept_ * std_properties[i]) + mean_properties[i] - np.sum(coefficients  * mean_indices)
  #coefficients = grid_search.best_estimator_.coef_
  #intercept = grid_search.best_estimator_.intercept_
  #iteration = np.array(np.array(list(grid_search.best_params_.values())[0:3]).reshape(-1, 1))
  #iteration = np.append(iteration, [[np.round(grid_search.best_score_, 4)]], axis=0)
  #iteration = np.append(iteration, np.round(coefficients.reshape(-1, 1), 6), axis=0)
  #iteration = np.append(iteration, np.round(np.array([intercept]), 6), axis=0)
  #worksheet.update(iteration.tolist(), f'{calculate_column(column)}{row}', value_input_option='USER_ENTERED')
  #column += 1

Обучение SVR по свойству Boiling point...
Обучение SVR по свойству Melting point...
Обучение SVR по свойству Flash point...
Обучение SVR по свойству Enthalpy of vaporization...
Обучение SVR по свойству Molar refraction...
Обучение SVR по свойству Polarization...
Обучение SVR по свойству Surface tension...
Обучение SVR по свойству Molar Volume...


# Я не знаю, что это, но пусть останется


In [None]:
# One linear SVR per property on all indices; each property's results fill one sheet column.
# NOTE(review): the model is fitted on the UNSCALED indices_train / properties_train, yet the
# coefficients are then transformed with the standard-scaler statistics as if the fit had been
# done on scaled data - confirm that this is intended.
# subrow / subcol are set here but not used in this cell.
subrow = 2
subcol = 10
row = 2
column = 2
for i in range(properties_train.shape[1]):
  print(f"Обучение SVR по свойству {name_of_properties[i]}...")
  grid_search = GridSearchCV(svr, param_grid, cv=5, scoring='neg_mean_squared_error', verbose=1, n_jobs=-1)
  grid_search.fit(indices_train, properties_train[:, i])
  coefficients = (grid_search.best_estimator_.coef_ / std_indices) * std_properties[i]
  # NOTE(review): `coefficients` is already rescaled on the previous line and is rescaled a second
  # time inside the sum here, unlike the intercept formulas in the other cells - verify.
  intercept = (grid_search.best_estimator_.intercept_ * std_properties[i]) + mean_properties[i] - np.sum((coefficients / std_indices) * std_properties[i] * mean_indices)

  # Column layout: first three best_params_ values, CV score, coefficients, then the intercept last.
  iteration = np.array(np.array(list(grid_search.best_params_.values())[0:3]).reshape(-1, 1))
  iteration = np.append(iteration, [[np.round(grid_search.best_score_, 4)]], axis=0)
  iteration = np.append(iteration, np.round(coefficients.reshape(-1, 1), 6), axis=0)
  iteration = np.append(iteration, np.round(np.array([intercept]), 6), axis=0)
  worksheet.update(iteration.tolist(), f'{calculate_column(column)}{row}', value_input_option='USER_ENTERED')
  column += 1

Обучение SVR по свойству Boiling point...
Fitting 5 folds for each of 24 candidates, totalling 120 fits
Обучение SVR по свойству Melting point...
Fitting 5 folds for each of 24 candidates, totalling 120 fits
Обучение SVR по свойству Flash point...
Fitting 5 folds for each of 24 candidates, totalling 120 fits
Обучение SVR по свойству Enthalpy of vaporization...
Fitting 5 folds for each of 24 candidates, totalling 120 fits
Обучение SVR по свойству Molar refraction...
Fitting 5 folds for each of 24 candidates, totalling 120 fits
Обучение SVR по свойству Polarization...
Fitting 5 folds for each of 24 candidates, totalling 120 fits
Обучение SVR по свойству Surface tension...
Fitting 5 folds for each of 24 candidates, totalling 120 fits
Обучение SVR по свойству Molar Volume...
Fitting 5 folds for each of 24 candidates, totalling 120 fits


In [None]:
# Alternative, wider hyper-parameter grid (kernel still fixed to 'linear').
# Rebinding param_grid only affects GridSearchCV objects that are created after this cell is run.
param_grid = {
    'kernel': ['linear'],
    'C':[0.01, 0.1, 1, 10, 100, 1000],
    'gamma': ['scale', 'auto', 0.001, 0.01, 0.1, 1],
    'epsilon': [0.01, 0.1, 0.2, 0.5]
    }