In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import tensorflow as tf

from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier

import pymatgen

from functions.prepare_data_and_sub import prepare_dataset, make_prediction, energy_within_threshold
from functions.class2_features import classificate_class2, unufication_class2, geometrical_features
from functions.strusture_analysis import decompose, make_masks, find_differ_sites, extract_ideal_structure

In [2]:
# Load the public (training) dataset.
data = prepare_dataset('data/dichalcogenides_public/')

# Add the per-structure decomposition column first; make_masks is called on the
# frame afterwards (presumably it reads this column -- TODO confirm).
data['decomposition'] = data['structures'].apply(decompose)
mask_list = make_masks(data)

# The ideal structure sites are extracted from the training data once and are
# also reused when processing the test set.
ideal_structure_sites = extract_ideal_structure(data)
data['representative'] = data['structures'].apply(
    lambda structure: find_differ_sites(structure.sites, ideal_structure_sites)
)

  0%|          | 0/380 [00:00<?, ?it/s]

In [3]:
# Load the private (test) dataset; train=False is forwarded to prepare_dataset.
test = prepare_dataset('data/dichalcogenides_private/', train=False)

# Same preprocessing as for the training frame: decomposition column, then masks.
test['decomposition'] = test['structures'].apply(decompose)
mask_list_test = make_masks(test)

# Sites are compared against the ideal structure extracted from the TRAINING data.
test['representative'] = test['structures'].apply(
    lambda structure: find_differ_sites(structure.sites, ideal_structure_sites)
)

# Presumably fills the initial 'predictions' column that later cells overwrite
# for class 2 -- TODO confirm against make_prediction.
test = make_prediction(test, ideal_structure_sites, mask_list_test)

### Class2:

In [4]:
# Stage 1 for class 2: a decision tree trained on every group label found in the
# training rows selected by mask_list[2]. Its predictions (y_predicted3) are
# later used only to decide "group 3" vs "not group 3".
model = DecisionTreeClassifier(max_depth=15, random_state=13)

# Training rows of class 2, with the group label derived from the target value.
class2 = data[mask_list[2]].copy()
class2['group'] = class2['targets'].apply(classificate_class2)
class2 = unufication_class2(class2)
class2 = geometrical_features(class2)

# Take an explicit copy of the test slice, exactly as done for the training
# slice above. The helpers assign columns on the frame they receive, and passing
# a bare boolean-indexed slice triggered pandas' SettingWithCopyWarning.
test_class2 = unufication_class2(test[mask_list_test[2]].copy())
test_class2 = geometrical_features(test_class2)

# Only the three pairwise distances are used by the stage-1 tree.
feature_list = ['Mo_S_dist', 'S_Se_dist', 'Mo_Se_dist']
model.fit(class2[feature_list], class2['group'])

y_predicted3 = model.predict(test_class2[feature_list])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sample['representative'] = sample['representative'].apply(lambda x: symmetry_transform_up(x.copy(), x[0].coords))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sample['representative'] = sample['representative'].apply(lambda x: orange_closer(x.copy()))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [8]:
# Keep only the training rows labelled group 1 or group 2; these are the two
# classes the second-stage classifier is trained to separate.
class2 = class2[class2['group'].isin([1, 2])]

In [9]:
# Inspect the columns of the class-2 training frame; a later cell builds its
# feature list from these column names.
class2.columns

Index(['structures', 'targets', 'decomposition', 'representative', 'group',
       'Mo_coords_0', 'Mo_coords_1', 'Mo_coords_2', 'S_coords_0', 'S_coords_1',
       'S_coords_2', 'Se_coords_0', 'Se_coords_1', 'Se_coords_2', 'Mo_S_dist',
       'S_Se_dist', 'Mo_Se_dist', 'M_is_boarder', 'S_is_boarder',
       'Se_is_boarder', 'M_is_boarder_0', 'M_is_boarder_1', 'S_is_boarder_0',
       'S_is_boarder_1', 'Se_is_boarder_1', 'Se_is_boarder_0'],
      dtype='object')

In [14]:
# Stage 2 for class 2: gradient-boosted classifier separating group 1 from
# group 2. `class2` is expected to have been restricted to those two groups
# already. XGBClassifier is imported in the notebook's top import cell.

# Select features by NAME instead of by position (previously columns[5:]):
# everything except the raw/object columns and the label itself. This yields the
# same list for the current frame but does not silently change if the column
# order does.
non_feature_columns = {'structures', 'targets', 'decomposition', 'representative', 'group'}
feature_list = [column for column in class2.columns if column not in non_feature_columns]

# XGBClassifier expects class labels 0..n_classes-1 (recent xgboost releases
# reject e.g. {1, 2}). Encode the group labels explicitly and decode the
# predictions afterwards, so the cell works across xgboost versions and
# y_predicted_12 still holds the original group values.
group_labels, encoded_groups = np.unique(class2['group'].to_numpy(), return_inverse=True)

model = XGBClassifier(n_estimators=150, random_state=0xC0FFEE)
model.fit(class2[feature_list], encoded_groups)

predicted_indices = np.asarray(model.predict(test_class2[feature_list])).astype(int)
y_predicted_12 = group_labels[predicted_indices]






In [16]:
# Combine both class-2 classifiers into three full-length boolean masks over
# `test`, then overwrite 'predictions' with one fixed value per group.
n_test_rows = len(test)
is_group3 = y_predicted3 == 3
not_group3 = y_predicted3 != 3

# Group 1: stage 1 says "not group 3" and stage 2 says 1.
mask_class2_1 = np.zeros(n_test_rows, dtype=bool)
mask_class2_1[mask_list_test[2]] = not_group3 & (y_predicted_12 == 1)

# Group 2: stage 1 says "not group 3" and stage 2 says 2.
mask_class2_2 = np.zeros(n_test_rows, dtype=bool)
mask_class2_2[mask_list_test[2]] = not_group3 & (y_predicted_12 == 2)

# Group 3: decided by stage 1 alone.
mask_class2_3 = np.zeros(n_test_rows, dtype=bool)
mask_class2_3[mask_list_test[2]] = is_group3

# One constant prediction per group.
for group_mask, group_value in (
    (mask_class2_1, 0.41),
    (mask_class2_2, 0.36),
    (mask_class2_3, 0.29),
):
    test.loc[group_mask, 'predictions'] = group_value

In [None]:
# Build the final submission: our predictions for rows covered by masks 0-2,
# the baseline submission's predictions for every other row.
our_submission = test[['predictions']].copy()
baseline = pd.read_csv('submission.csv', index_col=0)

# Both frames carry a 'predictions' column, so the join suffixes them to
# 'predictions_our' / 'predictions_bas'.
unite_sub = our_submission.join(baseline, lsuffix='_our', rsuffix='_bas')

covered_rows = mask_list_test[0] | mask_list_test[1] | mask_list_test[2]
uncovered_rows = ~covered_rows

unite_sub.loc[covered_rows, 'predictions'] = unite_sub['predictions_our']
unite_sub.loc[uncovered_rows, 'predictions'] = unite_sub['predictions_bas']

unite_sub['predictions'].to_csv('best_submission_in_the_world1_full.csv')