# Main explanation

#### Good reading : https://www.cambridge.org/core/journals/journal-of-materials-research/article/double-perovskite-sr2bbo6-oxides-for-hightemperature-thermoelectric-power-generationa-review/87CFBEC724851ABAFEDB893AD7E747E8/core-reader

#### ICSD QUERY
One search on the ICSD was made for space group Fm-3m with number of elements <= 2 - 4776 results
The second was made for space group Fm-3m with number of elements > 2 - 6123 results
The third was made for space group Fm-3m with number of elements > 3 - 2037 results

## Importation

In [None]:
import pandas as pd
import numpy as np
import time
from sklearn.metrics import accuracy_score
# The next two imports were fused onto a single line, which is a syntax error.
import matplotlib.pyplot as plt
# NOTE(review): this binds the `sys` module to the name `os`. Kept as written
# because other cells may rely on that binding -- confirm that a plain
# `import os` / `import sys` was not intended.
import sys as os
import aiida
from scipy.signal import savgol_filter
from scipy.signal import find_peaks_cwt
from sklearn.model_selection import train_test_split
import warnings
# Silence every warning category for the rest of the session.
warnings.filterwarnings('ignore')

In [None]:
# Load one downloaded ICSD entry into a DataFrame using pandas' JSON reader.
# NOTE(review): the path ends in .cif but is parsed with read_json -- confirm
# the file really contains JSON.
df1 = pd.read_json("ISCD3-2/ISCD3_CollCode28065.cif")

## Opening from text file

In [None]:
# read_table already returns a DataFrame, so no extra pd.DataFrame() wrapper
# is needed.
df = pd.read_table("ISCD3.txt")
# NOTE(review): the path ends in .cif but is parsed with read_json -- confirm
# the file really contains JSON.
df1 = pd.read_json("ISCD3-2/ISCD3_CollCode28065.cif")
# Save a CSV copy of the table; with default arguments the row index is
# written as the first column.
df.to_csv('ISCD333.csv')
# Bare expression: displays the table as the cell output.
df

# Second method using shell
Comment - I had issues downloading the AiiDA framework and getting a login/password for remote access

## Using ssh to connect to the server hosting the database 

In [None]:
# Open an SSH tunnel with a shell escape (-f: go to the background,
# -N: run no remote command). Local port 3306 is forwarded to port 3306 on the
# server and local port 8010 to its port 80; stdout and stderr are discarded.
!ssh -fN -L 3306:localhost:3306 -L 8010:localhost:80 aiida@theossrv2.epfl.ch > /dev/null 2>&1 #using username of someone from aiida team

## Loading the ICSD importer class and instantiate it

In [None]:
# NOTE(review): DbImporterFactory is not imported in any of the visible cells
# -- confirm it is in scope before running.
importer_class = DbImporterFactory('icsd')
# Connection settings, passed to the importer class as keyword arguments.
# The closing brace was missing from this literal, which is a syntax error.
# NOTE(review): the password is hard-coded here -- consider reading it from the
# environment or a local config file instead.
importer_parameters = {
    'server': 'http://theossrv2.epfl.ch',
    'host': '127.0.0.1',
    'db': 'icsd',
    'passwd': 'sql',
}

importer = importer_class(**importer_parameters)
# Bare expression: displays the importer as the cell output.
importer

In [None]:
# Ask DbImporterFactory for the 'icsd' importer class.
# NOTE(review): this repeats the first statement of the previous cell.
importer_class = DbImporterFactory('icsd')

## Query the ICSD for a given database entry 



In [None]:
icsd_id = '617290'  # graphite

# Query a single entry by its ICSD id.
query_results = importer.query(id=icsd_id)

# Query all entries whose space group is given as 'F m -3 m'.
query_results_perovskyte_type = importer.query(spacegroup='F m -3 m')
# (also written Fm3m with a bar over the 3, i.e. Fm-3m?)

# Hall notation: -F 4 2 3
# Hermann-Mauguin: F m -3 m
# Number: 225

# print(...) with a single argument works under both Python 2 and Python 3,
# unlike the Python-2-only print statement used before.
print(len(query_results))
print(len(query_results_perovskyte_type))

## Get the cif file corresponding to the entry, and convert it to an AiiDA structure



In [None]:
# Take the result at position 0 of the query. The variable was previously
# misspelled `entre`, so the next line failed with a NameError on `entry`.
entry = query_results.at(0)
cif = entry.get_cif_node()
# print(...) with a single argument works under both Python 2 and Python 3.
print(cif.get_formulae())

# NOTE(review): _get_aiida_structure is a private (underscore-prefixed) method
# of the cif node -- check whether a public equivalent exists.
structure = cif._get_aiida_structure(converter='pymatgen')
print(structure.get_formula())
print(structure.get_ase())

## Get the spacegroup of the structure with spglib



In [None]:
# NOTE(review): spglib is not imported in any of the visible cells -- confirm
# it is in scope before running.
# print(...) with a single argument works under both Python 2 and Python 3.
print(spglib.get_spacegroup(structure.get_ase()))

## Store the structure and print its pk



In [None]:
# Call store() on the structure, then show its pk as the cell output.
structure.store()
structure.pk

### Now access the terminal and type the following to visualise the structure


#### verdi data structure show --format xcrysden 5084



# MACHINE LEARNING


## Import multiple classification algorithms from scikit-learn


In [None]:
# Multinomial Naive Bayes Classifier
def naive_bayes_classifier(train_x, train_y, alpha=0.01):
    """Fit a multinomial naive Bayes classifier and return it.

    Args:
        train_x: Training features, passed straight to ``fit``.
        train_y: Training labels, passed straight to ``fit``.
        alpha: Smoothing parameter forwarded to ``MultinomialNB``. Defaults
            to 0.01, the value that was previously hard-coded.

    Returns:
        The fitted ``MultinomialNB`` instance.
    """
    # Function-scope import: scikit-learn is loaded only when this is called.
    from sklearn.naive_bayes import MultinomialNB

    model = MultinomialNB(alpha=alpha)
    model.fit(train_x, train_y)
    return model


# KNN Classifier
def knn_classifier(train_x, train_y, n_neighbors=3):
    """Fit a k-nearest-neighbours classifier and return it.

    Args:
        train_x: Training features, passed straight to ``fit``.
        train_y: Training labels, passed straight to ``fit``.
        n_neighbors: Number of neighbours forwarded to
            ``KNeighborsClassifier``. Defaults to 3, the value that was
            previously hard-coded.

    Returns:
        The fitted ``KNeighborsClassifier`` instance.
    """
    # Function-scope import: scikit-learn is loaded only when this is called.
    from sklearn.neighbors import KNeighborsClassifier

    model = KNeighborsClassifier(n_neighbors=n_neighbors)
    model.fit(train_x, train_y)
    return model


# Logistic Regression Classifier
def logistic_regression_classifier(train_x, train_y):
    """Train a logistic regression with an L2 penalty and return the estimator.

    ``train_x`` and ``train_y`` are handed to ``fit`` unchanged.
    """
    from sklearn.linear_model import LogisticRegression

    # fit() returns the estimator itself, so the fitted model is returned.
    return LogisticRegression(penalty='l2').fit(train_x, train_y)


# Random Forest Classifier
def random_forest_classifier(train_x, train_y, n_estimators=100):
    """Fit a random forest classifier and return it.

    Args:
        train_x: Training features, passed straight to ``fit``.
        train_y: Training labels, passed straight to ``fit``.
        n_estimators: Number of trees forwarded to
            ``RandomForestClassifier``. Defaults to 100, the value that was
            previously hard-coded.

    Returns:
        The fitted ``RandomForestClassifier`` instance.
    """
    # Function-scope import: scikit-learn is loaded only when this is called.
    from sklearn.ensemble import RandomForestClassifier

    # NOTE(review): no random_state is set, so repeated fits may differ.
    model = RandomForestClassifier(n_estimators=n_estimators)
    model.fit(train_x, train_y)
    return model


# Decision Tree Classifier
def decision_tree_classifier(train_x, train_y):
    """Train a decision tree with default settings and return the estimator.

    ``train_x`` and ``train_y`` are handed to ``fit`` unchanged.
    """
    from sklearn.tree import DecisionTreeClassifier

    fitted_tree = DecisionTreeClassifier()
    fitted_tree.fit(train_x, train_y)
    return fitted_tree


# GBDT(Gradient Boosting Decision Tree) Classifier
def gradient_boosting_classifier(train_x, train_y, n_estimators=100):
    """Fit a gradient boosting (GBDT) classifier and return it.

    Args:
        train_x: Training features, passed straight to ``fit``.
        train_y: Training labels, passed straight to ``fit``.
        n_estimators: Number of boosting stages forwarded to
            ``GradientBoostingClassifier``. Defaults to 100, the value that
            was previously hard-coded.

    Returns:
        The fitted ``GradientBoostingClassifier`` instance.
    """
    # Function-scope import: scikit-learn is loaded only when this is called.
    from sklearn.ensemble import GradientBoostingClassifier

    model = GradientBoostingClassifier(n_estimators=n_estimators)
    model.fit(train_x, train_y)
    return model


# SVM Classifier
def svm_classifier(train_x, train_y):
    """Train an RBF-kernel SVC with probability estimates enabled.

    ``train_x`` and ``train_y`` are handed to ``fit`` unchanged; the fitted
    ``SVC`` is returned.
    """
    from sklearn.svm import SVC

    svc_settings = {'kernel': 'rbf', 'probability': True}
    classifier = SVC(**svc_settings)
    classifier.fit(train_x, train_y)
    return classifier



# 3 layer neural network classification
def mlp_classifier(train_x,train_y):
    """Train a multi-layer perceptron and return the fitted estimator.

    The network has three hidden layers of 256 units each and is trained with
    the 'sgd' solver using a fixed random_state of 1. ``train_x`` and
    ``train_y`` are handed to ``fit`` unchanged.
    """
    from sklearn.neural_network import MLPClassifier

    # All hyper-parameters gathered in one place before building the network.
    network_settings = dict(
        hidden_layer_sizes=(256, 256, 256),
        max_iter=200,
        alpha=1e-4,
        solver='sgd',
        verbose=10,
        tol=1e-6,
        random_state=1,
        learning_rate_init=.1,
    )
    network = MLPClassifier(**network_settings)
    network.fit(train_x, train_y)
    return network