In [None]:
import seaborn as sns
sns.set()
import altair as alt

In [None]:
import numpy as np
import pandas as pd

import tensorflow as tf
from tensorflow import keras


# SQLite and Data Preprocessing 


### SQL to Dataframe  

In [None]:
import sqlalchemy
from sqlalchemy import create_engine

from sqlalchemy import inspect


In [None]:
# Engine for the local SQLite file that holds the micrograph and sample tables
# queried below.
engine = create_engine("sqlite:///microstructures.sqlite")

In [None]:
# Schema container for the microstructure database.
# The engine is deliberately not passed to the constructor: the `bind`
# argument of MetaData was removed in SQLAlchemy 2.0 and raises a TypeError
# there. Pass the engine explicitly where it is needed instead, e.g.
# MicrostrucureData.reflect(bind=engine).
MicrostrucureData = sqlalchemy.MetaData()

In [None]:
# Open a connection on the engine; it is used below to run the join query.
conn = engine.connect()

In [None]:
# SQL text that joins the micrograph table to the sample table, matching
# micrograph.sample_id against sample.sample_key. All columns of both tables
# are selected.


micrographs = """

SELECT *
FROM micrograph JOIN sample ON sample_id = sample_key


"""

In [None]:
# Main dataframe: the result set of the micrograph/sample join, read through
# the open SQLite connection.
micrographs_df = pd.read_sql_query(sql=micrographs, con=conn)

### Custom Transformers for Data Preprocessing

The anneal time is recorded in either minutes or hours, so we convert everything to minutes.

In [None]:
from sklearn.base import BaseEstimator,TransformerMixin

In [None]:
class ToMinute(BaseEstimator, TransformerMixin):
    """Convert anneal times to minutes using a per-row unit code.

    Parameters
    ----------
    dataseries : object exposing ``.apply`` (e.g. a pandas Series)
        Unit code for each row: ``'H'`` for hours or ``'M'`` for minutes.
        Any other value (including a missing unit) is treated as unknown.

    Attributes
    ----------
    multi
        Per-row multiplier built once at construction: 60 for ``'H'``,
        1 for ``'M'`` and NaN for an unknown unit.
    """

    def __init__(self, dataseries):
        self.dataseries = dataseries
        # The multipliers are derived here, not in fit(), so transform() can
        # be called directly on a freshly constructed instance.
        self.multi = self.dataseries.apply(self._M_to_K)

    def _M_to_K(self, char):
        """Return the minutes-per-unit multiplier for a single unit code.

        Unknown units yield NaN rather than 0. A multiplier of 0 would turn
        the time into a valid-looking 0 minutes, which survives dropna() and
        becomes -inf once the logarithm of the time is taken. NaN marks the
        row as missing instead.
        """
        if char == 'H':
            return 60
        if char == 'M':
            return 1
        return float('nan')

    def fit(self, X, y=None):
        """No-op; present only to satisfy the transformer interface."""
        return self

    def transform(self, X, y=None):
        """Return ``X`` multiplied element-wise by the stored multipliers.

        ``X`` is expected to line up with the unit series given to the
        constructor (same rows, same index).
        """
        return X * self.multi
    

## This is our main preprocessing dataframe

In [None]:
# Main dataframe before preprocessing: only the image path, the sample key,
# the annealing schedule (time, unit, temperature) and the cooling method
# are carried forward.
preprocess_columns = [
    'path',
    'sample_id',
    'anneal_time',
    'anneal_time_unit',
    'anneal_temperature',
    'cool_method',
]
preprocess_micrographs_df = micrographs_df[preprocess_columns]

# Transfer Learning: InceptionV3 Regression Model

## Inverse Temperature and Log Time with stratified train-test data


In [None]:
from sklearn.preprocessing import StandardScaler
from sklearn.compose import ColumnTransformer

from sklearn.model_selection import train_test_split

from sklearn.utils.class_weight import compute_class_weight

from tensorflow.keras.models import Sequential
from tensorflow.keras import layers
from tensorflow.keras.preprocessing.image import ImageDataGenerator

from sklearn.metrics import r2_score
from sklearn.metrics import mean_absolute_error
import matplotlib.pyplot as plt


In [None]:
# Per-row time multiplier built from the unit column.
to_minute = ToMinute(preprocess_micrographs_df['anneal_time_unit'])

# Build a new frame (the selection above is left untouched) with two derived
# columns: the temperature offset by 273.15 and the anneal time in minutes.
Kelvin_minute_micrograph = preprocess_micrographs_df.assign(
    anneal_temperature_Kelvin=preprocess_micrographs_df['anneal_temperature'] + 273.15,
    anneal_time_minutes=to_minute.transform(preprocess_micrographs_df['anneal_time']),
)

In [None]:
# Keep only the modelling columns and discard every row with a missing value.
regression_columns = [
    'path',
    'sample_id',
    'anneal_temperature_Kelvin',
    'anneal_time_minutes',
    'cool_method',
]
Kelvin_minute_micrograph_dropna = Kelvin_minute_micrograph[regression_columns].dropna()

# Regression targets: natural log of the anneal time and reciprocal of the
# Kelvin temperature.
Kelvin_minute_micrograph_dropna['log_time'] = (
    Kelvin_minute_micrograph_dropna['anneal_time_minutes'].map(np.log)
)
Kelvin_minute_micrograph_dropna['inverse_anneal_temperature_Kelvin'] = (
    1 / Kelvin_minute_micrograph_dropna['anneal_temperature_Kelvin']
)

In [None]:
# Standardise only the two regression targets. Every other column is passed
# through unchanged and comes back with a 'remainder__' name prefix.
# NOTE(review): the scaler is fitted on the full dataset, i.e. before the
# train/test split further down - confirm this is intended.
ColScaler = ColumnTransformer(
    [('scaler', StandardScaler(), ['inverse_anneal_temperature_Kelvin', 'log_time'])],
    remainder='passthrough',
)

# Ask for DataFrame output so the columns keep (prefixed) names.
ColScaler.set_output(transform='pandas')

# Map the prefixed output names back to the plain names used downstream.
prefixed_to_plain = {
    'remainder__path': 'path',
    'remainder__sample_id': 'sample_id',
    'remainder__sample_weights': 'sample_weights',
    'remainder__cool_method': 'cool_method',
    'scaler__inverse_anneal_temperature_Kelvin': 'inverse_anneal_temperature_Kelvin',
    'scaler__log_time': 'log_time',
}

scaled_regression_data = (
    ColScaler
    .fit_transform(Kelvin_minute_micrograph_dropna)
    .rename(columns=prefixed_to_plain)
)

In [None]:
# One-hot encode the cooling method.
# dtype=float keeps the indicator columns numeric. Recent pandas versions
# default to bool indicators, and selecting bool columns together with the
# float targets (as the 'raw' label array is built downstream) produces an
# object-dtype array that cannot be fed to the network.
ohe = pd.get_dummies(scaled_regression_data['cool_method'], dtype=float)

# Names of the indicator columns, reused later as extra target columns.
dummies = list(ohe.columns.unique())

# Index-aligned join of the indicator columns onto the scaled data.
scaled_regression_data = scaled_regression_data.join(ohe)

In [None]:
# Hold out 10% of the rows for testing, stratified on sample_id, with a fixed
# seed so the split is reproducible.
train_regression_preweight, test_regression_preweight = train_test_split(
    scaled_regression_data,
    test_size=0.1,
    stratify=scaled_regression_data['sample_id'],
    random_state=23,
)

In [None]:
# Balanced class weights computed per sample_id on the training partition.
train_sample_ids = train_regression_preweight['sample_id']
unique_sample_ids = train_sample_ids.unique()

sample_weights = compute_class_weight(
    class_weight='balanced',
    classes=unique_sample_ids,
    y=train_sample_ids,
)

# Lookup table pairing each sample_id with its weight.
SAMPLE_WEIGHTS = pd.DataFrame(
    zip(unique_sample_ids, sample_weights),
    columns=['sample_id', 'sample_weights'],
)

In [None]:
# Attach the per-sample weight column to both partitions so the image
# generators can read it straight from the dataframe.
train_regression = train_regression_preweight.merge(SAMPLE_WEIGHTS, on='sample_id')
test_regression = test_regression_preweight.merge(SAMPLE_WEIGHTS, on='sample_id')

In [None]:
# Shared settings for the image generators.
DIRECTORY = 'micrographs'   # passed as `directory` to flow_from_dataframe
TARGET_SIZE = (522, 645)    # passed as `target_size`; matches the model input
BATCH_SIZE = 32

# Dataframe and column roles for the regression task: the image path is the
# input, and the two scaled targets plus the one-hot cooling columns are the
# labels.
REG_DATAFRAME = train_regression
REG_XCOL = 'path'
REG_YCOL = ['inverse_anneal_temperature_Kelvin', 'log_time', *dummies]


In [None]:

# Plain generators: no rescaling or augmentation is configured here.
data_generator = ImageDataGenerator()
validation_generator = ImageDataGenerator()
test_datagenerator = ImageDataGenerator()

# Arguments shared by all three flows.
common_flow_args = dict(
    directory=DIRECTORY,
    x_col=REG_XCOL,
    y_col=REG_YCOL,
    class_mode='raw',      # labels are the raw numeric values of y_col
    color_mode='rgb',
    target_size=TARGET_SIZE,
)

# The per-row weight column is selected with `weight_col`. The previous
# `sample_weights=` keyword is not a flow_from_dataframe argument, so the
# weights never reached the model.
train_regression_generator = data_generator.flow_from_dataframe(
    dataframe=REG_DATAFRAME,
    weight_col='sample_weights',
    batch_size=BATCH_SIZE,
    **common_flow_args,
)

# NOTE(review): validation reads the same dataframe as training, so the
# validation metrics are not measured on held-out data - confirm intent.
validation_regression_generator = validation_generator.flow_from_dataframe(
    dataframe=REG_DATAFRAME,
    weight_col='sample_weights',
    batch_size=BATCH_SIZE,
    **common_flow_args,
)

# Test flow: unweighted, one image per batch, and unshuffled so predictions
# stay in the same row order as test_regression.
test_regression_generator = test_datagenerator.flow_from_dataframe(
    dataframe=test_regression,
    shuffle=False,
    batch_size=1,
    **common_flow_args,
)



In [None]:
# ImageNet-pretrained InceptionV3 without its classification head. Its input
# is the image after cropping to (482, 645), which removes the annotations
# from the training data.
inception_crop = keras.applications.inception_v3.InceptionV3(
    input_shape=(482, 645, 3),
    weights='imagenet',
    include_top=False,
)

In [None]:
# Freeze every InceptionV3 layer for the initial phase of model training.
for inception_layer in inception_crop.layers:
    inception_layer.trainable = False

In [None]:

def inception_regression_model_builder(training_data, n_outputs=9):
    """Assemble and compile the InceptionV3-based regression network.

    Layer order: input (522, 645, 3) -> rescale by 1/255 -> crop 40 rows to
    remove the image annotations -> random flip/rotation/zoom/contrast/
    translation -> the module-level ``inception_crop`` base -> flatten ->
    two Dense/BatchNorm/ReLU/Dropout stages (1000 and 500 units) -> linear
    output layer.

    Parameters
    ----------
    training_data
        Not used by the builder; kept so existing calls keep working.
    n_outputs : int, default 9
        Width of the final Dense layer. It has to equal the number of target
        columns produced by the generators (two scaled targets plus the
        one-hot cooling columns). The default is the previously hard-coded
        value.

    Returns
    -------
    The compiled Sequential model (Adam optimiser, MSE loss, MSE metric).
    The model summary is printed as a side effect.
    """
    model = Sequential()

    model.add(tf.keras.Input(shape=(522, 645, 3)))
    model.add(layers.Rescaling(scale=1. / 255))

    # Cropping image to remove image annotations
    model.add(layers.Cropping2D(cropping=((0, 40), (0, 0))))

    # Data augmentation
    model.add(layers.RandomFlip())
    model.add(layers.RandomRotation(factor=0.4, fill_mode='reflect'))
    model.add(layers.RandomZoom(.4, .2))
    model.add(layers.RandomContrast(.2))
    model.add(layers.RandomTranslation(.2, .2, fill_mode='reflect', interpolation='bilinear'))

    # Pretrained convolutional base
    model.add(inception_crop)
    model.add(layers.Flatten())

    # Dense head: the activation is applied after batch normalisation, so
    # each Dense layer itself is linear.
    for units in (1000, 500):
        model.add(layers.Dense(units, activation=None))
        model.add(layers.BatchNormalization(momentum=.99))
        model.add(layers.Activation('relu'))
        model.add(layers.Dropout(.5))

    # Linear output, one unit per target column.
    model.add(layers.Dense(n_outputs))

    model.compile(optimizer='adam',
                  loss='mse',
                  metrics=[tf.keras.metrics.mean_squared_error])

    model.summary()
    return model


In [None]:
# Build and compile the network. The dataframe argument is accepted by the
# builder but not used by it.
inception_regression_model = inception_regression_model_builder(train_regression)


In [None]:
# Train for one epoch. The step counts are taken from len(generator), which
# is a whole number of batches. The previous samples / BATCH_SIZE expression
# produced a float and relied on BATCH_SIZE matching the generator's own
# batch size.
inception_regression_model_fit = inception_regression_model.fit(
    train_regression_generator,
    steps_per_epoch=len(train_regression_generator),
    epochs=1,
    validation_data=validation_regression_generator,
    validation_steps=len(validation_regression_generator),
    callbacks=None,
)
    

    

In [None]:
# Persist the trained model. save() needs a destination path; calling it with
# no argument raises a TypeError. The same path must be used when reloading.
inception_regression_model.save('inception_regression_model.keras')

In [None]:
# Reload the persisted model. load_model() needs the path the model was saved
# to; calling it with no argument raises a TypeError.
inception_regression_model = tf.keras.models.load_model('inception_regression_model.keras')

In [None]:
# Fine-tuning: mark every top-level layer of the model as trainable.
# NOTE(review): Keras normally expects compile() to be called again after
# `trainable` is changed on an already-compiled model - confirm whether a
# recompile (typically with a lower learning rate) is needed here.
for model_layer in inception_regression_model.layers:
    model_layer.trainable = True

In [None]:
# Running history of evaluation results; re-running this cell clears it.
model_evals =[]

In [None]:
# Evaluate on the test generator and record the result in the history list.
evals = inception_regression_model.evaluate(x=test_regression_generator)
model_evals += [evals]

In [None]:
# Show the five most recent evaluation results.
model_evals[-5:]

In [None]:
# Running history of (temperature R^2, time R^2) pairs; re-running this cell
# clears it.
r2_scores_2 =[]

In [None]:
nb_samples = len(test_regression)

# The test generator yields one image per batch, so one step per test row
# covers the test set exactly once. `steps` is passed by keyword because the
# second positional parameter of Model.predict is batch_size, not steps.
predict = inception_regression_model.predict(test_regression_generator, steps=nb_samples)

In [None]:
# R^2 on the standardised scale. Prediction column 0 is the inverse
# temperature and column 1 the log time, following the order of REG_YCOL.
temp_r2 = r2_score(
    y_true=test_regression['inverse_anneal_temperature_Kelvin'],
    y_pred=predict[:, 0],
)
time_r2 = r2_score(
    y_true=test_regression['log_time'],
    y_pred=predict[:, 1],
)
r2_scores_2.append((temp_r2, time_r2))

In [None]:
# Show the five most recent R^2 pairs.
r2_scores_2[-5:]

In [None]:
# Undo the standardisation on the first two predicted columns (inverse
# temperature and log time); the remaining columns are the one-hot outputs.
target_scaler = ColScaler.named_transformers_['scaler']
predict_transform = target_scaler.inverse_transform(predict[:, :2])

In [None]:
# Recover the unscaled test targets with the same fitted scaler.
scaled_test_targets = test_regression[['inverse_anneal_temperature_Kelvin', 'log_time']]
test_unscaled = ColScaler.named_transformers_['scaler'].inverse_transform(scaled_test_targets)

# Undo the reciprocal and the logarithm to get temperature and time back.
temps = 1 / test_unscaled[:, 0]
times = np.exp(test_unscaled[:, 1])

test_check = pd.DataFrame({'temperature': temps, 'time': times})


In [None]:

# Unscaled predictions. Note that the 'anneal_time_minutes' column holds the
# predicted *log* time; the exponentiated value goes into 'time'.
predict_df = pd.DataFrame(
    predict_transform,
    columns=['inverse_temperature', 'anneal_time_minutes'],
)
predict_df['time'] = predict_df['anneal_time_minutes'].map(np.exp)
predict_df['temperature'] = 1 / predict_df['inverse_temperature']

# Ground-truth values, aligned on the default integer index.
predict_df['test_temperature'] = test_check['temperature']
predict_df['test_time'] = test_check['time']

# Signed errors: prediction minus ground truth.
for quantity in ('time', 'temperature'):
    predict_df[f'delta {quantity}'] = predict_df[quantity] - predict_df[f'test_{quantity}']

In [None]:
# Axis offset shared by both encodings.
offset = -150

# Horizontal axis: time error, on a fixed [-1000, 1000] domain.
delta_time_encoding = alt.X(
    'delta time',
    axis=alt.Axis(offset=offset, title='Time Difference (Minutes)', titleY=175),
    scale=alt.Scale(domain=[-1000, 1000]),
)

# Vertical axis: temperature error, on a fixed [-100, 100] domain.
delta_temperature_encoding = alt.Y(
    'delta temperature',
    axis=alt.Axis(offset=offset, title='Temperature Difference (K)', titleX=-175),
    scale=alt.Scale(domain=[-100, 100]),
)

# Scatter plot of the prediction errors.
predict_df_chart = (
    alt.Chart(predict_df)
    .mark_point(color='orangered')
    .encode(x=delta_time_encoding, y=delta_temperature_encoding)
)


predict_df_chart

In [None]:
# Running history of (temperature MAE, time MAE) pairs; re-running this cell
# clears it.
mae_list = []

In [None]:
# Mean absolute error in physical units between the predicted and the true
# temperature and time.
temp_mae = mean_absolute_error(
    predict_df['temperature'],
    test_check['temperature'],
)
time_mae = mean_absolute_error(
    predict_df['time'],
    test_check['time'],
)

# Record the pair and show the five most recent entries.
mae_list.append((temp_mae, time_mae))
mae_list[-5:]

In [None]:
# NOTE(review): this re-initialises mae_list, discarding the entries appended
# above when the notebook is run top to bottom - confirm it is intended.
mae_list = []