In [1]:
import seaborn as sns
sns.set()
import altair as alt

In [2]:
import numpy as np
import pandas as pd

import tensorflow as tf
from tensorflow import keras


# SQLite and Data Preprocessing 


### SQL to Dataframe  

In [3]:
import sqlalchemy
from sqlalchemy import create_engine

from sqlalchemy import inspect


In [4]:
engine = create_engine("sqlite:///microstructures.sqlite")

In [5]:
MicrostrucureData = sqlalchemy.MetaData(bind = engine)

In [6]:
conn = engine.connect()

In [8]:
#### JOINS the micrograph table to the sample data table


micrographs = """

SELECT *
FROM micrograph JOIN sample ON sample_id = sample_key


"""

In [9]:
###  This is the main dataframe
micrographs_df = pd.read_sql_query(micrographs, conn)

### Custom Transformers for Data Preprocessing

The anneal time is in minutes and hours, and we will convert evething to minutes.

In [10]:
from sklearn.base import BaseEstimator,TransformerMixin

In [13]:
class ToMinute(BaseEstimator, TransformerMixin):
    
    def __init__(self, dataseries):
        self.dataseries = dataseries
        self.multi = self.dataseries.apply(self._M_to_K)
        
    def _M_to_K(self, char):
        if char == 'H':
            return 60
        if char == 'M':
            return 1
        else: 
            return 0
        
    def fit(self,X,y = None):
        return self
    
    
    def transform(self, X, y = None):
        
        return X*self.multi
    

## This is our main preprocssing dataframe

In [14]:
###  This is our main data frame before preprocessing

preprocess_micrographs_df = micrographs_df[['path',
                                 'sample_id',
                                 'anneal_time',
                                 'anneal_time_unit',
                                 'anneal_temperature',
                                 'cool_method'
                                ]]

# Transfer Learning:  InceptionV3 Regeression Model 

## Inverse Temperature and Log Time with stratified train-test data


In [20]:
from sklearn.preprocessing import StandardScaler
from sklearn.compose import ColumnTransformer

from sklearn.model_selection import train_test_split

from sklearn.utils.class_weight import compute_class_weight

from tensorflow.keras.models import Sequential
from tensorflow.keras import layers
from tensorflow.keras.preprocessing.image import ImageDataGenerator

from sklearn.metrics import r2_score
from sklearn.metrics import mean_absolute_error
import matplotlib.pyplot as plt


In [23]:
to_minute = ToMinute(preprocess_micrographs_df['anneal_time_unit'])

Kelvin_minute_micrograph = preprocess_micrographs_df.copy()
Kelvin_minute_micrograph['anneal_temperature_Kelvin'] = Kelvin_minute_micrograph['anneal_temperature'] + 273.15
Kelvin_minute_micrograph['anneal_time_minutes'] = to_minute.transform(preprocess_micrographs_df['anneal_time'])

In [25]:
Kelvin_minute_micrograph_dropna = Kelvin_minute_micrograph[['path',
                                              'sample_id', 
                                              'anneal_temperature_Kelvin', 
                                              'anneal_time_minutes','cool_method']].dropna()

Kelvin_minute_micrograph_dropna['log_time'] = Kelvin_minute_micrograph_dropna['anneal_time_minutes'].apply(lambda x : np.log(x))
Kelvin_minute_micrograph_dropna['inverse_anneal_temperature_Kelvin'] = 1/train_regression_start['anneal_temperature_Kelvin']

In [27]:
ColScaler = ColumnTransformer([('scaler',StandardScaler(),['inverse_anneal_temperature_Kelvin',
                                                                   'log_time',
                                                                  ])], 
                                                                  remainder = 'passthrough')

ColScaler.set_output(transform ='pandas')

scaled_regression_data = ColScaler.fit_transform(Kelvin_minute_micrograph_dropna)

scaled_regression_data.rename(columns ={'remainder__path':'path', 
                                        'remainder__sample_id':'sample_id', 
                                        'remainder__sample_weights':'sample_weights',
                                        'remainder__cool_method':'cool_method',
                                        'scaler__inverse_anneal_temperature_Kelvin':'inverse_anneal_temperature_Kelvin',
                                        'scaler__log_time':'log_time'}, inplace = True)

In [28]:
# one hot encoding the cooling methods
ohe = pd.get_dummies(scaled_regression_data['cool_method'])
dummies = list(ohe.columns.unique())

scaled_regression_data = scaled_regression_data.join(ohe)

In [30]:
train_regression_preweight, test_regression_preweight = train_test_split(scaled_regression_data, 
                                         test_size = 0.1, 
                                         stratify = regression_data['sample_id'], 
                                         random_state = 23)

In [31]:
#Weighting the samples based on Sample Id's

sample_weights = compute_class_weight(class_weight = 'balanced',
                                     classes = train_regression_preweight['sample_id'].unique(),
                                     y = train_regression_preweight['sample_id'])

SAMPLE_WEIGHTS = pd.DataFrame(zip(train_regression_preweight['sample_id'].unique(),sample_weights), columns = ['sample_id','sample_weights'])

In [32]:
#attaching sample weights to dataframes for ImageDataGenerator

train_regression = pd.merge(train_regression_preweight, SAMPLE_WEIGHTS, on = 'sample_id')
test_regression = pd.merge(test_regression_preweight, SAMPLE_WEIGHTS, on = 'sample_id')

In [33]:
REG_DATAFRAME = train_regression
DIRECTORY = 'micrographs'
REG_XCOL = 'path'
REG_YCOL = ['inverse_anneal_temperature_Kelvin','log_time'] + dummies
TARGET_SIZE = (522,645)
BATCH_SIZE = 32


In [34]:

data_generator = ImageDataGenerator()
validation_generator = ImageDataGenerator()
test_datagenerator = ImageDataGenerator()


train_regression_generator = data_generator.flow_from_dataframe(dataframe = REG_DATAFRAME,
                                                directory = DIRECTORY,
                                                x_col = REG_XCOL,
                                                y_col = REG_YCOL,
                                                class_mode= 'raw',
                                                color_mode = 'rgb',
                                                sample_weights = 'sample_weights',
                                                target_size = TARGET_SIZE,
                                                batch_size = 32)

validation_regression_generator = validation_generator.flow_from_dataframe(dataframe = REG_DATAFRAME,
                                                directory = DIRECTORY,
                                                x_col = REG_XCOL,
                                                y_col = REG_YCOL,
                                                class_mode = 'raw',
                                                color_mode = 'rgb',
                                                sample_weights = 'sample_weights',
                                                target_size = TARGET_SIZE,
                                                batch_size = 32)


test_regression_generator = test_datagenerator.flow_from_dataframe(dataframe = test_regression,
                                                        directory = DIRECTORY,
                                                        x_col = REG_XCOL,
                                                        y_col = REG_YCOL,
                                                        class_mode = 'raw',
                                                        color_mode = 'rgb',
                                                        shuffle = False,
                                                        target_size = TARGET_SIZE,
                                                        batch_size = 1)



Found 538 validated image filenames.
Found 538 validated image filenames.
Found 60 validated image filenames.


In [None]:
#inception model that accepts the images cropped to (482,645) to remove annotations from training data

inception_crop = keras.applications.inception_v3.InceptionV3(include_top = False, 
                                                                weights = 'imagenet', 
                                                                input_shape = (482,645,3))

In [75]:
#making inception layers undtrainable for intial phase of model training

for layer in inception_crop.layers:
    layer.trainable = False

In [87]:

def inception_regression_model_builder(training_data):
    model = Sequential()
    
    model.add(tf.keras.Input(shape=(522,645,3)))
    model.add(layers.Rescaling(scale = 1./255))
    
    #Cropping image to remove image annotations
    model.add(layers.Cropping2D(
              cropping=((0, 40), (0, 0))
                ))
    #Data Augmentation
    model.add(layers.RandomFlip())
    model.add(layers.RandomRotation(factor = 0.4, 
                                    fill_mode = 'reflect'))
    model.add(layers.RandomZoom(.4,.2))
    model.add(layers.RandomContrast(.2)) 
    model.add(layers.RandomTranslation(.2,.2,fill_mode='reflect',interpolation='bilinear'))
    
    #inception layer
    model.add(inception_crop)
    model.add(layers.Flatten())
    

    #Dense Layers
    model.add(layers.Dense(1000, activation = None))

    model.add(layers.BatchNormalization(momentum=.99))
    model.add(layers.Activation('relu'))
    
    model.add(layers.Dropout(.5))
    
    
    model.add(layers.Dense(500 , activation = None))
    
    model.add(layers.BatchNormalization(momentum=.99))
    model.add(layers.Activation('relu'))
    
    model.add(layers.Dropout(.5))
    
    model.add(layers.Dense(9))
    
    model.compile(optimizer = 'adam',
                loss ='mse',
                metrics=[tf.keras.metrics.mean_squared_error]
                 )
    
    model.summary()
    return model


In [88]:
inception_regression_model = inception_regression_model_builder(train_regression)


Model: "sequential_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling_5 (Rescaling)     (None, 522, 645, 3)       0         
                                                                 
 cropping2d_5 (Cropping2D)   (None, 482, 645, 3)       0         
                                                                 
 random_flip_4 (RandomFlip)  (None, 482, 645, 3)       0         
                                                                 
 random_rotation_4 (RandomR  (None, 482, 645, 3)       0         
 otation)                                                        
                                                                 
 random_zoom_4 (RandomZoom)  (None, 482, 645, 3)       0         
                                                                 
 random_contrast_4 (RandomC  (None, 482, 645, 3)       0         
 ontrast)                                             

In [116]:
inception_regression_model_fit = inception_regression_model.fit(train_regression_generator,
                                                                steps_per_epoch = train_regression_generator.samples/BATCH_SIZE,
                                                                epochs = 100,
                                                                validation_data = validation_regression_generator,
                                                                validation_steps = validation_regression_generator.samples/BATCH_SIZE,
                                                                callbacks =  None
                                                                )
    

    

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100


Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


In [51]:
inception_regression_model.save()

In [218]:
inception_regression_model.load_weights()

In [52]:
inception_regression_model.save_weights()

In [44]:
inception_regression_model = tf.keras.models.load_model()

In [40]:
#This cell is used for fine-tuning the inception model 

for layer in inception_regression_model_2.layers:
    layer.trainable = True

In [103]:
model_evals =[]

In [102]:
evals = inception_regression_model_2.evaluate(test_regression_generator)
model_evals.append(evals)



NameError: name 'model_evals' is not defined

In [1799]:
model_evals[-5:]

[0.7175,
 [0.6613301634788513, 0.6613301634788513],
 [0.6206040382385254, 0.6206040382385254],
 [0.2700064778327942, 0.21997803449630737],
 [0.3252373933792114, 0.26045286655426025],
 [0.3502933084964752, 0.2795868217945099],
 [0.3205432593822479, 0.2686660587787628],
 [0.32912155985832214, 0.2776298224925995],
 [0.3329106569290161, 0.2830042243003845],
 [0.33801811933517456, 0.28654128313064575],
 [0.31019920110702515, 0.2601570785045624],
 [0.3193950355052948, 0.2693491578102112],
 [0.3180180788040161, 0.269048810005188],
 [0.3180180788040161, 0.269048810005188],
 [0.2994304597377777, 0.2504594027996063],
 [0.31019920110702515, 0.2601570785045624],
 [0.36960166692733765, 0.29690948128700256],
 [0.35794970393180847, 0.30191484093666077],
 [0.35819512605667114, 0.30491355061531067],
 [0.3772590458393097, 0.3201053738594055],
 [0.3556934893131256, 0.3026147782802582],
 [0.3685082197189331, 0.3174241781234741],
 [0.348358690738678, 0.2980002760887146],
 [0.3497534990310669, 0.30260577797

In [48]:
r2_scores_2 =[]

In [117]:
nb_samples = len(test_regression)

predict = inception_regression_model.predict(test_regression_generator,nb_samples)



In [118]:
temp_r2 = r2_score(test_regression['inverse_anneal_temperature_Kelvin'],predict_2[:,0])
time_r2 = r2_score(test_regression['log_time'],predict_2[:,1])
r2_scores_2.append((temp_r2,time_r2))

In [119]:
r2_scores_2[-5:]

[(0.587563641798744, 0.4796155084408985),
 (0.6275749392080852, 0.6173230167825271),
 (0.7203277601289796, 0.6225787423236302),
 (0.8193726299197486, 0.7522543093834898),
 (0.7791916649664747, 0.7298726690897497)]

In [53]:
predict_transform = ColScaler.named_transformers_['scaler'].inverse_transform(predict[:,:2])

In [55]:
test_unscaled =  ColScaler.named_transformers_['scaler']\
                          .inverse_transform(test_regression[['inverse_anneal_temperature_Kelvin',
                                                              'log_time']])
temps = 1/test_unscaled[:,0]
times = np.exp(test_unscaled[:,1])
             #                           
test_check = pd.DataFrame({'temperature':temps,'time': times})


In [56]:

predict_df = pd.DataFrame(predict_transform, columns = ['inverse_temperature','anneal_time_minutes'])
predict_df['time'] = predict_df['anneal_time_minutes'].apply(lambda x: np.exp(x))
predict_df['temperature'] = 1/predict_df['inverse_temperature']
predict_df['test_temperature'] = test_check['temperature']
predict_df['test_time'] = test_check['time']

In [1556]:
chart_pred = alt.Chart(predict_df).mark_point().encode(
    x =alt.X('time').scale(type = 'log'),
    y = 'temperature')

#chart_pred.encoding.x.scale = alt.Scale(domain=[0, 5500])
chart_pred.encoding.y.scale = alt.Scale(domain=[900, 1500])
chart_pred

In [1563]:
chart_true = alt.Chart(predict_df).mark_point().encode(
    x = alt.X('test_time').scale(type = 'log'),
    y = 'test_temperature')

#chart_true.encoding.x.scale = alt.Scale(domain=[0, 5500])
chart_true.encoding.y.scale = alt.Scale(domain=[900, 1500])
chart_true

In [1905]:
chart_pred_cribbed = alt.Chart(sorted_cribbed_predict.head(20)).mark_point().encode(
    x ='time',
    y = 'temperature')

chart_pred_cribbed.encoding.x.scale = alt.Scale(domain=[0, 500])
chart_pred_cribbed.encoding.y.scale = alt.Scale(domain=[1100, 1300])
chart_pred_cribbed

In [2079]:
offset = -150

chart_true_cribbed = alt.Chart(sorted_cribbed_predict).mark_point(color = 'orangered').encode(
    x = alt.X('delta time', 
              axis = alt.Axis(offset = offset, title ='Time Difference (Minutes)', titleY = 175),
              scale = alt.Scale(domain=[-100, 100])),
              
    y = alt.Y('delta temperature', 
              axis = alt.Axis(offset = offset,title ='Temperature Difference (K)',titleX = -175),
              scale = alt.Scale(domain=[-100, 100])),
    
  
)


chart_true_cribbed 

In [63]:
offset = -150

predict_df_chart = alt.Chart(predict_df).mark_point(color = 'orangered').encode(
    x = alt.X('delta time', 
              axis = alt.Axis(offset = offset, title ='Time Difference (Minutes)', titleY = 175),
              scale = alt.Scale(domain=[-1000, 1000])),
              
    y = alt.Y('delta temperature', 
              axis = alt.Axis(offset = offset,title ='Temperature Difference (K)',titleX = -175),
              scale = alt.Scale(domain=[-100, 100])),
    
  
)


predict_df_chart

In [1937]:
sorted_cribbed_predict

Unnamed: 0,inverse_temperature,anneal_time_minutes,time,temperature,test_temperature,test_time,delta time,delta temperature,score
31,0.000808,4.502703,90.260751,1237.71167,1243.15,90.0,0.260751,-5.43833,5.444578
53,0.000803,4.585473,98.04956,1244.949951,1243.15,90.0,8.04956,1.799951,8.248347
52,0.00081,4.525278,92.321596,1235.01123,1243.15,90.0,2.321596,-8.13877,8.463414
1,0.000731,3.93447,51.13505,1368.005859,1373.15,60.0,-8.86495,-5.144141,10.249367
2,0.0008,4.395099,81.052629,1250.250488,1243.15,90.0,-8.947371,7.100488,11.422451
0,0.000725,3.90559,49.679385,1378.410156,1373.15,60.0,-10.320615,5.260156,11.583796
17,0.000805,4.315457,74.847847,1242.383545,1243.15,90.0,-15.152153,-0.766455,15.171526
15,0.000817,5.230235,186.836724,1223.484009,1243.15,180.0,6.836724,-19.665991,20.820471
19,0.000801,4.801433,121.684677,1248.499634,1243.15,90.0,31.684677,5.349634,32.133119
4,0.000847,4.829095,125.097741,1180.503418,1173.15,90.0,35.097741,7.353418,35.859785


In [2074]:
cribbed_predict = predict_df[(predict_df['test_time'] >= 50) & (predict_df['test_time'] <= 180)]

In [62]:
predict_df['delta time'] =  predict_df['time'] - predict_df['test_time'] 
predict_df['delta temperature'] =  predict_df['temperature'] - predict_df['test_temperature'] 

In [2075]:
cribbed_predict['delta time'] =   cribbed_predict['time']-cribbed_predict['test_time'] 
cribbed_predict['delta temperature'] =   cribbed_predict['temperature']- cribbed_predict['test_temperature'] 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cribbed_predict['delta time'] =   cribbed_predict['time']-cribbed_predict['test_time']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cribbed_predict['delta temperature'] =   cribbed_predict['temperature']- cribbed_predict['test_temperature']


In [2077]:
sorted_cribbed_predict= cribbed_predict.sort_values('score')

In [54]:
mae_list = []

In [2076]:
cribbed_predict['score'] = ((cribbed_predict['time']-cribbed_predict['test_time'])**2 +(cribbed_predict['temperature']-cribbed_predict['test_temperature'])**2)**.5

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cribbed_predict['score'] = ((cribbed_predict['time']-cribbed_predict['test_time'])**2 +(cribbed_predict['temperature']-cribbed_predict['test_temperature'])**2)**.5


In [1477]:
sorted_cribbed_predict['delta_time'] = sorted_cribbed_predict['time'] - sorted_cribbed_predict['test_time']

In [1855]:
len(sorted_cribbed_predict)

30

In [61]:
temp_mae = mean_absolute_error(predict_df['temperature'],test_check['temperature'])
time_mae = mean_absolute_error(predict_df['time'],test_check['time'])
mae_list.append((temp_mae,time_mae))
mae_list[-5:]

[(14.821011962890603, 251.82929440796644)]

In [60]:
mae_list = []

In [1991]:
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    display(predict_df)

Unnamed: 0,inverse_temperature,anneal_time_minutes,time,temperature,test_temperature,test_time,delta time,delta temperature
0,0.000735,4.125991,61.929172,1360.474976,1373.15,60.0,1.929172,-12.675024
1,0.000734,4.027372,56.113245,1362.658325,1373.15,60.0,-3.886755,-10.491675
2,0.000812,5.155476,173.378249,1231.833618,1243.15,90.0,83.378249,-11.316382
3,0.000813,3.777593,43.710709,1229.53894,1243.15,90.0,-46.289291,-13.61106
4,0.000844,4.507475,90.692521,1185.207764,1173.15,90.0,0.692521,12.057764
5,0.00091,7.038952,1140.191915,1099.006226,1073.15,480.0,660.191915,25.856226
6,0.000924,6.706893,818.025056,1082.625366,1073.15,480.0,338.025056,9.475366
7,0.000798,7.175197,1306.617642,1252.616943,1243.15,1440.0,-133.382358,9.466943
8,0.000803,7.130372,1249.341099,1245.643066,1243.15,1440.0,-190.658901,2.493066
9,0.000795,6.050409,424.28646,1257.111572,1243.15,1440.0,-1015.71354,13.961572
