In [None]:
# issues might be :
## onehot encoding
## Dropout(0.05) # MAE : 20
## EarlyStopping(patience=20,restore_best_weight=True,epochs=500) 



In [5]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping

In [6]:
dataset = pd.read_excel(r"C:\Users\pritam.moharana\Jupyter\Caster AI\FRC\new pipeline\fcd_balanced_grade_wise_count.xlsx")

In [11]:
# selecting only the independent features by dropping the dependent features and the unused columns i.e., 'Length'
X = dataset.drop(columns = ['Length',
                            'W1(Top)F', 
                            'W1(Top)B',
                            'W2 (Middle)F', 
                            'W2 (Middle)B', 
                            'W3(Bottom)F', 
                            'W3(Bottom)B'])

In [24]:
# selecting only the dependent features 
Y = dataset[['W1(Top)F', 
             'W1(Top)B',
             'W2 (Middle)F', 
             'W2 (Middle)B', 
             'W3(Bottom)F', 
             'W3(Bottom)B']]

In [26]:
# these are the categorical columns
categorical_columns = ["Grade", 
                       "SLAB TYPE", 
                       "Cast powder", 
                       "HMO Curve", 
                       "SCW Curve", 
                       "SEN Spec'n (FLAT, UP, DOWN)\n"]
X[categorical_columns] = X[categorical_columns].astype(str)


In [27]:
numerical_columns = [col for col in X.columns if col not in categorical_columns]

In [28]:
# OneHot for categorical
# Scaling for numerical
# Scaling for output

preprocessor = ColumnTransformer(
    transformers = [
        ("num", StandardScaler(), numerical_columns),
        ("cat", OneHotEncoder(handle_unknown='ignore'), categorical_columns)
    ])
X_processed = preprocessor.fit_transform(X)
y_scaler = StandardScaler()
y_scaled = y_scaler.fit_transform(Y)

In [30]:
# spliting the data into training and testing dataset
X_train, X_test, y_train, y_test = train_test_split(X_processed, y_scaled, test_size = 0.2, random_state=42)

In [101]:
# Neural Network Model (57 columns -> hidden -> 6)
model = Sequential([
    Dense(256, activation = 'relu', input_shape = (X_train.shape[1],)),
    Dropout(0.3),

    Dense(128, activation = 'relu'),
    Dropout(0.2),

    Dense(64, activation = 'relu'),
    Dense(6, activation = 'linear')
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [102]:
model.compile(
    optimizer = 'adam',
    loss = 'mse',
    metrics = ['mae']
)


In [None]:
# training with EarlyStopping

early_stop = EarlyStopping(
    monitor = 'val_loss',
    patience = 20,
    restore_best_weights = True,
)


model.fit(
    X_train,
    y_train,
    validation_split = 0.2,
    epochs = 200,
    batch_size = 32,
    callbacks = [early_stop],
    verbose = 1
)


Epoch 1/200
[1m1443/1443[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 10ms/step - loss: 0.0176 - mae: 0.0720 - val_loss: 0.0344 - val_mae: 0.1122
Epoch 2/200
[1m1443/1443[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 9ms/step - loss: 0.0147 - mae: 0.0629 - val_loss: 0.0289 - val_mae: 0.1032
Epoch 3/200
[1m1443/1443[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 9ms/step - loss: 0.0146 - mae: 0.0620 - val_loss: 0.0320 - val_mae: 0.1048
Epoch 4/200
[1m1443/1443[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 9ms/step - loss: 0.0143 - mae: 0.0606 - val_loss: 0.0315 - val_mae: 0.1032
Epoch 5/200
[1m1443/1443[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 10ms/step - loss: 0.0136 - mae: 0.0588 - val_loss: 0.0324 - val_mae: 0.1074
Epoch 6/200
[1m1443/1443[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 10ms/step - loss: 0.0130 - mae: 0.0570 - val_loss: 0.0442 - val_mae: 0.1254
Epoch 7/200
[1m1443/1443[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[

In [104]:
y_pred_scaled = model.predict(X_test)

y_pred = y_scaler.inverse_transform(y_pred_scaled)

y_true = y_scaler.inverse_transform(y_test)

[1m451/451[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step


In [105]:
y_pred

array([[1508.3894, 1499.9023, 1498.2616, 1496.4915, 1503.5535, 1496.534 ],
       [1265.612 , 1264.6521, 1263.8934, 1263.5471, 1263.9119, 1263.0942],
       [1246.0846, 1244.641 , 1244.1403, 1243.8561, 1245.4675, 1243.1577],
       ...,
       [1260.7869, 1259.7516, 1257.214 , 1256.2756, 1259.8037, 1256.9493],
       [1283.7231, 1282.8246, 1284.2775, 1283.317 , 1284.5758, 1283.378 ],
       [1281.5214, 1279.8949, 1276.1837, 1275.4225, 1277.5963, 1278.0049]],
      dtype=float32)

In [106]:
y_true

array([[1524., 1524., 1515., 1515., 1522., 1522.],
       [1255., 1255., 1260., 1260., 1260., 1260.],
       [1253., 1253., 1244., 1244., 1248., 1248.],
       ...,
       [1242., 1242., 1247., 1247., 1247., 1247.],
       [1285., 1285., 1290., 1290., 1285., 1286.],
       [1285., 1285., 1270., 1270., 1275., 1275.]])

In [107]:
# y_pred = pd.DataFrame(y_pred,columns = op_cols)

# op_cols = ['W1(Top)F', 'W1(Top)B', 'W2 (Middle)F',
#        'W2 (Middle)B', 'W3(Bottom)F', 'W3(Bottom)B']

# for t in y_real.columns:
#     y_pred[t] = y_pred[t].astype(int)

In [108]:
# y_true = pd.DataFrame(y_true,columns = op_cols)

# op_cols = ['W1(Top)F', 'W1(Top)B', 'W2 (Middle)F',
#        'W2 (Middle)B', 'W3(Bottom)F', 'W3(Bottom)B']

# for t in y_real.columns:
#     y_true[t] = y_true[t].astype(int)

# testing on real_data

In [44]:
testing_data  = pd.read_excel(r"C:\Users\pritam.moharana\Jupyter\Caster AI\FRC\neural net\dec 10 data for testing (neural net).xlsx",header = 1)

In [109]:
# testing_data.columns

In [110]:
# X.columns

In [111]:
X_real = testing_data[['Grade', 
              'Series', 
              'SLAB TYPE', 
              'Section', 
              'TOC', 
              'Wider  life (L/F)',
              'Narrow life (L/R)', 
              'Cast powder', 
              'Super heat', 
              'CASTING SPEED',
              'delta T Right Side', 
              'delta T Loose Side', 
              'delta T Left Side',
              'delta T Fixed Side', 
              'Heat Removal Ratio left',
              'Heat Removal Ratio Right', 
              'HMO Curve', 
              'SCW Curve',
              'Mould Water Flow      (BF)(L/F)', 
              'Mould Water Flow       (NF) (L/R)',
              "SEN Spec'n (FLAT, UP, DOWN)\n", 
              'SEN\nDipping', 
              '% TAPER LEFT',
              '% TAPER RIGHT', 
              '%C', 
              '%Mn', 
              '%S', 
              '%P', 
              '%Si', 
              '%Ni', 
              '%Cr', 
              '%Cu',
              '%Mo', 
              '%Ti', 
              '%Co', 
              'H ppm', 
              'N ppm', 
              'Pb ppm', 
              'Sn ppm', 
              '%Al',
              'B ppm', 
              '%V', 
              'Ca ppm', 
              '%Nb', 
              '%W', 
              '%As', 
              '%Zn', 
              'FC', 
              'FF', 
              'MF',
              'Ni Eq', 
              'Cr Eq ', 
              'Creq/Nieq', 
              'Nieq/Creq ', 
              'NOMINAL DIMENSION 1',
              'NOMINAL DIMENSION 2', 
              'NOMINAL DIMENSION 3']]

In [112]:
X_real_processed = preprocessor.transform(X_real)

y_real_scaled = model.predict(X_real_processed)

y_real = y_scaler.inverse_transform(y_real_scaled)

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step


In [113]:
y_real_true = testing_data[['W1(Top)F', 
                            'W1(Top)B', 
                            'W2 (Middle)F',
                            'W2 (Middle)B', 
                            'W3(Bottom)F', 
                            'W3(Bottom)B']]

In [114]:
y_real_true - y_real


Unnamed: 0,W1(Top)F,W1(Top)B,W2 (Middle)F,W2 (Middle)B,W3(Bottom)F,W3(Bottom)B
0,30.788818,38.255859,28.340576,28.740845,22.391846,28.578247
1,30.788818,38.255859,28.340576,28.740845,22.391846,28.578247
2,30.788818,38.255859,28.340576,28.740845,22.391846,28.578247
3,30.788818,38.255859,28.340576,28.740845,22.391846,28.578247
4,30.788818,38.255859,28.340576,28.740845,22.391846,28.578247
...,...,...,...,...,...,...
66,31.335205,38.508301,45.818970,46.623657,41.017090,46.448486
67,31.335205,38.508301,45.818970,46.623657,41.017090,46.448486
68,31.335205,38.508301,45.818970,46.623657,41.017090,46.448486
69,31.335205,38.508301,45.818970,46.623657,41.017090,46.448486


In [115]:
y_real = pd.DataFrame(y_real,columns = op_cols)

op_cols = ['W1(Top)F', 'W1(Top)B', 'W2 (Middle)F',
       'W2 (Middle)B', 'W3(Bottom)F', 'W3(Bottom)B']

for t in y_real.columns:
    y_real[t] = y_real[t].astype(int)

In [116]:
from sklearn.metrics import mean_absolute_error

In [117]:
mae = mean_absolute_error(y_real_true, y_real)
print(round(mae,2))

20.43


In [118]:
for p in y_real_true.columns:
    
    w1 = y_real_true[p] - y_real[p]
    print("column Name : ",p,"Min : ",min(w1)," Max : ",max(w1))



column Name :  W1(Top)F Min :  -14  Max :  35
column Name :  W1(Top)B Min :  -14  Max :  43
column Name :  W2 (Middle)F Min :  -21  Max :  50
column Name :  W2 (Middle)B Min :  -20  Max :  51
column Name :  W3(Bottom)F Min :  -16  Max :  44
column Name :  W3(Bottom)B Min :  -13  Max :  50


In [75]:
from sklearn.dummy import DummyRegressor
from sklearn.multioutput import MultiOutputRegressor
from sklearn.metrics import mean_absolute_error

In [76]:
dummy = MultiOutputRegressor(DummyRegressor(strategy = 'mean'))
dummy.fit(X_train, y_train)

In [77]:
y_dummy = dummy.predict(X_test)
print("Baseline MAE : ",mean_absolute_error(y_real_true, y_dummy))

ValueError: Found input variables with inconsistent numbers of samples: [71, 14429]