# **Importing Libraries**

**Basic Libraries**

In [1]:
import pandas as pd
import numpy as np

**Libraries for visualisation and analysis**

In [2]:
#libraries used for statistical graphics in python
import matplotlib.pyplot as plt

**Data Preprocessing Libraries**

In [3]:
#Libraries used for data preprocessing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

# **Importing Models**

In [4]:
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.multioutput import MultiOutputRegressor
from sklearn.linear_model import LinearRegression, RidgeCV, SGDRegressor,MultiTaskLasso,MultiTaskElasticNet
from sklearn.svm import SVR
from sklearn.neural_network import MLPRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor, GradientBoostingRegressor,HistGradientBoostingRegressor


#for model evaluation
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# **Data Analysis**

**Reading the Data**

In [5]:
Alloy_data_path = r"https://raw.githubusercontent.com/DarshanGoodGuy/IITISOC25/refs/heads/main/final_dataset1.csv"
Alloy_data = pd.read_csv(Alloy_data_path)

**Analysing the Data**

In [6]:
Alloy_data.head()

Unnamed: 0,FORMULA,Co,Cr,Fe,Ni,Mn,Nb,Ti,Al,C,...,Ca,Y,Pd,Sc,PROPERTY: Calculated Density (g/cm$^3$),PROPERTY: Test temperature ($^\circ$C),PROPERTY: YS (MPa),PROPERTY: UTS (MPa),PROPERTY: Elongation (%),PROPERTY: Calculated Young modulus (GPa)
0,Co1 Cr1 Fe1 Ni1,26.138193,23.061468,24.768591,26.031747,,,,,,...,,,,,8.2,25.0,274.0,708.0,39.0,226.0
1,Co1 Cr1 Mn1 Ni1,26.243766,23.154613,,26.13689,24.464731,,,,,...,,,,,8.1,25.0,282.0,694.0,44.0,222.0
2,Co1 Cr1 Ni1,34.743724,30.654043,,34.602233,,,,,,...,,,,,8.3,25.0,300.0,860.0,60.0,231.0
3,Co1 Fe1 Mn1 Ni1,25.801523,,24.449562,25.696448,24.052467,,,,,...,,,,,8.2,25.0,170.0,550.0,41.0,204.0
4,Co1 Fe1 Ni1,33.972825,,32.192701,33.834474,,,,,,...,,,,,8.5,25.0,211.0,513.0,31.0,207.0


In [7]:
Alloy_data.shape

(947, 33)

In [8]:
Alloy_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 947 entries, 0 to 946
Data columns (total 33 columns):
 #   Column                                    Non-Null Count  Dtype  
---  ------                                    --------------  -----  
 0   FORMULA                                   947 non-null    object 
 1   Co                                        389 non-null    float64
 2   Cr                                        477 non-null    float64
 3   Fe                                        428 non-null    float64
 4   Ni                                        449 non-null    float64
 5   Mn                                        132 non-null    float64
 6   Nb                                        447 non-null    float64
 7   Ti                                        486 non-null    float64
 8   Al                                        344 non-null    float64
 9   C                                         19 non-null     float64
 10  Mo                                    

**Filling the missing values with mean**

In [9]:
Alloy_data.isna().sum()

Unnamed: 0,0
FORMULA,0
Co,558
Cr,470
Fe,519
Ni,498
Mn,815
Nb,500
Ti,461
Al,603
C,928


In [10]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score

# Step 1: Load the dataset
df = pd.read_csv("final_dataset1.csv")  # Replace with the correct path
target = Alloy_data.iloc[:,1:27]  # Replace with your actual target column name

# Step 2: Identify property columns
property_cols = [col for col in df.columns if "property_" in col]

# Step 3: Impute missing property values (use median or mean)
for col in property_cols:
    df[col].fillna(df[col].median(), inplace=True)

# Step 4: Generate synthetic data by adding noise
num_synthetic_samples = len(df)  # You can also try len(df) * 2
synthetic_data = df.copy()
synthetic_data[property_cols] += np.random.normal(loc=0.0, scale=0.05, size=(num_synthetic_samples, len(property_cols)))

# Step 5: Combine original and synthetic data
augmented_df = pd.concat([df, synthetic_data], ignore_index=True)

# Step 6: Train/test split and model
X = augmented_df.drop(columns=[target])
y = augmented_df[target]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = RandomForestRegressor(n_estimators=200, random_state=42)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

print("R² after simple augmentation:", r2_score(y_test, y_pred))

FileNotFoundError: [Errno 2] No such file or directory: 'final_dataset1.csv'

In [None]:
Alloy_data.iloc[:, -4:] = Alloy_data.iloc[:, -4:].fillna(Alloy_data.iloc[:, -4:].mean())
Alloy_data.head()

In [None]:
Alloy_data.isna().sum()

# **Data Standardisation**

In [None]:
Alloy_data.shape

In [None]:
X.head()

In [None]:
y.head()

**Scaling of data**

In [None]:
sc = MinMaxScaler()

X_scaled = pd.DataFrame(sc.fit_transform(X),columns=X.columns);
columns_to_scale = ['PROPERTY: Calculated Density (g/cm$^3$)', 'PROPERTY: Test temperature ($^\circ$C)', 'PROPERTY: YS (MPa)', 'PROPERTY: UTS (MPa)','PROPERTY: Elongation (%)','PROPERTY: Calculated Young modulus (GPa)']
X_scaled = X_scaled.fillna(0)
y_scaled = y.copy()
y_scaled[columns_to_scale] = sc.fit_transform(y[columns_to_scale])

In [None]:
import pickle

with open("sc.pkl", "wb") as f:
    pickle.dump(sc, f)


In [None]:
X_scaled.head()

In [None]:
y_scaled.head()

In [None]:
y_scaled.columns

In [None]:
from sklearn.model_selection import train_test_split

X_new,X_test,y_new,y_test = train_test_split(X_scaled,y_scaled,test_size=0.1,random_state=1);
X_train,X_valid,y_train,y_valid = train_test_split(X_new,y_new,test_size=0.2,random_state=1)

[X_train.shape,y_train.shape],[X_valid.shape,y_valid.shape],[X_test.shape,y_test.shape]

In [None]:
print("Training set shape:", X_train.shape, y_train.shape)
print("Validation set shape:", X_valid.shape, y_valid.shape)
print("Test set shape:", X_test.shape, y_test.shape)

In [None]:
y_Test = sc.inverse_transform(y_test)
actual_Calculated_Density = np.transpose(y_Test)[0]
actual_Test_Temperature = np.transpose(y_Test)[1]
actual_YS = np.transpose(y_Test)[2]
actual_UTS = np.transpose(y_Test)[3]
actual_Elongation = np.transpose(y_Test)[4]
actual_Calculated_Youngs_Modulus = np.transpose(y_Test)[5]

In [None]:
actual_Calculated_Density

# **Modelling**

**Using various regression models to find the best one**

In [None]:
import xgboost as xgb

def model_results(XTrain,XValid,yTrain,yValid):
  models =[('Random Forest',RandomForestRegressor()),
           ('MLP',MLPRegressor(max_iter = 1000)),
           ('RidgeCV',MultiOutputRegressor(RidgeCV())),
           ('SGD',MultiOutputRegressor(SGDRegressor())),
           ('KNN', KNeighborsRegressor(n_neighbors=10)),
           ('Support Vector',MultiOutputRegressor(SVR())),
           ('Decision Tree',MultiOutputRegressor(DecisionTreeRegressor())),
           ('AdaBoost',MultiOutputRegressor(AdaBoostRegressor())),
           ('X_gb',MultiOutputRegressor(xgb.XGBRegressor())),
           ('GradientBoost',MultiOutputRegressor(GradientBoostingRegressor())),
          ]
  finalResults = []

  for name,model in models:
      model.fit(XTrain, yTrain)
      model_results = model.predict(XValid)
      r2score = r2_score(yValid, model_results)
      RMSE = np.sqrt(mean_squared_error(yValid, model_results))
      MAE = mean_absolute_error(yValid, model_results)
      finalResults.append((name, r2score, RMSE, MAE))

  finalResults.sort(key=lambda k:k[2])
  return finalResults



In [None]:
np.random.seed(42)
np_results = model_results(X_train,X_valid,y_train,y_valid)
results = pd.DataFrame(np_results,columns=['Model Name', 'R2 Score', 'RMSE', 'MAE'])
results

In [None]:
import xgboost as xgb

def model_test_results(XTrain,XTest,yTrain,yTest):
  test_models =[('Linear', LinearRegression()),
           ('Random Forest',RandomForestRegressor()),
           ('MLP',MLPRegressor(max_iter = 1000)),
           ('RidgeCV',MultiOutputRegressor(RidgeCV())),
           ('SGD',MultiOutputRegressor(SGDRegressor())),
           ('KNN', KNeighborsRegressor(n_neighbors=10)),
           ('Support Vector',MultiOutputRegressor(SVR())),
           ('Decision Tree',MultiOutputRegressor(DecisionTreeRegressor())),
           ('AdaBoost',MultiOutputRegressor(AdaBoostRegressor())),
           ('X_gb',MultiOutputRegressor(xgb.XGBRegressor())),
           ('GradientBoost',MultiOutputRegressor(GradientBoostingRegressor())),
          ]
  finalResults = []

  for name,model in test_models:
      model.fit(XTrain, yTrain)
      model_results = model.predict(XTest)
      r2score = r2_score(yTest, model_results)
      RMSE = np.sqrt(mean_squared_error(yTest, model_results))
      MAE = mean_absolute_error(yTest, model_results)
      finalResults.append((name, r2score, RMSE, MAE))
  finalResults.sort(key=lambda k:k[2])
  return finalResults


In [None]:
np.random.seed(42)
np__test_results = model_test_results(X_train,X_test,y_train,y_test)
test_results = pd.DataFrame(np__test_results,columns=['Model Name', 'R2 Score', 'RMSE', 'MAE'])
test_results

In [None]:
names = []
r2score = []
rmse = []
mae = []
for model in np_results:
  names.append(model[0])
  r2score.append(model[1])
  rmse.append(model[2])
  mae.append(model[3])

r2 score comparison

In [None]:
fig = plt.figure(figsize = (16, 8))
plt.bar(names,r2score,width=0.8,color='#6DA9E4')
plt.title('r2score comparison')

rmse comparison

In [None]:
fig = plt.figure(figsize = (16, 8))
plt.bar(names,rmse,width=0.8,color='#6DA9E4')
plt.title('rmse comparison')

mae comparison

In [None]:
fig = plt.figure(figsize = (16, 8))
plt.bar(names,mae,width=0.8,color='#6DA9E4')
plt.title('mae comparison')

In [None]:
Final_models = results.iloc[:1, :]
Final_models

Regression using Neural Network

In [None]:
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Dense

In [None]:
model = Sequential()
model.add(Dense(128, input_dim=26, activation='tanh'))
model.add(Dense(64, activation='tanh'))
model.add(Dense(32, activation='tanh'))
model.add(Dense(6, activation='linear'))

model.compile(loss=keras.losses.mse,
                 optimizer=keras.optimizers.Adam(),
                 metrics=["mse"])

In [None]:
history = model.fit(X_train, y_train, batch_size = 128, shuffle=True, epochs=500)

In [None]:
y_pred = model.predict(X_valid)

In [None]:
NN_res = {'Model Name':'Neural Network',	'R2 Score':	r2_score(y_valid, y_pred),'RMSE':np.sqrt(mean_squared_error(y_valid, y_pred)), 'MAE':mean_absolute_error(y_valid, y_pred)}

In [None]:
Final_models = pd.concat([Final_models, pd.DataFrame([NN_res])], ignore_index=True)

In [None]:
Final_models

In [None]:
NN_test_y_pred = model.predict(X_test)

In [None]:
NN_Final_results = {'r2score':r2_score(y_test,NN_test_y_pred),'rmse':np.sqrt(mean_squared_error(y_test,NN_test_y_pred)),'mae':mean_absolute_error(y_test,NN_test_y_pred)}
NN_Final_results

# Boosting Parameters

**learning_rate**

In [None]:
def parameter_LR(XTrain,yTrain,XValid,yValid):
  LR = [0.0001,0.001,0.01,0.1,0.2,0.3,0.4,0.55,0.573,0.5,0.6,0.7,0.8,0.9,1]
  Results = []
  for lr in LR:
    model = MultiOutputRegressor(GradientBoostingRegressor(random_state = 1, learning_rate = lr))
    model.fit(XTrain,yTrain)
    y_pred = model.predict(XValid)
    r2score = r2_score(yValid,y_pred)
    RMSE = np.sqrt(mean_squared_error(yValid,y_pred))
    MAE = mean_absolute_error(yValid,y_pred)
    Results.append((lr, r2score, RMSE, MAE))
  Results.sort(key=lambda k:k[2])
  return pd.DataFrame(Results,columns=['LR', 'R2 Score', 'RMSE', 'MAE'])

In [None]:
Results_LR = parameter_LR(X_train,y_train,X_valid,y_valid)
Results_LR

In [None]:
best_LR = Results_LR.iloc[0,0]
best_LR

**n_estimators**

In [None]:
def parameter_estimator(XTrain,yTrain,XValid,yValid):
  estimators = [100,200,300,400,500,700,1000]
  Results = []
  for estimator in estimators:
    model = MultiOutputRegressor(GradientBoostingRegressor(random_state = 1,learning_rate = best_LR,n_estimators = estimator))
    model.fit(X_train,y_train)
    y_pred = model.predict(X_valid)
    r2score = r2_score(y_valid,y_pred)
    RMSE = np.sqrt(mean_squared_error(y_valid,y_pred))
    MAE = mean_absolute_error(y_valid,y_pred)
    Results.append((estimator, r2score, RMSE, MAE))
  Results.sort(key=lambda k:k[2])
  return pd.DataFrame(Results,columns=['estimator', 'R2 Score', 'RMSE', 'MAE'])

In [None]:
Results_estimator = parameter_estimator(X_train,y_train,X_valid,y_valid)
Results_estimator

In [None]:
best_n_estimator = Results_estimator.iloc[0,0]
best_n_estimator

**subsample**

In [None]:
def parameter_subsample(XTrain,yTrain,XValid,yValid):
  samples = [0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1]
  Results = []
  for x in samples:
    model = MultiOutputRegressor(GradientBoostingRegressor(random_state = 1, learning_rate = best_LR, n_estimators = best_n_estimator, subsample = x))
    model.fit(XTrain,yTrain)
    y_pred = model.predict(XValid)
    r2score = r2_score(yValid,y_pred)
    RMSE = np.sqrt(mean_squared_error(yValid,y_pred))
    MAE = mean_absolute_error(yValid,y_pred)
    Results.append((x, r2score, RMSE, MAE))
  Results.sort(key=lambda k:k[2])
  return pd.DataFrame(Results,columns=['subsample', 'R2 Score', 'RMSE', 'MAE'])

In [None]:
Results_subsample = parameter_subsample(X_train,y_train,X_valid,y_valid)
Results_subsample

In [None]:
best_subsample= Results_subsample.iloc[0,0]
best_subsample

# Tree-Based Parameters

**min_samples_split**

In [None]:
def parameter_split(XTrain,yTrain,XValid,yValid):
  splits = [2,3,4,5,6,7,8,9,10]
  Results = []
  for x in splits:
    model = MultiOutputRegressor(GradientBoostingRegressor(random_state = 1, learning_rate = best_LR, n_estimators = best_n_estimator,min_samples_split=x))
    model.fit(XTrain,yTrain)
    y_pred = model.predict(XValid)
    r2score = r2_score(yValid,y_pred)
    RMSE = np.sqrt(mean_squared_error(yValid,y_pred))
    MAE = mean_absolute_error(yValid,y_pred)
    Results.append((x, r2score, RMSE, MAE))
  Results.sort(key=lambda k:k[2])
  return pd.DataFrame(Results,columns=['min_sample_split', 'R2 Score', 'RMSE', 'MAE'])

In [None]:
Results_split = parameter_split(X_train,y_train,X_valid,y_valid)
Results_split

In [None]:
best_split= Results_split.iloc[0,0]
best_split

**max_leaf_nodes**

In [None]:
def parameter_nodes(XTrain,yTrain,XValid,yValid):
  nodes = [2,3,4,5,6,7,8,9,10]
  Results = []
  for x in nodes:
    model = MultiOutputRegressor(GradientBoostingRegressor(random_state = 1, learning_rate = best_LR, n_estimators = best_n_estimator,min_samples_split=best_split, max_leaf_nodes = x))
    model.fit(XTrain,yTrain)
    y_pred = model.predict(XValid)
    r2score = r2_score(yValid,y_pred)
    RMSE = np.sqrt(mean_squared_error(yValid,y_pred))
    MAE = mean_absolute_error(yValid,y_pred)
    Results.append((x, r2score, RMSE, MAE))
  Results.sort(key=lambda k:k[2])
  return pd.DataFrame(Results,columns=['max_leaf_nodes', 'R2 Score', 'RMSE', 'MAE'])

In [None]:
Results_nodes = parameter_nodes(X_train,y_train,X_valid,y_valid)
Results_nodes

In [None]:
best_ln = Results_nodes.iloc[0,0]
best_ln

# Miscellaneous Parameters

**loss**

In [None]:
def parameter_loss(XTrain,yTrain,XValid,yValid):
  losses = ["squared_error","absolute_error","huber","quantile"]
  Results = []
  for x in losses:
    model = MultiOutputRegressor(GradientBoostingRegressor(random_state = 1, learning_rate = best_LR, n_estimators = best_n_estimator,min_samples_split=best_split,max_leaf_nodes = best_ln , loss = x))
    model.fit(XTrain,yTrain)
    y_pred = model.predict(XValid)
    r2score = r2_score(yValid,y_pred)
    RMSE = np.sqrt(mean_squared_error(yValid,y_pred))
    MAE = mean_absolute_error(yValid,y_pred)
    Results.append((x, r2score, RMSE, MAE))
  Results.sort(key=lambda k:k[2])
  return pd.DataFrame(Results,columns=['Loss', 'R2 Score', 'RMSE', 'MAE'])

In [None]:
Results_loss = parameter_loss(X_train,y_train,X_valid,y_valid)

In [None]:
Results_loss

In [None]:
best_loss = Results_loss.iloc[0,0]
best_loss

criterion

In [None]:
def parameter_criterion(XTrain,yTrain,XValid,yValid):
  criterion = ['friedman_mse', 'squared_error']
  Results = []
  for x in criterion:
    model = MultiOutputRegressor(GradientBoostingRegressor(random_state = 1, learning_rate = best_LR, n_estimators = best_n_estimator,min_samples_split=best_split,max_leaf_nodes = best_ln ,loss = best_loss,criterion = x))
    model.fit(XTrain,yTrain)
    y_pred = model.predict(XValid)
    r2score = r2_score(yValid,y_pred)
    RMSE = np.sqrt(mean_squared_error(yValid,y_pred))
    MAE = mean_absolute_error(yValid,y_pred)
    Results.append((x, r2score, RMSE, MAE))
  Results.sort(key=lambda k:k[2])
  return pd.DataFrame(Results,columns=['subsample', 'R2 Score', 'RMSE', 'MAE'])


In [None]:
Results_criterion = parameter_criterion(X_train,y_train,X_valid,y_valid)

In [None]:
Results_criterion

In [None]:
best_criterion = Results_criterion.iloc[0,0]
best_criterion

# **GB Tuned Results**

**GB Tuned Parameters:**

In [None]:
print("Learning_Rate: ",best_LR)
print("n_estimator: ",best_n_estimator)
print("min_samples_split: ",best_split)
print("max_leaf_nodes: ",best_ln)
print("loss: ", best_loss)
print("Criterion: ",best_criterion)

In [None]:
Final_validation_model = MultiOutputRegressor(GradientBoostingRegressor(random_state = 1, learning_rate = best_LR, n_estimators = best_n_estimator,min_samples_split=best_split,max_leaf_nodes = best_ln ,loss = best_loss,criterion = best_criterion))
Final_validation_model.fit(X_train,y_train)
final_validation_pred = Final_validation_model.predict(X_valid)

In [None]:
Final_validation_results = {'r2score':r2_score(y_valid,final_validation_pred),'rmse':np.sqrt(mean_squared_error(y_valid,final_validation_pred)),'mae':mean_absolute_error(y_valid,final_validation_pred)}
Final_validation_results

# **Predictions**

In [None]:
Final_model = MultiOutputRegressor(GradientBoostingRegressor(random_state = 1, learning_rate = best_LR, n_estimators = best_n_estimator,min_samples_split=best_split,max_leaf_nodes = best_ln ,loss = best_loss,criterion = best_criterion))
Final_model.fit(X_train,y_train)
final_pred = Final_model.predict(X_test)

In [None]:
final_pred_original = sc.inverse_transform(final_pred)

In [None]:
final_pred_original

In [None]:
Final_results = {'r2score':r2_score(y_test,final_pred),'rmse':np.sqrt(mean_squared_error(y_test,final_pred)),'mae':mean_absolute_error(y_test,final_pred)}
Final_results

In [None]:
Final_pred = sc.inverse_transform(final_pred)
GB_Calculated_Density = np.transpose(Final_pred)[0]
GB_Test_temperature = np.transpose(Final_pred)[1]
GB_YS = np.transpose(Final_pred)[2]
GB_UTS = np.transpose(Final_pred)[3]
GB_Elongation = np.transpose(Final_pred)[4]
GB_Calculated_Young_modulus = np.transpose(Final_pred)[5]

In [None]:
Final_pred = pd.DataFrame(Final_pred,columns = y_test.columns)

In [None]:
y_Test = pd.DataFrame(y_Test,columns = y_test.columns)

# **Graphical Visualization**

**Calculated Density**

In [None]:
fig ,(ax0) = plt.subplots(figsize=(16,8))
ax0.scatter(GB_Calculated_Density, actual_Calculated_Density, color = 'hotpink', s=18)
x_min = min(GB_Calculated_Density.min(), actual_Calculated_Density.min())
x_max = max(GB_Calculated_Density.max(), actual_Calculated_Density.max())
x3 = np.linspace(x_min, x_max, 1000)
y3 = x3
ax0.plot(x3, y3)
ax0.set_title('Calculated Density', fontsize = 20)
ax0.set_xlabel('predicted_Calculated_Density', fontsize = 14)
ax0.set_ylabel('actual_Calculated_Density', fontsize = 14)
plt.show()

**Test Temperature**

In [None]:
fig ,(ax0) = plt.subplots(figsize=(16,8))
ax0.scatter(GB_Test_temperature, actual_Test_Temperature, color = 'hotpink', s=18)
x_max = max(GB_Test_temperature.max(), actual_Test_Temperature.max())
x_min = min(GB_Test_temperature.min(), actual_Test_Temperature.min())
x3 = np.linspace(x_min, x_max, 1000)
y3 = x3
ax0.plot(x3, y3)
ax0.set_title('Test Temperature', fontsize = 20)
ax0.set_xlabel('predicted_Test_temperature', fontsize = 14)
ax0.set_ylabel('actual_Test_Temperature', fontsize = 14)
plt.show()

**Yield Strength**

In [None]:
fig ,(ax0) = plt.subplots(figsize=(16,8))
ax0.scatter(GB_YS, actual_YS, color = 'hotpink', s=18)
x_max = max(GB_YS.max(), actual_YS.max())
x_min = min(GB_YS.min(), actual_YS.min())
x3 = np.linspace(x_min, x_max, 1000)
y3 = x3
ax0.plot(x3, y3)
ax0.set_title('Yield Strength', fontsize = 20)
ax0.set_xlabel('predicted_Yield_Strength', fontsize = 14)
ax0.set_ylabel('actual_Yirld_Strength', fontsize = 14)
plt.show()

**Ultimate Tensile Strength**

In [None]:
fig ,(ax0) = plt.subplots(figsize=(16,8))
ax0.scatter(GB_UTS, actual_UTS, color = 'hotpink', s=18)
x_max = max(GB_UTS.max(), actual_UTS.max())
x_min = min(GB_UTS.min(), actual_UTS.min())
x3 = np.linspace(x_min, x_max, 1000)
y3 = x3
ax0.plot(x3, y3)
ax0.set_title('Ultimate Tensile Strength', fontsize = 20)
ax0.set_xlabel('predicted_UTS', fontsize = 14)
ax0.set_ylabel('actual_UTS', fontsize = 14)
plt.show()

**Elongation**

In [None]:
fig ,(ax0) = plt.subplots(figsize=(16,8))
ax0.scatter(GB_Elongation, actual_Elongation, color = 'hotpink', s=18)
x_max = max(GB_Elongation.max(), actual_Elongation.max())
x_min = min(GB_Elongation.min(), actual_Elongation.min())
x3 = np.linspace(x_min, x_max, 1000)
y3 = x3
ax0.plot(x3, y3)
ax0.set_title('Elongation', fontsize = 20)
ax0.set_xlabel('predicted_Elongation', fontsize = 14)
ax0.set_ylabel('actual_Elongation', fontsize = 14)
plt.show()

**Calculated Youngs Modulus**

In [None]:
fig ,(ax0) = plt.subplots(figsize=(16,8))
ax0.scatter(GB_Calculated_Young_modulus, actual_Calculated_Youngs_Modulus, color = 'hotpink', s=18)
x_max = max(GB_Calculated_Young_modulus.max(), actual_Calculated_Youngs_Modulus.max())
x_min = min(GB_Calculated_Young_modulus.min(), actual_Calculated_Youngs_Modulus.min())
x3 = np.linspace(x_min, x_max, 1000)
y3 = x3
ax0.plot(x3, y3)
ax0.set_title('Calculated Youngs Modulus', fontsize = 20)
ax0.set_xlabel('predicted_CYM', fontsize = 14)
ax0.set_ylabel('actual_CYM', fontsize = 14)
plt.show()

In [None]:
import pickle

with open("model.pkl", "wb") as f:
    pickle.dump(Final_model, f)
