In [50]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_percentage_error , mean_squared_error

In [51]:
import pandas as pd
df = pd.read_csv("Steel Manufacturing.csv")

X = df.drop(columns = ["Inclusions" , "Steel Strength"])

y1 = df['Inclusions']
y2 = df['Steel Strength']

In [52]:
X_train, X_test , y1_train , y1_test = train_test_split(X, y1, train_size = 0.8, shuffle = True, random_state = 42 )
X_train, X_test , y2_train , y2_test = train_test_split(X, y2, train_size = 0.8, shuffle = True, random_state = 42 )

from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder

In [53]:
numerical_pipeline = Pipeline([
    ('scaler', StandardScaler())])

categorical_pipeline = Pipeline([
    ('one_hot_encoder', OneHotEncoder())])

numerical_features = df.drop(columns = ["Inclusions" , "Steel Strength"]).select_dtypes(exclude = ["object"]).columns
categorical_features = df.drop(columns = ["Inclusions" , "Steel Strength"]).select_dtypes(include = ["object"]).columns

In [54]:
preprocessor = ColumnTransformer([
    ('num', numerical_pipeline, numerical_features),
    ('cat', categorical_pipeline, categorical_features)], remainder='passthrough')

In [55]:
model1 = Pipeline([
    ('preprocessor', preprocessor),
    ('regressor',
     RandomForestRegressor(n_estimators=100))])
model2 = Pipeline([
    ('preprocessor', preprocessor),
    ('regressor',
     RandomForestRegressor(n_estimators=100))])

In [56]:
model1.fit(X_train, y1_train)
model2.fit(X_train, y2_train)

In [57]:
mean_absolute_percentage_error(model1.predict(X_test), y1_test)

0.04136575120902261

In [58]:
mean_absolute_percentage_error(model2.predict(X_test), y2_test)

0.054483095372277815

In [59]:
import pickle

# File path where the model will be saved
model_file_path1 = 'inclusions_model.pkl'
model_file_path2 = 'steel_strength_model.pkl'

# Save the model to the pkl file
with open(model_file_path1, 'wb') as f:
    pickle.dump(model1, f)

# Save the model to the pkl file
with open(model_file_path2, 'wb') as f:
    pickle.dump(model2, f)

In [60]:
df

Unnamed: 0,Heating Temperature,Cooling Temperature,Solid Mold Size,Coke %,Minerals %,Production Type,Inclusions,Steel Strength
0,147,127,51,39,61,Unit 2,13.83,962
1,180,123,49,64,36,Unit 1,9.34,928
2,155,141,66,71,29,Unit 2,12.84,1142
3,177,136,38,40,60,Unit 1,15.08,691
4,147,123,57,48,52,Unit 1,9.57,973
...,...,...,...,...,...,...,...,...
1995,146,142,35,58,42,Unit 3,13.32,625
1996,152,130,47,71,29,Unit 3,10.10,822
1997,137,117,71,71,29,Unit 2,10.85,1042
1998,170,128,48,45,55,Unit 3,14.87,496
