In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
import scipy.stats as stats
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import FunctionTransformer
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn import svm
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.model_selection import GridSearchCV
import warnings
# Silence every warning for the rest of the session.
# NOTE(review): this is a blanket filter, so deprecation and data-conversion
# warnings that the libraries used below may raise are hidden as well.
warnings.filterwarnings("ignore")

In [None]:
# Load the raw data points from the Excel workbook into a DataFrame.
# NOTE(review): this is an absolute, machine-specific path; the notebook only
# runs where this file exists.
data_path = r"C:\Users\we\Downloads\ABS+CF DATA POINTS ML (2).xlsx"
df = pd.read_excel(data_path)

In [None]:
# Preview the last rows of the loaded sheet.
df.tail()

In [None]:
# Column names, dtypes and non-null counts.
df.info()

In [None]:
# Summary statistics of the loaded columns.
df.describe()

In [None]:
# Number of distinct values per column, largest first.
df.nunique().sort_values(ascending = False)

In [None]:
# Separate the response column from the predictors.
# Both results are DataFrames; the target is a single-column frame.
target_column = 'Tensile Strength (MPa)'
target = df[target_column].to_frame()
feature = df.drop(columns=[target_column])

In [None]:
# Display the single-column target frame for a visual check.
target

In [None]:
# Hold out 30% of the rows for testing, stratified on the infill-density column
# so both splits keep similar proportions of each density level.
# random_state is pinned so the split (and every metric computed from it) is
# reproducible after a kernel restart.
X_train, X_test, y_train, y_test = train_test_split(
    feature,
    target,
    test_size=0.3,
    stratify=feature[["Infill Density (%)"]],
    random_state=42,
)

In [None]:
# Names of the training columns whose dtype is not `object`; these are the
# columns that get scaled and plotted below.
list_feature = [
    column_name
    for column_name, column_dtype in X_train.dtypes.items()
    if column_dtype != 'O'
]

In [None]:
# Box plot of the training-set tensile strength.
train_strength = y_train['Tensile Strength (MPa)']
plt.boxplot(train_strength)
plt.show()

In [None]:
# One box plot per selected training feature, each rendered as its own figure.
for column_name in list_feature:
    column_values = X_train[column_name]
    sns.boxplot(column_values)
    plt.show()

In [None]:
# Distribution check for the training target:
# left panel  - density-scaled histogram with a KDE overlay,
# right panel - Q-Q plot against the normal distribution.
train_strength = y_train['Tensile Strength (MPa)']

plt.subplot(121)
# sns.distplot is deprecated (the warning was hidden by the blanket warnings
# filter). histplot with stat='density' draws an equivalent density-scaled
# histogram with a KDE curve.
sns.histplot(x = train_strength, kde = True, bins = 15, stat = 'density')
plt.xlabel('Tensile Strength (MPa)')
plt.title('Tensile Strength (MPa)' + ' pdf')

plt.subplot(122)
stats.probplot(train_strength, dist = 'norm', plot = plt)
plt.title('Tensile Strength (MPa)' + ' QQ plot')
plt.show()

In [None]:
# Scatter plot with a fitted regression line for every unordered pair of
# selected features (each pair is drawn once, in its own figure).
for position, x_column in enumerate(list_feature):
    for y_column in list_feature[position + 1:]:
        sns.regplot(x = feature[x_column], y = feature[y_column], scatter=True)
        plt.show()

In [None]:
# Scatter plot with a fitted regression line of each selected feature against
# the tensile strength, one figure per feature.
strength = target["Tensile Strength (MPa)"]
for column_name in list_feature:
    sns.regplot(x = feature[column_name], y = strength, scatter=True)
    plt.show()

In [None]:
# Preprocessing applied to the selected feature columns: a single
# standardization step registered under the name 'scalar'.
scaling_step = ('scalar', StandardScaler())
feature_pipeline = Pipeline(steps=[scaling_step])

In [None]:
# Apply the scaling pipeline to the columns in list_feature; any other column
# is passed through unchanged.
transformer = ColumnTransformer(
    transformers=[('tnf1', feature_pipeline, list_feature)],
    remainder='passthrough',
)

In [None]:
# Fit the transformer on the training split only, then reuse the fitted
# parameters on the test split so no test-set statistics leak into scaling.
# NOTE(review): X_train / X_test are rebound to the transformer output here,
# so this cell is not idempotent - re-running it without re-running the split
# cell feeds already-transformed data back into the transformer.
X_train = transformer.fit_transform(X_train)
X_test = transformer.transform(X_test)

In [None]:
def evaluate_model(true,predicted):
    """Compute regression error metrics for a set of predictions.

    Parameters
    ----------
    true : array-like
        Ground-truth target values.
    predicted : array-like
        Predicted values, aligned with ``true``.

    Returns
    -------
    tuple
        ``(mae, mse, rmse, r2)`` in that order.
    """
    mae = mean_absolute_error(true, predicted)
    # The ``squared`` keyword of mean_squared_error is deprecated and no longer
    # accepted by newer scikit-learn releases, so MSE and RMSE are both derived
    # from the per-output MSE. Taking the root per output before averaging
    # mirrors what ``squared=False`` computed; for a single target it is simply
    # sqrt(MSE).
    per_output_mse = mean_squared_error(true, predicted, multioutput='raw_values')
    mse = per_output_mse.mean()
    rmse = (per_output_mse ** 0.5).mean()
    r2 = r2_score(true, predicted)
    return mae, mse, rmse, r2

In [None]:
# Candidate regressors with library-default hyperparameters. The tree-based
# models get a fixed random_state so repeated runs give the same scores.
models = {
    "Linear Regression":LinearRegression(),
    "KNeighbors Regressor":KNeighborsRegressor(),
    "Support Vector Regressor":SVR(),
    "Random Forest Regressor":RandomForestRegressor(random_state=42),
    "Decision Tree Regressor":DecisionTreeRegressor(random_state=42),
}
# Test-set metrics keyed by model name, kept for later comparison.
model_mae_dic = dict()
model_mse_dic = dict()
model_rmse_dic = dict()
model_R2_score = dict()

# y_train / y_test are single-column frames; flatten them once so every
# estimator is fitted on (and returns) a 1-D target.
y_train_1d = y_train.values.ravel()
y_test_1d = y_test.values.ravel()

# NOTE: the loop variable is deliberately called `model`; after the loop it
# still refers to the last estimator in `models`, which later cells use.
for model_name, model in models.items():
    model.fit(X_train, y_train_1d)

    y_train_pred = model.predict(X_train)
    y_test_pred = model.predict(X_test)

    # Score each split once and unpack all four metrics.
    train_mae, train_mse, train_rmse, train_R2_score = evaluate_model(y_train_1d, y_train_pred)
    test_mae, test_mse, test_rmse, test_R2_score = evaluate_model(y_test_1d, y_test_pred)

    print(model_name + ":")
    model_mae_dic[model_name] = test_mae
    model_mse_dic[model_name] = test_mse
    model_rmse_dic[model_name] = test_rmse
    model_R2_score[model_name] = test_R2_score

    print('Model performance on training set:')
    print("Train MAE: {:.4f}".format(train_mae))
    print("Train MSE: {:.4f}".format(train_mse))
    print("Train RMSE: {:.4f}".format(train_rmse))
    print("Model R2 score: {:.4f}".format(train_R2_score))
    # Actual values go on x and predictions on y so the data matches the axis
    # labels below (they were swapped before).
    sns.regplot(x = y_train_1d, y = y_train_pred)
    plt.title("Trained " + model_name)
    plt.xlabel("Actual Tensile Strength (MPa)")
    plt.ylabel("Predicted Tensile Strength (MPa)")
    plt.show()

    print("----------------------------------------")

    print('Model performance on test set:')
    print("Test MAE: {:.4f}".format(test_mae))
    print("Test MSE: {:.4f}".format(test_mse))
    print("Test RMSE: {:.4f}".format(test_rmse))
    print("Model R2 score: {:.4f}".format(test_R2_score))
    sns.regplot(x = y_test_1d, y = y_test_pred)
    plt.title("Tested " + model_name)
    plt.xlabel("Actual Tensile Strength (MPa)")
    plt.ylabel("Predicted Tensile Strength (MPa)")
    plt.show()

    print("----------------------------------------")

    print("="*40)
    print('\n')

In [None]:
# `model` is whatever the training loop bound last, i.e. the final entry of
# the `models` dict (the decision tree regressor).
model

In [None]:
# The models were fitted on standardized features, so a raw sample has to go
# through the same fitted transformer before it is passed to predict();
# feeding unscaled values directly gives a meaningless prediction.
# NOTE(review): the three values are assumed to follow the column order of
# `feature` - confirm against the dataset.
sample = pd.DataFrame([[0.2, 45, 90]], columns=feature.columns)
model.predict(transformer.transform(sample))

In [None]:
import os

# Define the directory path. A raw string is used because '\F' is not a valid
# escape sequence: it only worked by accident and triggers a warning on
# current Python versions. The resulting path is unchanged.
directory = r'D:\FYP'

# Create the directory if it does not exist yet. exist_ok=True replaces the
# separate existence check and avoids the gap between checking and creating.
os.makedirs(directory, exist_ok=True)

# Change the current working directory to the new directory
os.chdir(directory)

In [None]:
import pickle

In [None]:
# Final model to persist: a random forest fitted on the transformed training
# features. random_state is fixed so the saved model is reproducible, and the
# single-column target frame is flattened to the 1-D shape the estimator
# expects.
model1 = RandomForestRegressor(random_state=42)
model1.fit(X_train, y_train.values.ravel())

In [None]:
# Serialize the fitted forest to model.pkl (relative to the current working
# directory).
# NOTE(review): only the estimator is written here; it was trained on the
# output of `transformer`, so whoever loads it presumably needs the same
# fitted transformer as well - confirm how the file is consumed.
with open('model.pkl', 'wb') as model_file:
    pickle.dump(model1, model_file)

In [None]:
# Read the model back from model.pkl to check the round trip.
# Unpickling executes code embedded in the file, so only load pickles that
# come from a trusted source.
with open('model.pkl', 'rb') as model_file:
    loaded_model = pickle.load(model_file)