<a href="https://colab.research.google.com/github/EhsanSheikhasadi/MLR_SVR_BC/blob/main/MLR_SVR_BC.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Developing a transformed linear regression and support vector regression to deal with the demand uncertainty**

Ehsan Sheikhasadi<sup>a</sup>, Saiedeh Gholami<sup>a</sup>, Donya Rahmani<sup>a</sup>, Hamid Esmaeili Najafabadi<sup>b</sup>

This is the code for the paper "Developing a transformed linear regression and support vector regression to deal with the demand uncertainty: Application to blood supply chain in disaster". The data are available at https://github.com/EhsanSheikhasadi/MLR_SVR_BC

Contact: ehsan.sheikhasadi1994@gmail.com



In [3]:
import numpy as np
import pandas as pd

# Show floating-point values with thousands separators and two decimals.
pd.options.display.float_format = "{:,.2f}".format

# Load the dataset from the Excel workbook into a DataFrame.
df = pd.read_excel("Iran_Natural_Disaster.xlsx")

# Report how many values are missing in each column.
print(df.isna().sum())

# Columns used for modelling: five predictors followed by the target.
model_columns_names = [
    'Earthquake  Magnitude',
    'Focal Depth',
    'direct distance from epicenter to nearest city',
    'Number of Homeless',
    'Max PGA(%g)',
    'Number of Injuries',
]
model_columns = df[model_columns_names]

# Independent variables: every modelling column except the last one.
X_name = model_columns_names[:-1]
X = df[X_name]

# Dependent variable.
Y = df['Number of Injuries']

# Draw one histogram per modelling column to inspect how its values are
# distributed (2 x 3 grid, one cell per column).
import matplotlib.pyplot as plt

fig = plt.figure(figsize=(20, 20))
for position, column_name in enumerate(model_columns.columns, start=1):
    ax = fig.add_subplot(2, 3, position)
    df[column_name].hist(bins='auto', ax=ax, facecolor='#2ab0ff')
    ax.set_title(column_name, color='black')
fig.tight_layout()
plt.show()


# Box-Cox transformation of the dependent variable
from scipy import stats

# Called without an explicit lambda, boxcox returns the transformed values
# together with the lambda it selected.
Mag_trans, lmbda = stats.boxcox(df['Number of Injuries'])
print('Best lambda parameter = ' + str(round(lmbda, 3)))

# Box-Cox normality plot for lambda in [-20, 20]; the red vertical line
# marks the selected lambda.
fig, ax = plt.subplots(figsize=(8, 4))
prob = stats.boxcox_normplot(df['Number of Injuries'], la=-20, lb=20, plot=ax)
ax.axvline(lmbda, color='r')
plt.show()

# Fit a Gaussian to the Box-Cox transformed values and overlay its density
# on their histogram.

# Mag_trans is not sorted in place, so the transformed values keep the
# order in which they were produced.
mean, std = stats.norm.fit(Mag_trans, loc=0)
pdf_norm = stats.norm.pdf(Mag_trans, mean, std)

fig, ax = plt.subplots(figsize=(8, 4))
ax.hist(Mag_trans, bins='auto', density=True)
# Draw the curve in ascending x order: plotting the points in their stored
# (unsorted) order makes the line jump back and forth across the axes.
order = np.argsort(Mag_trans)
ax.plot(np.asarray(Mag_trans)[order], np.asarray(pdf_norm)[order],
        label='Fitted normal distribution')
ax.set_xlabel('Number of Injuries')
ax.set_ylabel('Transformed Probability')
ax.set_title('Box-Cox Transformed Distribution of Dependent Variable')
ax.legend()
plt.show()

# Visual inspection with Q-Q plots: original data on the left, Box-Cox
# transformed data on the right, both against a normal distribution.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(8, 4))

prob = stats.probplot(df['Number of Injuries'], dist=stats.norm, plot=ax1)
prob = stats.probplot(Mag_trans, dist=stats.norm, plot=ax2)

ax1.set_title('Original Data')
ax1.set_ylabel('Number of Injuries')
# Show the lambda that was actually estimated instead of a hard-coded value,
# so the title stays correct when the data change.
ax2.set_title('Transformed Number of Injuries, λ = %s' % round(lmbda, 3))
ax2.set_ylabel('')
plt.show()

# Normality test (hypothesis testing) on the transformed values
k2, p = stats.normaltest(Mag_trans)
print(f'\nChi-squared statistic = {k2:.3f}, p = {p:.3f}')

# Significance level: a p-value above it means normality is not rejected.
alpha = 0.05
verdict = (
    '\nThe transformed data is Gaussian (fails to reject the null hypothesis)'
    if p > alpha
    else '\nThe transformed data does not look Gaussian (reject the null hypothesis)'
)
print(verdict)

# Dump the transformed values, then the original ones.
print(list(Mag_trans))
print(df['Number of Injuries'].to_numpy())

# Box plots of every modelling column except the first one

import seaborn as sb

fig = plt.figure(figsize=(20, 20))
# Column k is drawn in grid cell k + 1, so the first cell of the 2 x 3 grid
# stays empty.
for col_idx, col_name in enumerate(model_columns_names[1:6], start=1):
    ax = fig.add_subplot(2, 3, col_idx + 1)
    sb.boxplot(data=model_columns.iloc[:, col_idx])
    ax.set_title(col_name, color='black')
plt.show()


# Correlations between the dependent variable and the independent variables.
# The reference series is the dependent variable ('Number of Injuries'), so
# the 'D V' column really names the dependent variable; every other
# modelling column is treated as a feature.
dependent_name = 'Number of Injuries'
dependent = df[dependent_name]

# Collect all rows first and build the frame once: DataFrame.append was
# removed in pandas 2.0, and appending row by row copies the frame on every
# iteration.
corr_rows = [
    {'D V': dependent_name, 'Feature': feature, 'Correlation': dependent.corr(df[feature])}
    for feature in model_columns.columns
    if feature != dependent_name
]
corrdf = pd.DataFrame(corr_rows, columns=['D V', 'Feature', 'Correlation'])

print(corrdf)


# Build our regression models
# Create the training and test sets (70 % training, fixed seed)

from sklearn.model_selection import train_test_split

# Split the modelling frame, then separate the target from the predictors.
# The targets are kept as single-column DataFrames.
df_train, df_test = train_test_split(model_columns, train_size=0.7, random_state=44)
x_train = df_train.drop(columns="Number of Injuries")
y_train = df_train[['Number of Injuries']]
x_test = df_test.drop(columns="Number of Injuries")
y_test = df_test[['Number of Injuries']]

# Split the feature matrix X and target Y with the same proportion and seed.
train_x, test_x, train_y, test_y = train_test_split(
    X, Y, train_size=0.7, random_state=44
)
print(df_train.shape)


# """Build the Multiple Linear Regression Model"""
#
from sklearn.linear_model import LinearRegression

# The `normalize` argument was deprecated in scikit-learn 1.0 and removed in
# 1.2, so passing it raises TypeError on current releases. For ordinary
# least squares, rescaling the features does not change the predictions,
# so the argument is simply dropped.
lr = LinearRegression()
lr.fit(x_train, y_train)

y_pred_train = lr.predict(x_train)
print(y_pred_train)
y_pred_test = lr.predict(x_test)

# Check the metrics: training set first, then test set.
import sklearn.metrics as sm

for y_true, y_pred in ((y_train, y_pred_train), (y_test, y_pred_test)):
    print("Mean absolute error =", round(sm.mean_absolute_error(y_true, y_pred), 2))
    print("Mean squared error =", round(sm.mean_squared_error(y_true, y_pred), 2))
    print("Median absolute error =", round(sm.median_absolute_error(y_true, y_pred), 2))
    print("Explain variance score =", round(sm.explained_variance_score(y_true, y_pred), 2))
    print("R2 score =", round(sm.r2_score(y_true, y_pred), 2))


"""Build the Regression Model using Random Forest Algorithm"""

from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline

# Seed the forest (same seed as the train/test split) so the fitted model
# and the metrics printed below are reproducible from run to run.
rf = make_pipeline(StandardScaler(), RandomForestRegressor(random_state=44))
# y_train is a single-column frame; flatten it to 1-D so the estimator does
# not emit a DataConversionWarning about a column-vector target.
rf.fit(x_train, np.ravel(y_train))

y_pred_train = rf.predict(x_train)
print(y_pred_train)
y_pred_test = rf.predict(x_test)

# Metrics: training set first, then test set.
for y_true, y_pred in ((y_train, y_pred_train), (y_test, y_pred_test)):
    print("Mean absolute error =", round(sm.mean_absolute_error(y_true, y_pred), 2))
    print("Mean squared error =", round(sm.mean_squared_error(y_true, y_pred), 2))
    print("Median absolute error =", round(sm.median_absolute_error(y_true, y_pred), 2))
    print("Explain variance score =", round(sm.explained_variance_score(y_true, y_pred), 2))
    print("R2 score =", round(sm.r2_score(y_true, y_pred), 2))


"""Build Regression Model using SVM"""

from sklearn.svm import SVR

# Standardise the features before the SVR; C and epsilon are the values
# chosen for this model.
svr = make_pipeline(StandardScaler(), SVR(C=1500.0, epsilon=0.9))
# y_train is a single-column frame; flatten it to 1-D so the estimator does
# not emit a DataConversionWarning about a column-vector target.
svr.fit(x_train, np.ravel(y_train))

y_pred_train = svr.predict(x_train)
y_pred_test = svr.predict(x_test)

# Metrics: training set first, then test set.
for y_true, y_pred in ((y_train, y_pred_train), (y_test, y_pred_test)):
    print("Mean absolute error =", round(sm.mean_absolute_error(y_true, y_pred), 2))
    print("Mean squared error =", round(sm.mean_squared_error(y_true, y_pred), 2))
    print("Median absolute error =", round(sm.median_absolute_error(y_true, y_pred), 2))
    print("Explain variance score =", round(sm.explained_variance_score(y_true, y_pred), 2))
    print("R2 score =", round(sm.r2_score(y_true, y_pred), 2))


FileNotFoundError: ignored