In [1]:
# Mount Google Drive at /content/drive so that the /content/drive/MyDrive/...
# paths used by later cells resolve inside this Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import numpy as np
import pandas as pd
import pickle

In [3]:
# Load the labelled training data from the mounted Drive folder.
# (test.csv from the same folder is not loaded in this notebook.)
train_csv_path = "/content/drive/MyDrive/Are_your_employees_burning_out/train.csv"
df_train = pd.read_csv(train_csv_path)

In [4]:
# Preview the first five raw rows.
df_train.head(n=5)

Unnamed: 0,Employee ID,Date of Joining,Gender,Company Type,WFH Setup Available,Designation,Resource Allocation,Mental Fatigue Score,Burn Rate
0,fffe32003000360033003200,2008-09-30,Female,Service,No,2.0,3.0,3.8,0.16
1,fffe3700360033003500,2008-11-30,Male,Service,Yes,1.0,2.0,5.0,0.36
2,fffe31003300320037003900,2008-03-10,Female,Product,Yes,2.0,,5.8,0.49
3,fffe32003400380032003900,2008-11-03,Male,Service,Yes,1.0,1.0,2.6,0.2
4,fffe31003900340031003600,2008-07-24,Female,Service,No,3.0,7.0,6.9,0.52


In [5]:
# Keep only the first record for each employee ID.
df_train = df_train.drop_duplicates(subset='Employee ID', keep='first')

In [6]:
# Remove the identifier and the attributes that are not used as model inputs.
unused_columns = ["Employee ID", "Date of Joining", "Company Type", "Gender"]
df_train = df_train.drop(columns=unused_columns)

In [7]:
# Encode the WFH flag numerically: 'No' -> -1, 'Yes' -> 1.
# The identity entries (-1 -> -1, 1 -> 1) make this cell safe to re-run: without
# them a second execution would map the already-encoded integers to NaN, and the
# later dropna() would then discard every row. Any other value still becomes NaN,
# exactly as before.
wfh_encoding = {'No': -1, 'Yes': 1, -1: -1, 1: 1}
df_train['WFH Setup Available'] = df_train['WFH Setup Available'].map(wfh_encoding)

In [8]:
# Discard every row that has a missing value in any remaining column.
df_train = df_train.dropna(axis=0, how='any')

In [9]:
# Preview the first five rows after encoding and NaN removal.
df_train.head(n=5)

Unnamed: 0,WFH Setup Available,Designation,Resource Allocation,Mental Fatigue Score,Burn Rate
0,-1,2.0,3.0,3.8,0.16
1,1,1.0,2.0,5.0,0.36
3,1,1.0,1.0,2.6,0.2
4,-1,3.0,7.0,6.9,0.52
5,1,2.0,4.0,3.6,0.29


In [10]:
# Count how many rows fall into each designation level.
df_train.loc[:, 'Designation'].value_counts()

2.0    6214
3.0    4920
1.0    3954
4.0    1947
0.0    1238
5.0     317
Name: Designation, dtype: int64

In [11]:
# Write the cleaned frame to the working directory, without the index column.
df_train.to_csv(path_or_buf='train_preprocessed.csv', index=False)

In [12]:
# Move the preprocessed CSV from the Colab working directory into the Drive dataset folder.
!mv '/content/train_preprocessed.csv' '/content/drive/MyDrive/Are_your_employees_burning_out/train_preprocessed.csv'

In [13]:
# Partition the cleaned frame into one subset per designation level (0-5);
# each subset is modelled separately further down.
(
    df_train_des_0,
    df_train_des_1,
    df_train_des_2,
    df_train_des_3,
    df_train_des_4,
    df_train_des_5,
) = (
    df_train[df_train['Designation'] == level]
    for level in (0.0, 1.0, 2.0, 3.0, 4.0, 5.0)
)

In [14]:
# Designation is constant within each subset, so remove it from every one of them.
(
    df_train_des_0,
    df_train_des_1,
    df_train_des_2,
    df_train_des_3,
    df_train_des_4,
    df_train_des_5,
) = (
    subset.drop(columns='Designation')
    for subset in (
        df_train_des_0,
        df_train_des_1,
        df_train_des_2,
        df_train_des_3,
        df_train_des_4,
        df_train_des_5,
    )
)

In [15]:
# Preview the first five rows of the designation-0 subset.
df_train_des_0.head(n=5)

Unnamed: 0,WFH Setup Available,Resource Allocation,Mental Fatigue Score,Burn Rate
11,1,1.0,1.8,0.12
32,-1,1.0,3.0,0.19
37,1,2.0,3.9,0.19
82,1,1.0,0.8,0.05
112,1,1.0,2.6,0.18


## For designation 0

In [16]:
# Designation 0: 'Burn Rate' is the regression target, every other column is a feature.
y = df_train_des_0['Burn Rate']
X = df_train_des_0.drop(columns='Burn Rate')

In [17]:
# # Generate some example data
# np.random.seed(42)
# X = np.random.rand(100, 5)  # 100 samples, 5 features
# y = 2*X[:, 0] + 3*X[:, 1] + 4*X[:, 2] + 5*X[:, 3] + 6*X[:, 4] + 2 + 0.1*np.random.randn(100)  # Linear combination with noise

In [18]:
# Hold out 20% of the designation-0 rows for evaluation (seed fixed at 42).
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

In [19]:
# Train a linear regressor on the designation-0 training split.
model_des_0 = LinearRegression()
model_des_0.fit(X=X_train, y=y_train)

In [20]:
# Pull the learned per-feature coefficients and the intercept out of the model.
weights, bias = model_des_0.coef_, model_des_0.intercept_

In [21]:
# Report the designation-0 coefficients and intercept.
for label, value in (("Weights (Coefficients):", weights), ("Bias (Intercept):", bias)):
    print(label, value)

Weights (Coefficients): [0.00059435 0.01919768 0.05873248]
Bias (Intercept): -0.02695521736796405


In [22]:
# Predict burn rate for the held-out designation-0 rows.
y_pred = model_des_0.predict(X=X_test)

In [23]:
# Mean squared error of the designation-0 model on its held-out split.
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)

In [24]:
# Show the designation-0 hold-out error.
mse_message = f"Mean Squared Error: {mse}"
print(mse_message)

Mean Squared Error: 0.0012884441063531805


In [25]:
# Serialise the fitted designation-0 model into the working directory.
with open('regression_model_des0.pkl', mode='wb') as pickle_out:
    pickle.dump(model_des_0, pickle_out)

## For designation 1

In [26]:
# Designation 1: 'Burn Rate' is the regression target, every other column is a feature.
y = df_train_des_1['Burn Rate']
X = df_train_des_1.drop(columns='Burn Rate')

In [27]:
# Hold out 20% of the designation-1 rows for evaluation (seed fixed at 42).
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

In [28]:
# Train a linear regressor on the designation-1 training split.
model_des_1 = LinearRegression()
model_des_1.fit(X=X_train, y=y_train)

In [29]:
# Pull the learned per-feature coefficients and the intercept out of the model.
weights, bias = model_des_1.coef_, model_des_1.intercept_

In [30]:
# Report the designation-1 coefficients and intercept.
for label, value in (("Weights (Coefficients):", weights), ("Bias (Intercept):", bias)):
    print(label, value)

Weights (Coefficients): [-0.00303212  0.02074924  0.07094721]
Bias (Intercept): -0.05955500979744721


In [31]:
# Predict burn rate for the held-out designation-1 rows.
y_pred = model_des_1.predict(X=X_test)

In [32]:
# Mean squared error of the designation-1 model on its held-out split.
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)

In [33]:
# Show the designation-1 hold-out error.
mse_message = f"Mean Squared Error: {mse}"
print(mse_message)

Mean Squared Error: 0.0025112554644726266


In [34]:
# Serialise the fitted designation-1 model into the working directory.
with open('regression_model_des1.pkl', mode='wb') as pickle_out:
    pickle.dump(model_des_1, pickle_out)

## For designation 2

In [35]:
# Designation 2: 'Burn Rate' is the regression target, every other column is a feature.
y = df_train_des_2['Burn Rate']
X = df_train_des_2.drop(columns='Burn Rate')

In [36]:
# Hold out 20% of the designation-2 rows for evaluation (seed fixed at 42).
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

In [37]:
# Train a linear regressor on the designation-2 training split.
model_des_2 = LinearRegression()
model_des_2.fit(X=X_train, y=y_train)

In [38]:
# Pull the learned per-feature coefficients and the intercept out of the model.
weights, bias = model_des_2.coef_, model_des_2.intercept_

In [39]:
# Report the designation-2 coefficients and intercept.
for label, value in (("Weights (Coefficients):", weights), ("Bias (Intercept):", bias)):
    print(label, value)

Weights (Coefficients): [-0.00607127  0.02527233  0.0790592 ]
Bias (Intercept): -0.11719479853351966


In [40]:
# Predict burn rate for the held-out designation-2 rows.
y_pred = model_des_2.predict(X=X_test)

In [41]:
# Mean squared error of the designation-2 model on its held-out split.
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)

In [42]:
# Show the designation-2 hold-out error.
mse_message = f"Mean Squared Error: {mse}"
print(mse_message)

Mean Squared Error: 0.0031424277690831007


In [43]:
# Serialise the fitted designation-2 model into the working directory.
with open('regression_model_des2.pkl', mode='wb') as pickle_out:
    pickle.dump(model_des_2, pickle_out)

## For designation 3

In [44]:
# Designation 3: 'Burn Rate' is the regression target, every other column is a feature.
y = df_train_des_3['Burn Rate']
X = df_train_des_3.drop(columns='Burn Rate')

In [45]:
# Hold out 20% of the designation-3 rows for evaluation (seed fixed at 42).
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

In [46]:
# Train a linear regressor on the designation-3 training split.
model_des_3 = LinearRegression()
model_des_3.fit(X=X_train, y=y_train)

In [47]:
# Pull the learned per-feature coefficients and the intercept out of the model.
weights, bias = model_des_3.coef_, model_des_3.intercept_

In [48]:
# Report the designation-3 coefficients and intercept.
for label, value in (("Weights (Coefficients):", weights), ("Bias (Intercept):", bias)):
    print(label, value)

Weights (Coefficients): [-0.00630675  0.02912954  0.0811169 ]
Bias (Intercept): -0.1601507238663965


In [49]:
# Predict burn rate for the held-out designation-3 rows.
y_pred = model_des_3.predict(X=X_test)

In [50]:
# Mean squared error of the designation-3 model on its held-out split.
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)

In [51]:
# Show the designation-3 hold-out error.
mse_message = f"Mean Squared Error: {mse}"
print(mse_message)

Mean Squared Error: 0.0035158640338125696


In [52]:
# Serialise the fitted designation-3 model into the working directory.
with open('regression_model_des3.pkl', mode='wb') as pickle_out:
    pickle.dump(model_des_3, pickle_out)

## For designation 4

In [53]:
# Designation 4: 'Burn Rate' is the regression target, every other column is a feature.
y = df_train_des_4['Burn Rate']
X = df_train_des_4.drop(columns='Burn Rate')

In [54]:
# Hold out 20% of the designation-4 rows for evaluation (seed fixed at 42).
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

In [55]:
# Train a linear regressor on the designation-4 training split.
model_des_4 = LinearRegression()
model_des_4.fit(X=X_train, y=y_train)

In [56]:
# Pull the learned per-feature coefficients and the intercept out of the model.
weights, bias = model_des_4.coef_, model_des_4.intercept_

In [57]:
# Report the designation-4 coefficients and intercept.
for label, value in (("Weights (Coefficients):", weights), ("Bias (Intercept):", bias)):
    print(label, value)

Weights (Coefficients): [-0.0027939   0.02795119  0.07740094]
Bias (Intercept): -0.12095795716822455


In [58]:
# Predict burn rate for the held-out designation-4 rows.
y_pred = model_des_4.predict(X=X_test)

In [59]:
# Mean squared error of the designation-4 model on its held-out split.
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)

In [60]:
# Show the designation-4 hold-out error.
mse_message = f"Mean Squared Error: {mse}"
print(mse_message)

Mean Squared Error: 0.004011555246730404


In [61]:
# Serialise the fitted designation-4 model into the working directory.
with open('regression_model_des4.pkl', mode='wb') as pickle_out:
    pickle.dump(model_des_4, pickle_out)

## For designation 5

In [62]:
# Designation 5: 'Burn Rate' is the regression target, every other column is a feature.
y = df_train_des_5['Burn Rate']
X = df_train_des_5.drop(columns='Burn Rate')

In [63]:
# Hold out 20% of the designation-5 rows for evaluation (seed fixed at 42).
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

In [64]:
# Train a linear regressor on the designation-5 training split.
model_des_5 = LinearRegression()
model_des_5.fit(X=X_train, y=y_train)

In [65]:
# Pull the learned per-feature coefficients and the intercept out of the model.
weights, bias = model_des_5.coef_, model_des_5.intercept_

In [66]:
# Report the designation-5 coefficients and intercept.
for label, value in (("Weights (Coefficients):", weights), ("Bias (Intercept):", bias)):
    print(label, value)

Weights (Coefficients): [0.00404156 0.02623495 0.07188201]
Bias (Intercept): -0.020223479611829576


In [67]:
# Predict burn rate for the held-out designation-5 rows.
y_pred = model_des_5.predict(X=X_test)

In [68]:
# Mean squared error of the designation-5 model on its held-out split.
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)

In [69]:
# Show the designation-5 hold-out error.
mse_message = f"Mean Squared Error: {mse}"
print(mse_message)

Mean Squared Error: 0.003192312201403476


In [70]:
# Serialise the fitted designation-5 model into the working directory.
with open('regression_model_des5.pkl', mode='wb') as pickle_out:
    pickle.dump(model_des_5, pickle_out)

In [71]:
!mv '/content/regression_model_des0.pkl' '/content/drive/MyDrive/Computational Intelligence CW/Regression_models/regression_model_des0.pkl'
!mv '/content/regression_model_des1.pkl' '/content/drive/MyDrive/Computational Intelligence CW/Regression_models/regression_model_des1.pkl'
!mv '/content/regression_model_des2.pkl' '/content/drive/MyDrive/Computational Intelligence CW/Regression_models/regression_model_des2.pkl'
!mv '/content/regression_model_des3.pkl' '/content/drive/MyDrive/Computational Intelligence CW/Regression_models/regression_model_des3.pkl'
!mv '/content/regression_model_des4.pkl' '/content/drive/MyDrive/Computational Intelligence CW/Regression_models/regression_model_des4.pkl'
!mv '/content/regression_model_des5.pkl' '/content/drive/MyDrive/Computational Intelligence CW/Regression_models/regression_model_des5.pkl'