<a href="https://colab.research.google.com/github/JoshitaReddy/burnout/blob/main/HACKFIT.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#Importing Libraries

In [9]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
import joblib

import warnings
warnings.filterwarnings(action='ignore')


#Reading the Dataset

In [10]:
data = pd.read_csv('train.csv')

#Pre-processing Data

In [11]:
def preprocess_inputs(df):
    df = df.copy()
    
    # Drop Employee ID column
    df = df.drop('Employee ID', axis=1)
    
    # Drop rows with missing target values
    missing_target_rows = df.loc[df['Burn Rate'].isna(), :].index
    df = df.drop(missing_target_rows, axis=0).reset_index(drop=True)
    
    # Fill remaining missing values with column means
    for column in ['Resource Allocation', 'Mental Fatigue Score']:
        df[column] = df[column].fillna(df[column].mean())
    
    # Extract date features
    df['Date of Joining'] = pd.to_datetime(df['Date of Joining'])
    df['Join Month'] = df['Date of Joining'].apply(lambda x: x.month)
    df['Join Day'] = df['Date of Joining'].apply(lambda x: x.day)
    df = df.drop('Date of Joining', axis=1)
    
    # Binary encoding
    df['Gender'] = df['Gender'].replace({'Female': 0, 'Male': 1})
    df['Company Type'] = df['Company Type'].replace({'Product': 0, 'Service': 1})
    df['WFH Setup Available'] = df['WFH Setup Available'].replace({'No': 0, 'Yes': 1})
    
    # Split df into X and y
    y = df['Burn Rate']
    X = df.drop('Burn Rate', axis=1)
    
    # Train-test split
    X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.7, shuffle=True, random_state=1)
    
    # Scale X
    scaler = StandardScaler()
    scaler.fit(X_train)
    X_train = pd.DataFrame(scaler.transform(X_train), index=X_train.index, columns=X_train.columns)
    X_test = pd.DataFrame(scaler.transform(X_test), index=X_test.index, columns=X_test.columns)
    
    return X_train, X_test, y_train, y_test

In [12]:
X_train, X_test, y_train, y_test = preprocess_inputs(data)

In [13]:
X_train.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 15138 entries, 8275 to 235
Data columns (total 8 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   Gender                15138 non-null  float64
 1   Company Type          15138 non-null  float64
 2   WFH Setup Available   15138 non-null  float64
 3   Designation           15138 non-null  float64
 4   Resource Allocation   15138 non-null  float64
 5   Mental Fatigue Score  15138 non-null  float64
 6   Join Month            15138 non-null  float64
 7   Join Day              15138 non-null  float64
dtypes: float64(8)
memory usage: 1.0 MB


#Applying Random Forest Regression

In [14]:
model=RandomForestRegressor()
model.fit(X_train, y_train)

RandomForestRegressor(bootstrap=True, ccp_alpha=0.0, criterion='mse',
                      max_depth=None, max_features='auto', max_leaf_nodes=None,
                      max_samples=None, min_impurity_decrease=0.0,
                      min_impurity_split=None, min_samples_leaf=1,
                      min_samples_split=2, min_weight_fraction_leaf=0.0,
                      n_estimators=100, n_jobs=None, oob_score=False,
                      random_state=None, verbose=0, warm_start=False)

##Calculating R^2 Score

In [15]:
print("R^2 Score: {:.2f}".format(model.score(X_test, y_test)))

R^2 Score: 0.90


#Saving the Model

In [16]:
joblib.dump(model, 'model.pkl')

['model.pkl']

#Taking User Inputs

In [17]:
name=input('What should we call you? ')
gender=input('What gender do you identify yourself as? ')
company=input('What is kind of company do you work for? ')
wfh=input('Do you work from home? ')
designation=int(input('On a range of 0-5 what is your level of seniority in the company? '))
hours=int(input('How many hours do you work on a daily basis? '))
fatigue=input('On a scale of 1-10 how much tired are you everyday? ')
print('When did you start working for this company? ')
month=int(input('Month: '))
day=int(input('Day: '))
year=int(input('Year: '))

What should we call you? Joshita
What gender do you identify yourself as? Female
What is kind of company do you work for? Service
Do you work from home? Yes
On a range of 0-5 what is your level of seniority in the company? 3
How many hours do you work on a daily basis? 7
On a scale of 1-10 how much tired are you everyday? 6
When did you start working for this company? 
Month: 09
Day: 07
Year: 2009


##Prepreocessing the User Inputs

In [18]:
if gender=='Female': gender=0
else: gender=1
if company=='Service': company=1
else: company=0
if company=='No': wfh=0
else: wfh=1

##Making Prediction

In [19]:
def ValuePredictor(to_predict_list):
    to_predict = np.array(to_predict_list).reshape(1, 8)
    loaded_model=joblib.load('model.pkl')
    result = loaded_model.predict(to_predict)
    return result[0]

scaler=StandardScaler()
def result():
        to_predict_list = scaler.fit_transform([[gender,company,hours,wfh,hours,fatigue,month,month]])
        return ValuePredictor(to_predict_list)             

##Result

In [20]:
r=result()
print("%s your Burnout Rate is %.2f%%"%(name,r*100))

Joshita your Burnout Rate is 45.36%


In [21]:
if(r>0.75): 
  print("Your Burnout Rate indicates you are need try and relieve your stress!")
else: 
  print("Your Burnout Rate indicates you good to go!")

Your Burnout Rate indicates you good to go!
