<a href="https://colab.research.google.com/github/Moigi/Moigi/blob/main/ANN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
import pandas as pd
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, explained_variance_score
from tabulate import tabulate
import matplotlib.pyplot as plt
import numpy as np
from sklearn.ensemble import AdaBoostRegressor, RandomForestRegressor
from sklearn.linear_model import LinearRegression, SGDRegressor
from sklearn.neural_network import MLPRegressor
pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", None)
pd.options.display.max_colwidth=1000


class ANNRegressor:
    def __init__(self):
        self.model = None

    def fit(self, X, y):
        self.model = MLPRegressor(hidden_layer_sizes=(100,), activation='relu', solver='adam')
        self.model.fit(X, y)

    def predict(self, X):
        return self.model.predict(X)


# Load data
path = r'/content/drive/MyDrive/Colab Notebooks/HR-Employee-Attrition.csv'
data = pd.read_csv(path, encoding='unicode_escape')

# Check column names
print(data.columns)

# Separate features (X) and target variable (y)
X = data.drop(columns=['SPEED'])
y = data['SPEED']

# Initialize models
models = {
    'Random Forest': RandomForestRegressor(),
    'ANN': ANNRegressor()
}

# Initialize results DataFrame
results = pd.DataFrame(columns=['Model', 'MSE', 'MAE', 'R^2 Score', 'Explained Variance', 'Max Error'])

# Train and evaluate models
for model_name, model in models.items():
    # Fit the model
    model.fit(X, y)

    # Make predictions
    y_pred = model.predict(X)

    # Calculate performance metrics
    mse = mean_squared_error(y, y_pred)
    mae = mean_absolute_error(y, y_pred)
    r2 = r2_score(y, y_pred)
    ev = explained_variance_score(y, y_pred)
    max_error = max(abs(y - y_pred))

    # Store results in DataFrame
    results = results.append({'Model': model_name, 'MSE': mse, 'MAE': mae, 'R^2 Score': r2,
                              'Explained Variance': ev, 'Max Error': max_error}, ignore_index=True)

# Print results as a table
print("Performance Metrics")
table = tabulate(results, headers='keys', tablefmt='github')
print(table)

# Define the index and bar width
index = np.arange(len(results))
bar_width = 0.15

# Create a bar plot of the performance metrics
plt.figure(figsize=(10, 6))
plt.bar(index - 2 * bar_width, results['MSE'], color='blue', alpha=0.5, width=bar_width, label='MSE')
plt.bar(index - bar_width, results['MAE'], color='green', alpha=0.5, width=bar_width, label='MAE')
plt.bar(index, results['R^2 Score'], color='orange', alpha=0.5, width=bar_width, label='R^2 Score')
plt.bar(index + bar_width, results['Explained Variance'], color='red', alpha=0.5, width=bar_width,
        label='Explained Variance')
plt.bar(index + 2 * bar_width, results['Max Error'], color='purple', alpha=0.5, width=bar_width, label='Max Error')
plt.xlabel('Model')
plt.ylabel('Performance')
plt.title('Performance Metrics of Random Forest and Deep learnings-based solutions')
plt.legend()
plt.xticks(index, results['Model'], rotation=45)
# Save the plot as an image
plt.savefig('performance_metrics.png')

Index(['Age', 'Attrition', 'BusinessTravel', 'DailyRate', 'Department',
       'DistanceFromHome', 'Education', 'EducationField', 'EmployeeCount',
       'EmployeeNumber', 'EnvironmentSatisfaction', 'Gender', 'HourlyRate',
       'JobInvolvement', 'JobLevel', 'JobRole', 'JobSatisfaction',
       'MaritalStatus', 'MonthlyIncome', 'MonthlyRate', 'NumCompaniesWorked',
       'Over18', 'OverTime', 'PercentSalaryHike', 'PerformanceRating',
       'RelationshipSatisfaction', 'StandardHours', 'StockOptionLevel',
       'TotalWorkingYears', 'TrainingTimesLastYear', 'WorkLifeBalance',
       'YearsAtCompany', 'YearsInCurrentRole', 'YearsSinceLastPromotion',
       'YearsWithCurrManager'],
      dtype='object')


KeyError: ignored

In [5]:
import pandas as pd
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, explained_variance_score
from tabulate import tabulate
import matplotlib.pyplot as plt
import numpy as np
from sklearn.ensemble import AdaBoostRegressor, RandomForestRegressor
from sklearn.linear_model import LinearRegression, SGDRegressor
from sklearn.neural_network import MLPRegressor

# Load data
path = r'/content/drive/MyDrive/Colab Notebooks/HR-Employee-Attrition.csv'
data = pd.read_csv(path, encoding='unicode_escape')

In [6]:
data.head()

Unnamed: 0,Age,Attrition,BusinessTravel,DailyRate,Department,DistanceFromHome,Education,EducationField,EmployeeCount,EmployeeNumber,...,RelationshipSatisfaction,StandardHours,StockOptionLevel,TotalWorkingYears,TrainingTimesLastYear,WorkLifeBalance,YearsAtCompany,YearsInCurrentRole,YearsSinceLastPromotion,YearsWithCurrManager
0,41,Yes,Travel_Rarely,1102,Sales,1,2,Life Sciences,1,1,...,1,80,0,8,0,1,6,4,0,5
1,49,No,Travel_Frequently,279,Research & Development,8,1,Life Sciences,1,2,...,4,80,1,10,3,3,10,7,1,7
2,37,Yes,Travel_Rarely,1373,Research & Development,2,2,Other,1,4,...,2,80,0,7,3,3,0,0,0,0
3,33,No,Travel_Frequently,1392,Research & Development,3,4,Life Sciences,1,5,...,3,80,0,8,3,3,8,7,3,0
4,27,No,Travel_Rarely,591,Research & Development,2,1,Medical,1,7,...,4,80,1,6,3,3,2,2,2,2


In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive
