## Importing libraries

In [149]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from apyori import apriori

%matplotlib inline

## Reading input file

In [150]:
# Build the path to the CSV inside the current working directory, then load it
# into a DataFrame. The bare name on the last line renders the frame as the
# cell output.
input_file_name = 'Employee_skills_traits.csv'
path_to_input_file = os.path.join(os.getcwd(), input_file_name)
employee_skills_df = pd.read_csv(filepath_or_buffer=path_to_input_file)
employee_skills_df

Unnamed: 0,ID,Employment period,Time in current department,Gender,Team leader,Age,Member of professional organizations,.Net,SQL Server,HTML CSS Java Script,PHP mySQL,Fast working,Awards,Communicative
0,6723,5,4,0,1,48,1,1,1,0,1,1,0,1
1,8923,5,3,1,1,26,1,1,0,0,1,1,0,0
2,2322,11,8,0,0,34,0,0,1,1,0,1,0,1
3,235,7,5,1,1,27,1,0,1,0,1,1,1,0
4,9523,18,8,0,0,38,0,0,0,1,1,1,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
993,6098,12,3,1,1,32,1,0,0,0,1,1,0,1
994,12133,16,8,1,0,36,1,1,1,0,0,0,1,1
995,854,11,4,0,1,34,1,0,1,0,1,1,0,0
996,13444,8,8,1,0,36,0,0,1,0,0,0,1,1


## Removing whitespace from column names

In [151]:
# Trim leading/trailing whitespace from every column label so later lookups by
# name do not depend on stray spaces in the CSV header.
column_labels = employee_skills_df.columns
employee_skills_df.columns = column_labels.str.strip()
employee_skills_df.head()

Unnamed: 0,ID,Employment period,Time in current department,Gender,Team leader,Age,Member of professional organizations,.Net,SQL Server,HTML CSS Java Script,PHP mySQL,Fast working,Awards,Communicative
0,6723,5,4,0,1,48,1,1,1,0,1,1,0,1
1,8923,5,3,1,1,26,1,1,0,0,1,1,0,0
2,2322,11,8,0,0,34,0,0,1,1,0,1,0,1
3,235,7,5,1,1,27,1,0,1,0,1,1,1,0
4,9523,18,8,0,0,38,0,0,0,1,1,1,1,1


## Removing columns which are not relevant to finding associations

In [152]:
# Columns this analysis treats as not relevant to finding associations.
NON_ASSOCIATION_COLUMNS = ["ID", "Employment period", "Time in current department", "Age"]

# Rebind to a new frame instead of dropping in place, and ignore columns that
# are already gone, so re-running this cell does not raise KeyError.
employee_skills_df = employee_skills_df.drop(columns=NON_ASSOCIATION_COLUMNS, errors="ignore")
employee_skills_df.head()

# Per-column frequency of each value: knowing how common each skill/trait is
# helps in choosing the minimum-support threshold used for rule mining.
for col in employee_skills_df.columns:
    print(employee_skills_df[col].value_counts())

0    514
1    484
Name: Gender, dtype: int64
1    503
0    495
Name: Team leader, dtype: int64
0    509
1    489
Name: Member of professional organizations, dtype: int64
0    522
1    476
Name: .Net, dtype: int64
0    523
1    475
Name: SQL Server, dtype: int64
1    507
0    491
Name: HTML CSS Java Script, dtype: int64
0    521
1    477
Name: PHP mySQL, dtype: int64
1    502
0    496
Name: Fast working, dtype: int64
1    501
0    497
Name: Awards, dtype: int64
1    520
0    478
Name: Communicative, dtype: int64


## Converting dataset into list of lists

In [153]:
# Turn the frame into one transaction per row: the list of column names whose
# value in that row equals 1. This list-of-lists is what gets passed to apriori.
rows, columns = employee_skills_df.shape

# Look up the labels and the underlying array once, rather than re-evaluating
# `employee_skills_df.values` / `.columns` for every single cell in the loop.
item_names = list(employee_skills_df.columns)
flag_matrix = employee_skills_df.values

records = [
    [name for name, flag in zip(item_names, row_flags) if int(flag) == 1]
    for row_flags in flag_matrix
]

## Extracting association rules using apriori algorithm

In [154]:
# Thresholds for rule mining, named so they can be tuned in one place.
MIN_SUPPORT = 0.0052
MIN_CONFIDENCE = 0.20
MIN_LIFT = 2.5
MIN_ITEMSET_LENGTH = 2

# apriori() yields its results lazily; materialise them into a list so the
# rules are still available after the report loop below has run.
# NOTE(review): apyori's documented keyword arguments are min_support,
# min_confidence, min_lift and max_length; `min_length` appears to be ignored
# by the library, so the length constraint is also enforced explicitly here.
association_rules = [
    relation
    for relation in apriori(
        records,
        min_support=MIN_SUPPORT,
        min_confidence=MIN_CONFIDENCE,
        min_lift=MIN_LIFT,
        min_length=MIN_ITEMSET_LENGTH,
    )
    if len(relation.items) >= MIN_ITEMSET_LENGTH
]

# Print one section per item set: the items, their support, and every rule
# (antecedent ----> consequent) with its confidence and lift.
for relation in association_rules:
    print("=========================================================================")
    print("Item Set :")
    print(list(relation.items))
    print(f"Support for this item set : {relation.support}")
    print("Rules for this item set :")
    for rule_number, stat in enumerate(relation.ordered_statistics, start=1):
        print("*******")
        print(f"Rule_{rule_number} :")
        print(f"{list(stat.items_base)} ----> {list(stat.items_add)}")
        print(f"Confidence and lift for this rule : {stat.confidence} and {stat.lift} respectively")
        print("*******")
    print("=========================================================================")
    print("\n\n")

Item Set :
['HTML CSS Java Script', 'Fast working', 'Communicative', 'Member of professional organizations', 'Gender', 'Awards', 'SQL Server', '.Net']
Support for this item set : 0.006012024048096192
Rules for this item set :
*******
Rule_1 :
['HTML CSS Java Script', 'Communicative', 'Gender', 'SQL Server', '.Net'] ----> ['Member of professional organizations', 'Awards', 'Fast working']
Confidence and lift for this rule : 0.3333333333333333 and 2.8192090395480225 respectively
*******



Item Set :
['HTML CSS Java Script', 'Fast working', 'Communicative', 'Member of professional organizations', 'Team leader', 'Gender', 'Awards', '.Net']
Support for this item set : 0.008016032064128256
Rules for this item set :
*******
Rule_1 :
['HTML CSS Java Script', 'Fast working', 'Communicative', 'Gender', '.Net'] ----> ['Member of professional organizations', 'Awards', 'Team leader']
Confidence and lift for this rule : 0.2962962962962963 and 2.549169859514687 respectively
*******
*******
Rule_2 :
[