**Step 1: Loading the Employee Dataset**

In [1]:
from google.colab import drive
drive.mount("/content/drive")


Mounted at /content/drive


In [2]:
import pandas as pd

In [9]:
data = pd.read_csv('/Employee Sample Data 1.csv', encoding='ISO-8859-1')


In [10]:
data.head()


Unnamed: 0,Employee ID,Full Name,Job Title,Department,Business Unit,Gender,Ethnicity,Age,Hire Date,Annual Salary,Bonus %,Country,City,Exit Date
0,E02002,Kai Le,Controls Engineer,Engineering,Manufacturing,Male,Asian,47.0,2/5/2022,"$92,368",0%,United States,Columbus,
1,E02003,Robert Patel,Analyst,Sales,Corporate,Male,Asian,58.0,10/23/2013,"$45,703",0%,United States,Chicago,
2,E02004,Cameron Lo,Network Administrator,IT,Research & Development,Male,Asian,34.0,3/24/2019,"$83,576",0%,China,Shanghai,
3,E02005,Harper Castillo,IT Systems Architect,IT,Corporate,Female,Latino,39.0,4/7/2018,"$98,062",0%,United States,Seattle,
4,E02006,Harper Dominguez,Director,Engineering,Corporate,Female,Latino,42.0,6/18/2005,"$175,391",24%,United States,Austin,


**Step 2: Defining Functions**

**createCollection(p_collection_name): This function will create a collection (in this case, a DataFrame).**

In [20]:
# Creating an empty collection (DataFrame)
collections = {}

def createCollection(p_collection_name):
    global collections
    collections[p_collection_name] = pd.DataFrame()
    print(f"Collection '{p_collection_name}' created.")


**indexData(p_collection_name, p_exclude_column): This will add employee data to the collection, excluding the specified column.**

In [14]:
def indexData(p_collection_name, p_exclude_column):
    global collections
    if p_exclude_column in data.columns:
        indexed_data = data.drop(columns=[p_exclude_column])
        collections[p_collection_name] = indexed_data
        print(f"Data indexed in '{p_collection_name}', excluding '{p_exclude_column}'.")
    else:
        print(f"Column '{p_exclude_column}' not found in the dataset.")


**searchByColumn(p_collection_name, p_column_name, p_column_value): This will search for rows where a specific column matches a given value.**

In [15]:
def searchByColumn(p_collection_name, p_column_name, p_column_value):
    global collections
    if p_collection_name in collections:
        collection = collections[p_collection_name]
        if p_column_name in collection.columns:
            result = collection[collection[p_column_name] == p_column_value]
            print(f"Search results for '{p_column_name} = {p_column_value}' in '{p_collection_name}':")
            print(result)
        else:
            print(f"Column '{p_column_name}' not found in collection '{p_collection_name}'.")
    else:
        print(f"Collection '{p_collection_name}' not found.")


**getEmpCount(p_collection_name): This will return the number of employees (rows) in the specified collection.**

In [16]:
def getEmpCount(p_collection_name):
    global collections
    if p_collection_name in collections:
        collection = collections[p_collection_name]
        count = len(collection)
        print(f"Employee count in '{p_collection_name}': {count}")
    else:
        print(f"Collection '{p_collection_name}' not found.")


**delEmpById(p_collection_name, p_employee_id): This will delete an employee from the collection by their ID.**

In [17]:
def delEmpById(p_collection_name, p_employee_id):
    global collections
    if p_collection_name in collections:
        collection = collections[p_collection_name]
        if 'EmployeeID' in collection.columns:
            collections[p_collection_name] = collection[collection['EmployeeID'] != p_employee_id]
            print(f"Employee with ID '{p_employee_id}' deleted from '{p_collection_name}'.")
        else:
            print(f"'EmployeeID' column not found in collection '{p_collection_name}'.")
    else:
        print(f"Collection '{p_collection_name}' not found.")


**getDepFacet(p_collection_name): This will group employees by the department and return a count.**

In [18]:
def getDepFacet(p_collection_name):
    global collections
    if p_collection_name in collections:
        collection = collections[p_collection_name]
        if 'Department' in collection.columns:
            facet = collection.groupby('Department').size()
            print(f"Department facet for '{p_collection_name}':")
            print(facet)
        else:
            print(f"'Department' column not found in collection '{p_collection_name}'.")
    else:
        print(f"Collection '{p_collection_name}' not found.")


**Step 3: Executing the Functions**

**Var v_nameCollection = ‘Hash_<Your Name>’**

In [23]:
v_nameCollection = 'Hash_JohnDoe'

**Var v_phoneCollection =’Hash_<Your Phone last four digits’**

In [22]:
v_phoneCollection = 'Hash_1234'

**createCollection(v_nameCollection)**

In [24]:
createCollection(v_nameCollection)

Collection 'Hash_JohnDoe' created.


**createCollection(v_phoneCollection)**

In [25]:
createCollection(v_phoneCollection)

Collection 'Hash_1234' created.


**getEmpCount(v_nameCollection)**

In [26]:
getEmpCount(v_nameCollection)

Employee count in 'Hash_JohnDoe': 0


**indexData(v_nameCollection,’Department’)**

In [27]:
indexData(v_nameCollection, 'Department')

Data indexed in 'Hash_JohnDoe', excluding 'Department'.


**indexData(v_ phoneCollection, ‘Gender’)**

In [28]:
indexData(v_phoneCollection, 'Gender')

Data indexed in 'Hash_1234', excluding 'Gender'.


**delEmpById (v_ nameCollection ,‘E02003’)**

In [29]:
delEmpById(v_nameCollection, 'E02003')

'EmployeeID' column not found in collection 'Hash_JohnDoe'.


**getEmpCount(v_nameCollection)**

In [30]:
getEmpCount(v_nameCollection)

Employee count in 'Hash_JohnDoe': 1262


**searchByColumn(v_nameCollection,’Department’,’IT’)**

In [33]:
searchByColumn(v_nameCollection, 'Department', 'IT')

Column 'Department' not found in collection 'Hash_JohnDoe'.


**searchByColumn(v_nameCollection,’Gender’ ,’Male’)**

In [34]:
searchByColumn(v_nameCollection, 'Gender', 'Male')

Search results for 'Gender = Male' in 'Hash_JohnDoe':
     Employee ID        Full Name              Job Title  \
0         E02002           Kai Le      Controls Engineer   
1         E02003     Robert Patel                Analyst   
2         E02004       Cameron Lo  Network Administrator   
5         E02007          Ezra Vu  Network Administrator   
7         E02009      Miles Chang             Analyst II   
...          ...              ...                    ...   
1251      E02244    Connor Howard        Systems Analyst   
1253      E02246     Landon Reyes               Director   
1254      E02247          Noah Ma         Vice President   
1255      E02248  Lucas Alexander               Director   
1256      E02249         Henry Vo      Controls Engineer   

               Business Unit Gender  Ethnicity   Age   Hire Date  \
0              Manufacturing   Male      Asian  47.0    2/5/2022   
1                  Corporate   Male      Asian  58.0  10/23/2013   
2     Research & Deve

**searchByColumn(v_ phoneCollection,’Department’,’IT’)**

In [35]:
searchByColumn(v_phoneCollection, 'Department', 'IT')

Search results for 'Department = IT' in 'Hash_1234':
     Employee ID        Full Name              Job Title Department  \
2         E02004       Cameron Lo  Network Administrator         IT   
3         E02005  Harper Castillo   IT Systems Architect         IT   
5         E02007          Ezra Vu  Network Administrator         IT   
8         E02010    Gianna Holmes  System Administrator          IT   
10        E02012     Jameson Pena        Systems Analyst         IT   
...          ...              ...                    ...        ...   
1253      E02246     Landon Reyes               Director         IT   
1255      E02248  Lucas Alexander               Director         IT   
1258      E02251  Genesis Herrera                Manager         IT   
1259      E02252   Olivia Vazquez       Network Engineer         IT   
1260      E02253       Leilani Ng        Systems Analyst         IT   

               Business Unit  Ethnicity   Age   Hire Date Annual Salary  \
2     Research & De

**getDepFacet(v_ nameCollection)**

In [41]:
getDepFacet(v_nameCollection)

'Department' column not found in collection 'Hash_JohnDoe'.


**getDepFacet(v_ phoneCollection)**

In [37]:
getDepFacet(v_phoneCollection)

Department facet for 'Hash_1234':
Department
Accounting         132
Engineering        159
Finance            129
Human Resources    124
IT                 347
Marketing          136
Sales              188
dtype: int64
