## Imports, Functions, and Variables

In [2]:
import pandas as pd
import numpy as np

# Input and output file names (bare names, so resolved against the working directory).
data_file = "Example Activity Report Generated by OpenLM.csv"
substitution_file = "WorkstationToAgency_SubstitutionsList.csv"
output_file = "TEST.xlsx"

# Report columns that are dropped from the master dataframe before analysis.
fields_to_drop = [
    "Version",
    "License Type",
    "Borrowed",
    "Server",
    "Vendor",
    "Additional Key",
    "Host Ids",
    "IP",
    "Project",
    "Group",
    "Usage Time w/in filter period",
    "Consumed Tokens",
    "Idle Time (hours)",
    "Token Usage Time",
    "Token Usage Time w/in filter period",
    "Session ID",
    "Source",
]

# Names of the calculated fields.
# NOTE(review): "Date" is listed here but is not computed in the visible cells - confirm.
new_fields = [
    "Agency",
    "Date",
    "Product_Workstation",
    "Product_Username",
]

## Create master dataframe from report csv file


In [3]:
# Load the raw activity report csv into the master dataframe.
master_df = pd.read_csv(data_file)

## Slim size of dataframe by dropping unneeded fields

In [4]:
# Remove the unneeded report columns from master_df in place.
# Re-running this cell without reloading the csv raises KeyError, because the
# columns are already gone.
master_df.drop(labels=fields_to_drop, axis="columns", inplace=True)

## Field Calculations
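NOTE: When I try to use f-strings or `.format` to form the concatenated values I encounter weird results, because they format the whole Series object into a single string instead of working element-wise. The `+` operator concatenates the columns row by row.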

#### Get Workstation Substitution Values as a Python Dictionary

In [42]:
# Read the workstation substitution table and index it by its "VALUE" column.
substitutions_df = pd.read_csv(substitution_file).set_index("VALUE")

# Plain dict mapping each "VALUE" entry to its "REPLACEMENT" entry.
substitutions_dict = substitutions_df["REPLACEMENT"].to_dict()

#### Add New Fields and Calculate

In [45]:
# Build the "<Product>_<value>" composite fields (Product_Workstation, Product_Username).
for new_field, source_field in (
    ("Product_Workstation", "Workstation"),
    ("Product_Username", "User Name"),
):
    master_df[new_field] = master_df["Product"] + "_" + master_df[source_field]

# Parse the Start and End Time fields into datetimes (day-first dates).
for time_field in ("Start Time", "End Time"):
    master_df[time_field] = pd.to_datetime(arg=master_df[time_field], dayfirst=True)

In [46]:
# Calculate Agency field
def workstation_to_agency(workstation_value, substitutions=None, default="Research"):
    """Return the agency whose lookup key occurs inside a workstation name.

    Parameters
    ----------
    workstation_value : str
        Workstation name to classify. Non-string values (e.g. the NaN that
        pandas uses for a missing cell, or None) cannot be searched and
        yield ``default``.
    substitutions : dict, optional
        Mapping of substring -> agency. When omitted, the notebook-level
        ``substitutions_dict`` is used.
    default : str, optional
        Agency returned when no key matches. Defaults to "Research".

    Returns
    -------
    The value of the first key (in dict iteration order) that is a substring
    of ``workstation_value``, otherwise ``default``.
    """
    if substitutions is None:
        # Looked up at call time so the lookup can be rebuilt without redefining this function.
        substitutions = substitutions_dict

    # A missing workstation arrives as a float NaN; "key in nan" raises TypeError.
    if not isinstance(workstation_value, str):
        return default

    for key, value in substitutions.items():
        # Blank csv cells become NaN keys; "nan in 'text'" raises TypeError, so skip them.
        if isinstance(key, str) and key in workstation_value:
            return value
    return default

# Classify every workstation name into an agency, one row at a time.
master_df["Agency"] = master_df["Workstation"].apply(workstation_to_agency)

### Quick Evaluations
#### Unique 'Agency' values and counts

In [62]:
# master_df["Agency"].value_counts()

#### See Full Records for where 'Agency' equals 'Research'

In [63]:
# master_df[(master_df["Agency"] == "Research")][["Product_Workstation", "Agency"]]

## Product_Workstation Report

#### Calculate sum 'Total usage time (hours)' column by group (Agency and Product_Workstation)
NOTE: The .agg() function is applied to the entire dataframe, but only numeric fields are evaluated.
The 'Total usage time (hours)' column is the only numeric column, so the output is what we are seeking,
but if another numeric field were added this would need to be revised.

In [60]:
# Group the records by agency and product/workstation pair.
agency_product_workstation_gbdf = master_df.groupby(by=["Agency", "Product_Workstation"])

# Sum only the usage-hours column. Selecting it explicitly avoids depending on
# pandas silently discarding non-numeric columns (text, datetimes) during a
# frame-wide sum, which pandas 2.0+ no longer does.
agency_product_workstation_usage_df = agency_product_workstation_gbdf[["Total usage time (hours)"]].sum()

# Number of records in each group.
agency_product_workstation_count = agency_product_workstation_gbdf.size().rename("Frequency")

# Preview the combined usage/frequency table.
agency_product_workstation_usage_df.join(other=agency_product_workstation_count, how="left").head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Total usage time (hours),Frequency
Agency,Product_Workstation,Unnamed: 2_level_1,Unnamed: 3_level_1
CHART,ArcInfo_chartgbmap1,0.0,8
CHART,ArcInfo_chartgbtrnmap1,0.0,4
COMP,ArcInfo_chartgbrelmap1,0.0,5
DHMH,ArcInfo_hchd10004,15.71,2
DHMH,ArcInfo_hchd10013,1.23,2


## Product_Username Report

In [64]:
# Group the records by agency and product/username pair.
agency_product_username_gbdf = master_df.groupby(by=["Agency", "Product_Username"])

# Sum only the usage-hours column. Selecting it explicitly avoids depending on
# pandas silently discarding non-numeric columns (text, datetimes) during a
# frame-wide sum, which pandas 2.0+ no longer does.
agency_product_username_usage_df = agency_product_username_gbdf[["Total usage time (hours)"]].sum()

# Number of records in each group.
agency_product_username_count = agency_product_username_gbdf.size().rename("Frequency")

# Preview the combined usage/frequency table.
agency_product_username_usage_df.join(other=agency_product_username_count, how="left").head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Total usage time (hours),Frequency
Agency,Product_Username,Unnamed: 2_level_1,Unnamed: 3_level_1
CHART,ArcInfo_chart_sql,0.0,12
COMP,ArcInfo_chart_sql,0.0,5
DHMH,ArcInfo_Loutten,15.71,2
DHMH,ArcInfo_lwalinski,1.23,2
DHMH,ArcView_bfortune,0.95,2
