# Imports and Variables

In [None]:
import pandas as pd
import numpy as np

In [None]:
# Target dtype for each column kept from the OpenLM report: text everywhere except the usage total.
column_datatypes = dict.fromkeys(
    [
        "Feature",
        "Product",
        "Start Time",
        "End Time",
        "Workstation",
        "User Name",
        "First Name",
        "Last Name",
        "Email",
    ],
    str,
)
column_datatypes["Total usage time (hours)"] = float

# Report columns that are removed from the dataframe before processing.
fields_to_drop = [
    "Version",
    "License Type",
    "Borrowed",
    "Server",
    "Vendor",
    "Additional Key",
    "Host Ids",
    "IP",
    "Project",
    "Group",
    "Usage Time w/in filter period",
    "Consumed Tokens",
    "Idle Time (hours)",
    "Token Usage Time",
    "Token Usage Time w/in filter period",
    "Session ID",
    "Source",
]

# Lookup tables (CSV) with "VALUE" and "REPLACEMENT" columns.
product_label_substitutions_file = r"ProductCodeToProductLabel_SubstitutionsList.csv"
workstation_agency_substitutions_file = r"WorkstationToAgency_SubstitutionsList.csv"

#### DEV options for the data file to be processed. The size of the report/data file varies with the length of time queried in OpenLM.

In [None]:
# Choose the report to process by leaving exactly one of these assignments uncommented.
# data_file = r"Example Activity Report Generated by OpenLM 1day.csv"
# data_file = r"Example Activity Report Generated by OpenLM NOVtoFEB.csv"
data_file = r"Example Activity Report Generated by OpenLM SEPTtoFEB.csv"

# Process Data
#### Create Master Dataframe from OpenLM Report (.csv) of Interest


In [None]:
# Read every column as text; the columns that are kept get their real dtypes in later steps.
master_df = pd.read_csv(filepath_or_buffer=data_file, dtype=str)

#### Slim Size of Dataframe by Dropping Unneeded Fields

In [None]:
# Keep only the columns the reports need.
master_df = master_df.drop(columns=fields_to_drop)

In [None]:
# Cleans commas out of the usage-time values.
# NOTE(review): DataFrame.replace probably "wasn't working" because it matches whole cell
# values unless regex=True is passed - confirm before swapping this helper out.
def replace_commas(value):
    """Return ``value`` as a float after removing any commas from its text form.

    The value is converted to ``str`` first, so numbers are accepted as well as
    strings such as "1,234.5". ``ValueError`` is raised when the comma-free text
    is not a valid float literal.
    """
    return float(str(value).replace(",", ""))

In [None]:
# Turn the usage totals (text, possibly containing commas) into floats,
# then cast each column to the dtype declared in column_datatypes.
master_df["Total usage time (hours)"] = master_df["Total usage time (hours)"].map(replace_commas)
master_df = master_df.astype(column_datatypes)
# master_df.info()

### Field Calculations
NOTE: When I try to use f-strings or `.format` to build the concatenated values, I get unexpected results.

#### Get Product Label Values as a Python Dictionary

In [None]:
# Load the product substitutions table and index it by its "VALUE" column.
product_label_substitutions_df = pd.read_csv(product_label_substitutions_file).set_index("VALUE")
# Plain dict mapping each "VALUE" entry to its "REPLACEMENT" entry.
product_label_substitutions_dict = product_label_substitutions_df["REPLACEMENT"].to_dict()

In [None]:
# Replace Product Code values with Product Label replacements.
# Product codes that have no entry in the substitutions table are collected here for review.
unrecognized_values_set = set()


def product_code_to_label(value):
    """Translate a product code into its product label.

    A code with no substitution is recorded in ``unrecognized_values_set`` and
    returned with a leading '*' so that it stands out in the reports.

    The function can safely be applied to a column it has already processed:

    * a '*' marker left by an earlier pass is removed and the bare code is looked
      up again, so a mapping added to the substitutions table since then is used;
    * a value that already is one of the replacement labels is returned unchanged
      instead of being flagged as unrecognized.

    :param value: product code (str) taken from the "Product" column
    :return: the replacement label, or '*' + code when no substitution exists
    """
    bare_code = value.strip("*")
    # Try the value exactly as given first, then without the '*' marker.
    for candidate in (value, bare_code):
        try:
            return product_label_substitutions_dict[candidate]
        except KeyError:
            continue
    # Already translated by a previous pass: leave the label alone.
    if value in product_label_substitutions_dict.values():
        return value
    unrecognized_values_set.add(bare_code)
    return "*" + bare_code

In [None]:
# Swap every product code for its label (unrecognized codes come back prefixed with '*').
master_df["Product"] = master_df["Product"].map(product_code_to_label)

#### Unrecognized Product Names. No Corresponding Product Label Substitution Value

In [None]:
# Displays the product values that had no substitution.
# If there are none, the output is an empty set, shown as 'set()'.
unrecognized_values_set

#### Get Workstation Substitution Values as a Python Dictionary

In [None]:
# Load the workstation substitutions table and index it by its "VALUE" column.
workstation_substitutions_df = pd.read_csv(workstation_agency_substitutions_file).set_index("VALUE")
# Plain dict mapping each "VALUE" entry to its "REPLACEMENT" entry.
workstation_substitutions_dict = workstation_substitutions_df["REPLACEMENT"].to_dict()

#### Add New Fields and Calculate

In [None]:
# Composite keys for the reports: the product joined to the workstation / user name with "_".
for composite_field, partner_field in (
    ("Product_Workstation", "Workstation"),
    ("Product_Username", "User Name"),
):
    master_df[composite_field] = master_df["Product"] + "_" + master_df[partner_field]

# Parse the start and end timestamps, reading ambiguous dates as day-first.
for time_field in ("Start Time", "End Time"):
    master_df[time_field] = pd.to_datetime(arg=master_df[time_field], dayfirst=True)

# End-time day as a YYYYMMDD string. For Peak Usage Report
master_df["Date"] = master_df["End Time"].map(lambda end_time: end_time.strftime("%Y%m%d"))

In [None]:
# Calculate Agency field
def workstation_to_agency(workstation_value):
    """Return the agency for a workstation name.

    Each key of ``workstation_substitutions_dict`` is tested, in dict order, as a
    substring of ``workstation_value``; the replacement of the first key found is
    returned. When no key occurs in the name, "Research" is returned.
    """
    matching_agencies = (
        agency
        for fragment, agency in workstation_substitutions_dict.items()
        if fragment in workstation_value
    )
    return next(matching_agencies, "Research")

In [None]:
# Derive each record's agency from its workstation name.
master_df["Agency"] = master_df["Workstation"].map(workstation_to_agency)
# master_df.info()

### Quick Evaluations
#### Unique 'Agency' values and counts

In [None]:
# Number of records assigned to each agency.
master_df["Agency"].value_counts()

#### See Full Records for where 'Agency' equals 'Research'

In [None]:
# master_df[(master_df["Agency"] == "Research")][["Product_Workstation", "Agency"]]

#### See Workstation Substitutions Values

In [None]:
# workstation_substitutions_dict

#### See Unique Agency Codes Where Substitution Resulted in 'Research'

In [None]:
# master_df[(master_df["Agency"] == "Research")]["Workstation"].unique()

# Report Generation
## Product_Workstation Report

NOTE: The .agg() function is applied to the entire dataframe, but only numeric fields are evaluated.
The 'Total usage time (hours)' column is the only numeric column. The output is what we are seeking,
but if another numeric field were added this would need to be revised.

In [None]:
# Group the usage records by agency and by product/workstation pair.
# The deprecated ``axis`` argument is left out; grouping rows is the default.
agency_product_workstation_gbdf = master_df.groupby(by=["Agency", "Product_Workstation"])
# Sum only the usage-hours column. Summing the whole frame depends on pandas dropping the
# non-numeric columns, which pandas 2.0+ no longer does by default (text columns would be
# concatenated and the datetime columns cannot be summed).
agency_product_workstation_usage_df = agency_product_workstation_gbdf[["Total usage time (hours)"]].sum()
# Count the non-empty Product_Workstation values of each group, i.e. its number of records.
agency_product_workstation_count = agency_product_workstation_gbdf["Product_Workstation"].agg(np.count_nonzero)
agency_product_workstation_count.name = "Frequency"
# One row per (Agency, Product_Workstation): total usage hours plus record count.
product_workstation_report = agency_product_workstation_usage_df.join(other=agency_product_workstation_count, how="left")

## Product_Username Report

In [None]:
# Group the usage records by agency and by product/username pair.
# The deprecated ``axis`` argument is left out; grouping rows is the default.
agency_product_username_gbdf = master_df.groupby(by=["Agency", "Product_Username"])
# Sum only the usage-hours column. Aggregating the whole frame depends on pandas dropping the
# non-numeric columns, which pandas 2.0+ no longer does by default (text columns would be
# concatenated and the datetime columns cannot be summed).
agency_product_username_usage_df = agency_product_username_gbdf[["Total usage time (hours)"]].sum()
# Count the non-empty Product_Username values of each group, i.e. its number of records.
agency_product_username_count = agency_product_username_gbdf["Product_Username"].agg(np.count_nonzero)
agency_product_username_count.name = "Frequency"
# One row per (Agency, Product_Username): total usage hours plus record count.
product_username_report = agency_product_username_usage_df.join(other=agency_product_username_count, how="left")

## Evaluation of Product Use by Agency 

TODO: Need to add the ProductLabel field to these reports.

#### Workstation and Username - Per Product Per Agency Report

In [None]:
# Group the usage records by agency and product.
# The deprecated ``axis`` argument is left out; grouping rows is the default.
agency_product_gbdf = master_df.groupby(by=["Agency", "Product"])
# Sum only the usage-hours column. Aggregating the whole frame depends on pandas dropping the
# non-numeric columns, which pandas 2.0+ no longer does by default (text columns would be
# concatenated and the datetime columns cannot be summed).
agency_product_usage_df = agency_product_gbdf[["Total usage time (hours)"]].sum()
# Count the non-empty Product values of each group, i.e. its number of records.
agency_product_count_series = agency_product_gbdf["Product"].agg(np.count_nonzero)
agency_product_count_series.name = "Frequency"
# Distinct user names seen per agency and product.
agency_username_count_series = agency_product_gbdf["User Name"].nunique()
agency_username_count_series.name = "Unique Usernames"

# Code preserved in case wanted to produce two separate reports
# agency_product_count_report = agency_product_usage_df.join(other=agency_product_count_series,
#                                                            how="left").join(other=agency_username_count_series,
#                                                                             how="left")

# Distinct workstations seen per agency and product.
agency_workstation_count_series = agency_product_gbdf["Workstation"].nunique()
agency_workstation_count_series.name = "Unique Workstations"
# One row per (Agency, Product): usage hours, record count, unique workstations, unique usernames.
agency_product_count_report = (
    agency_product_usage_df
    .join(other=agency_product_count_series, how="left")
    .join(other=agency_workstation_count_series, how="left")
    .join(other=agency_username_count_series, how="left")
)

## Peak Usage Report

In [None]:
# Distinct user names per agency, day and product.
# The deprecated ``axis`` argument is left out of both groupby calls; grouping rows is the default.
peak_usage_gbdf = master_df.groupby(by=["Agency", "Date", "Product"])
usernames_per_day_series = peak_usage_gbdf["User Name"].nunique()
usernames_per_day_series.name = "Unique Usernames"
usernames_per_day_df = usernames_per_day_series.to_frame()
# Collapse the days: the largest daily count of distinct users per agency and product.
usernames_per_day_gbdf = usernames_per_day_df.groupby(by=["Agency", "Product"])
peak_usage_report = usernames_per_day_gbdf.max().rename(columns={"Unique Usernames": "Max Licenses"})

#### Inspect for Specific Agency Product Usage
Use the code below to verify the findings above: it shows a single agency's maximum usage of a product across all dates on which that agency used it.

In [None]:
 # EDIT MY VALUES
# Agency value and product label to look up in the per-day username counts.
agency_abbreviation_you_want_to_check = "SHA"
product_label_you_want_to_check = "ArcGIS Desktop Advanced"

In [None]:
# Look up the chosen agency, then the chosen product, in the per-day username counts and
# take the maximum over the remaining days.
try:
    agency_isolation_df = usernames_per_day_df.xs(key=agency_abbreviation_you_want_to_check, axis=0, level=0)
    product_isolation_series = agency_isolation_df.xs(key=product_label_you_want_to_check, axis=0, level=1).max()
except KeyError as ke:
    print("Key Error occurred. {key} not found. Try again.".format(key=ke))
    # Clear the result so a failed lookup neither raises NameError below
    # nor shows a stale value left over from an earlier run of this cell.
    product_isolation_series = None
else:
    product_isolation_series.name = "{} - {}".format(agency_abbreviation_you_want_to_check, product_label_you_want_to_check)
# Last expression of the cell: displays the result (nothing is shown after a failed lookup).
product_isolation_series

## Output Reports to Excel File

In [None]:
# Path of the Excel workbook the reports are written to.
output_excel_file_name = "TEST_OUTPUT.xlsx"

In [None]:
# Write each report to its own sheet of the output workbook, in this order.
with pd.ExcelWriter(path=output_excel_file_name, datetime_format="YYYY-MM-DD") as writer:
    for sheet_title, report_df in (
        ("Product Workstation", product_workstation_report),
        ("Product Username", product_username_report),
        ("Agency Product Use Evaluation", agency_product_count_report),
        ("Peak Usage", peak_usage_report),
    ):
        report_df.to_excel(writer, sheet_name=sheet_title)