## Import statements

In [186]:
import os
import sys
from urllib.parse import quote

import numpy as np
import pandas as pd
from dotenv import load_dotenv, find_dotenv
from IPython.display import display, Markdown, HTML, clear_output
from scipy.stats import norm

# The local AIToolkit checkout must be on sys.path before ai_toolkit is imported.
sys.path.insert(0, '/Users/ahell/Documents/Python Projects/AIToolkit')

from ai_toolkit import LLM, PromptBuilder, FileReader, FileWriter, AIProcess, Function, APIRequest, AITool
from ai_toolkit.operations import ConvertToJSON, ExtractKey, Calculator, Passthrough

In [187]:
# Read the environment variables from the .env file that find_dotenv() resolves.
dotenv_path = find_dotenv()
load_dotenv(dotenv_path)

True

In [188]:
# Show the model names that LLM exposes through its ALL_MODEL_NAMES attribute.
LLM.ALL_MODEL_NAMES

['gpt-3.5-turbo',
 'gpt-4',
 'gpt-3.5-turbo-16k',
 'claude-1',
 'claude-1-100k',
 'claude-instant-1',
 'claude-instant-1-100k',
 'respell-gpt-4-wrapper']

In [189]:
# Survey IDs of the clients this notebook can be pointed at, keyed by client name.
CLIENT_SURVEY_IDS = {
    'Marcfirst': 'adf4f406-757c-4e7d-b881-add3de4bbaf4',
    'IEC': '8d16a3a2-3bb5-4abd-a7e3-e359ffd209b0',
}

# Pick the client by name only; the survey ID is looked up so that the two
# values cannot drift apart when switching clients.
client_name = 'Marcfirst'
client_survey_id = CLIENT_SURVEY_IDS[client_name]

## Dimension Analysis

In [190]:
# `group` is sent as the `group` query parameter and `level` (lower-cased) is
# the last path segment of the summary endpoint.
group = 'Leadership Tier'
level = 'Driver'

# Percent-encode the group name so that spaces and reserved characters such as
# '&' or '#' cannot truncate or corrupt the query string.
group_query = quote(group, safe='')

api_base_url = "https://dev-api.insite.ledgestone.com/api/results"
overall_summary_url = f"{api_base_url}/summary/{client_survey_id}/main/{level.lower()}"
overall_metadata_url = f"{api_base_url}/metadata/{client_survey_id}"
request_url = f"{overall_summary_url}?group={group_query}"
metadata_url = f"{overall_metadata_url}?group={group_query}"

request_method = "GET"
request_headers = {
    "Authorization": f"Bearer {os.environ['BEARER_TOKEN']}",
    "Content-Type": "application/json"
}


def fetch_results(request_name, url, results_name):
    """Run one API request and extract the 'results' key from its response.

    Args:
        request_name: Display name given to the APIRequest operation.
        url: Endpoint to call with `request_method` and `request_headers`.
        results_name: Display name given to the ExtractKey operation.

    Returns:
        A `(api_request, results)` tuple holding the processed APIRequest and
        the processed ExtractKey operation that reads from it.
    """
    api_request = APIRequest(request_name).set_input(
        url=url, method=request_method, headers=request_headers)
    api_request.process()

    results = ExtractKey(results_name).set_input(
        input=api_request, key_name='results')
    results.process()
    return api_request, results


# Requests are issued in the same order as before: grouped summary, grouped
# metadata, overall summary, overall metadata.
grouped_summary_graph, results_from_grouped_summary_graph = fetch_results(
    f"{level} Graph Grouped By {group}", request_url,
    "Results from Grouped Summary Graph")

grouped_metadata, results_from_grouped_metadata = fetch_results(
    f"{level} Metadata Grouped By {group}", metadata_url,
    "Results from Grouped Metadata")

overall_summary_graph, results_from_overall_summary_graph = fetch_results(
    f"{level} Graph", overall_summary_url,
    "Results from Overall Summary Graph")

overall_metadata, results_from_overall_metadata = fetch_results(
    f"{level} Overall Metadata", overall_metadata_url,
    "Results from Overall Metadata")


Results from Overall Metadata (ExtractKey)

In [191]:
# Build two frames indexed by group value:
#   df_summary  - one column per driver, holding that group's 'score'
#   df_metadata - one column per metadata metric (composite, respondents, ...)
#
# Shapes of the API results, as observed when this notebook was last run:
#   grouped summary:  {group: {driver: {'score': .., 'maximum': .., 'minimum': ..}}}
#   grouped metadata: {metric: {group: value}}   e.g. {'composite': {'COO': 71, ...},
#                                                      'respondents': {'CEO': 0, ...}}

# Groups with fewer respondents than this are excluded from the analysis.
MIN_RESPONDENTS = 5

metadata_all = pd.DataFrame(results_from_grouped_metadata.get_output())
# The comment metrics are not used below; tolerate their absence.
metadata_all = metadata_all.drop(columns=['comment_count', 'comment_rate'], errors='ignore')

grouped_summary_dict = results_from_grouped_summary_graph.get_output()
# Keep only the 'score' of every driver and skip the unnamed ('') group.
# Building a new dict leaves the operation's stored output untouched.
scores_by_group = {
    group_value: {driver: stats['score'] for driver, stats in drivers.items()}
    for group_value, drivers in grouped_summary_dict.items()
    if group_value != ''
}
summary_all = pd.DataFrame.from_dict(scores_by_group, orient='index')

# Keep groups with at least MIN_RESPONDENTS respondents. Filtering by label
# membership (rather than a boolean mask taken from the other frame) works even
# when the two results do not list exactly the same groups, and restricting the
# metadata to the surviving summary rows keeps both frames on the same groups.
eligible_groups = metadata_all.index[metadata_all['respondents'] >= MIN_RESPONDENTS]
df_summary = summary_all[summary_all.index.isin(eligible_groups)]
df_metadata = metadata_all[metadata_all.index.isin(df_summary.index)]


display(df_summary)
display(df_metadata)

Unnamed: 0,Team,Pride,Trust,Equity,Impact,Vision,Clarity,Mastery,Meaning,Strategy,Execution,Investment,Supervisor,Advancement,Empowerment,Appreciation,Organization,Significance
Employee,37,54,32,30,40,35,27,38,61,30,14,38,67,28,44,44,40,53
Supervisor,21,55,17,27,40,27,10,24,54,16,9,19,66,27,39,42,38,42
Sr/Exec Leader,76,92,72,55,65,61,56,62,78,77,63,64,78,64,65,72,75,77


Unnamed: 0,composite,respondents,composite_max,composite_min,response_rate,positivity_rate
Employee,40,62,98,-51,50,35
Supervisor,32,11,68,1,55,28
Sr/Exec Leader,70,6,84,55,86,65


In [192]:
# Show the organisation-wide metadata first, then the organisation-wide scores.
for overall_results in (results_from_overall_metadata, results_from_overall_summary_graph):
    display(overall_results.get_output())

{'composite': {'Overall': 41},
 'respondents': {'Overall': 82},
 'comment_rate': {'Overall': 44},
 'comment_count': {'Overall': 36},
 'composite_max': {'Overall': 98},
 'composite_min': {'Overall': -51},
 'response_rate': {'Overall': 53},
 'positivity_rate': {'Overall': 36}}

{'Overall': {'Team': {'score': 38, 'maximum': 95, 'minimum': -55},
  'Pride': {'score': 57, 'maximum': 100, 'minimum': -87},
  'Trust': {'score': 33, 'maximum': 100, 'minimum': -59},
  'Equity': {'score': 32, 'maximum': 100, 'minimum': -73},
  'Impact': {'score': 42, 'maximum': 100, 'minimum': -65},
  'Vision': {'score': 37, 'maximum': 100, 'minimum': -100},
  'Clarity': {'score': 27, 'maximum': 100, 'minimum': -100},
  'Mastery': {'score': 37, 'maximum': 100, 'minimum': -92},
  'Meaning': {'score': 61, 'maximum': 100, 'minimum': -10},
  'Strategy': {'score': 32, 'maximum': 100, 'minimum': -100},
  'Execution': {'score': 17, 'maximum': 100, 'minimum': -100},
  'Investment': {'score': 37, 'maximum': 100, 'minimum': -54},
  'Supervisor': {'score': 68, 'maximum': 100, 'minimum': -21},
  'Advancement': {'score': 31, 'maximum': 100, 'minimum': -97},
  'Empowerment': {'score': 44, 'maximum': 100, 'minimum': -98},
  'Appreciation': {'score': 45, 'maximum': 100, 'minimum': -81},
  'Organizatio

In [193]:
# Flag individual group/driver scores that sit unusually far from the mean of
# all group/driver scores.

# Roughly how many scores the cut-off should flag if scores were normally distributed.
EXPECTED_ANOMALY_COUNT = 10

org_average = results_from_overall_metadata.get_output()['composite']['Overall']
df = df_summary.copy()

# Step 1: Flatten the DataFrame and calculate mean and standard deviation
flattened_data = df.values.flatten()
mean = flattened_data.mean()
std_dev = flattened_data.std()

# Step 2: Choose N so that, under a normal distribution, about
# EXPECTED_ANOMALY_COUNT of the scores lie more than N standard deviations
# from the mean (two-sided).
if len(flattened_data) > EXPECTED_ANOMALY_COUNT:
    percentage = EXPECTED_ANOMALY_COUNT / len(flattened_data)
    N = norm.ppf(0.5 + (1 - percentage) / 2)
else:
    # With this few scores the formula gives N <= 0, which would flag nearly
    # every score (and divide by zero for an empty frame). Flag nothing.
    percentage = 1.0
    N = float('inf')

# Step 3: Identify points that are more than N standard deviations away from the mean
anomalies = [
    (group_value, driver, score)
    for group_value, row in df.iterrows()
    for driver, score in row.items()
    if abs(score - mean) > N * std_dev
]

# Step 4: Print the anomalies
# NOTE(review): scores are flagged relative to the mean of the displayed group
# scores, but the higher/lower wording compares against `org_average` (the
# overall composite). Confirm which baseline is intended.
for group_value, driver, score in anomalies:
    print(f"The {group_value} {group} scored significantly {'higher' if score > org_average else 'lower'} than the organization average on the {driver.lower()} {level.lower()} with a score of {score}.")

The Employee Leadership Tier scored significantly lower than the organization average on the execution driver with a score of 14.
The Supervisor Leadership Tier scored significantly lower than the organization average on the trust driver with a score of 17.
The Supervisor Leadership Tier scored significantly lower than the organization average on the clarity driver with a score of 10.
The Supervisor Leadership Tier scored significantly lower than the organization average on the strategy driver with a score of 16.
The Supervisor Leadership Tier scored significantly lower than the organization average on the execution driver with a score of 9.
The Supervisor Leadership Tier scored significantly lower than the organization average on the investment driver with a score of 19.
The Sr/Exec Leader Leadership Tier scored significantly higher than the organization average on the team driver with a score of 76.
The Sr/Exec Leader Leadership Tier scored significantly higher than the organization 

In [194]:
# Classify each group's composite score by how far it sits from the mean
# composite of all displayed groups.
series = df_metadata['composite']

# Step 1: Calculate the mean and standard deviation
mean = series.mean()
std_dev = series.std()

# Step 2: Calculate Z-scores
# These are NaN when the deviation is undefined: a single group (std is NaN),
# identical composites (0 / 0), or a missing composite. Step 3 handles NaN.
z_scores = (series - mean) / std_dev

# Get the number of groups
num_groups = len(df_metadata)
print(f"Number of groups: {num_groups}")

# Use lower z-score thresholds when there are fewer groups
if num_groups <= 3:
    z_score_reasonable_threshold = 0.5
    z_score_significant_threshold = 1
elif num_groups <= 6:
    z_score_reasonable_threshold = 0.75
    z_score_significant_threshold = 1.5
else:
    z_score_reasonable_threshold = 1
    z_score_significant_threshold = 2

# Step 3: Classify based on Z-scores
classification = {}
for group_value, z_score in z_scores.items():
    if pd.isna(z_score):
        # Every comparison with NaN is False, so without this branch an
        # undefined z-score would fall through to "significantly higher".
        # No measurable deviation is reported as "normal".
        label = "normal"
    elif z_score < -z_score_significant_threshold:
        label = "significantly lower"
    elif z_score < -z_score_reasonable_threshold:
        label = "reasonably lower"
    elif z_score <= z_score_reasonable_threshold:
        label = "normal"
    elif z_score <= z_score_significant_threshold:
        label = "reasonably higher"
    else:
        label = "significantly higher"
    classification[group_value] = label

# Print the classification
for group_value, label in classification.items():
    print(f"{group_value}: {label}")


Number of groups: 3
Employee: normal
Supervisor: reasonably lower
Sr/Exec Leader: significantly higher


In [195]:
# For every group whose composite was classified as higher or lower than
# normal, find the driver(s) that most plausibly explain it: the drivers where
# the group's gap to the other groups is unusually large in that direction.

# A driver stands out when its gap is more than this many standard deviations
# from the group's mean gap.
DRIVER_Z_SCORE_THRESHOLD = 1

for value, classif in classification.items():
    if classif == "normal":
        continue
    if value not in df_summary.index:
        # No per-driver scores are available for this group, so there is
        # nothing to attribute the classification to.
        continue
    is_higher = classif in ("significantly higher", "reasonably higher")

    # Scores of this group, and the per-driver mean of all other groups
    scores = df_summary.loc[value]
    other_values = df_summary.drop(value).mean()

    # Gap between this group and the others, standardised across drivers.
    # Local names are used so the previous cell's mean/std_dev/z_scores survive.
    diff = scores - other_values
    diff_z_scores = (diff - diff.mean()) / diff.std()

    # Keep the drivers whose gap is extreme in the direction of the classification
    if is_higher:
        stands_out = diff_z_scores > DRIVER_Z_SCORE_THRESHOLD
    else:
        stands_out = diff_z_scores < -DRIVER_Z_SCORE_THRESHOLD
    drivers = list(diff_z_scores[stands_out].index)

    if not drivers:
        # Avoid printing a sentence that ends in an empty list.
        print(f"The {group} {value} is {classif} than the organizational average, but no single {level.lower()} stands out compared to other {group}s.")
        continue

    # Print the drivers
    print(f"The {group} {value} is {classif} than the organizational average and this is likely due to the {'high' if is_higher else 'low'} scores in the following {level.lower()}s compared to other {group}s:\n {', '.join(f'{driver} with a score of {scores[driver]}' for driver in drivers)}")

The Leadership Tier Supervisor is reasonably lower than the organizational average and this is likely due to the low scores in the following drivers compared to other Leadership Tiers:
 Team with a score of 21, Trust with a score of 17, Strategy with a score of 16, Investment with a score of 19
The Leadership Tier Sr/Exec Leader is significantly higher than the organizational average and this is likely due to the high scores in the following drivers compared to other Leadership Tiers:
 Team with a score of 76, Trust with a score of 72, Strategy with a score of 77, Execution with a score of 63


In [196]:
# Finally, look for group/driver combinations that go against the group's own
# trend. For example, a group that is the lowest on almost every driver but the
# highest on one of them is worth a closer look.

# A driver is notable when it is more than this many standard deviations away
# from the group's own mean (adjust as necessary).
DIVERGENCE_STD_MULTIPLIER = 1.5

# Step 1: Calculate the average for each driver.
# This is the unweighted mean across the displayed groups, not the overall
# composite reported by the API.
organizational_average = df_summary.mean()

# Step 2: Normalize scores by subtracting that per-driver average
normalized_df = df_summary.sub(organizational_average, axis=1)

# Step 3: Analyze normalized data to identify divergences and interesting insights
for value in normalized_df.index:
    scores = normalized_df.loc[value]

    # Mean and standard deviation of this group's normalized scores
    group_mean = scores.mean()
    group_std_dev = scores.std()
    margin = DIVERGENCE_STD_MULTIPLIER * group_std_dev

    # Drivers that are notably high or low compared to the group's own average
    high_scores = scores[scores > group_mean + margin]
    low_scores = scores[scores < group_mean - margin]

    # Report the high drivers first, then the low ones. "Higher"/"lower" is
    # relative to the group's own mean, so a notably higher driver can still be
    # below the per-driver average; the sign is reported on each line.
    for direction, notable_scores in (("higher", high_scores), ("lower", low_scores)):
        if notable_scores.empty:
            continue
        print(f"The {value} {group} scored notably {direction} in relation to both how it scored on average and to other {group}s in the following {level.lower()}s:")
        for driver, score in notable_scores.items():
            print(f"- {driver} (score: {abs(round(score))} points {'above' if score > 0 else 'below'} the organizational average)")


The Employee Leadership Tier scored notably lower in relation to both how it scored on average and to other Leadership Tiers in the following drivers:
- Pride (score: 13 points below the organizational average)
- Execution (score: 15 points below the organizational average)
The Supervisor Leadership Tier scored notably higher in relation to both how it scored on average and to other Leadership Tiers in the following drivers:
- Supervisor (score: 4 points below the organizational average)
The Supervisor Leadership Tier scored notably lower in relation to both how it scored on average and to other Leadership Tiers in the following drivers:
- Strategy (score: 25 points below the organizational average)
The Sr/Exec Leader Leadership Tier scored notably higher in relation to both how it scored on average and to other Leadership Tiers in the following drivers:
- Strategy (score: 36 points above the organizational average)
- Execution (score: 34 points above the organizational average)
The Sr