In [18]:
import os

import anthropic
import pandas as pd
import torch
import torch.nn as nn
from sklearn.preprocessing import MinMaxScaler

In [2]:
# Read the processed survey data, then separate the prediction target (y)
# from the remaining feature columns (X).
data = pd.read_csv('processed_data_final.csv')
y = data['JobSatisfaction_O']
X = data.drop(columns=[y.name])

In [3]:
# Partition the feature columns along two axes:
#   * name suffix: '_F' (fixed) vs '_A' (actionable)
#   * dtype:       float64 (numerical) vs int64 (binary indicator)
numerical_fixed_columns = [
    name for name, dtype in X.dtypes.items()
    if name.endswith('_F') and dtype == 'float64'
]
numerical_actionable_columns = [
    name for name, dtype in X.dtypes.items()
    if name.endswith('_A') and dtype == 'float64'
]
binary_fixed_columns = [
    name for name, dtype in X.dtypes.items()
    if name.endswith('_F') and dtype == 'int64'
]
binary_actionable_columns = [
    name for name, dtype in X.dtypes.items()
    if name.endswith('_A') and dtype == 'int64'
]

In [1]:
# Map each question to the list of its binary answer columns.
# The question key is the column name with its last two '_'-separated
# parts removed (e.g. 'RemoteWork_Always_A' -> 'RemoteWork').
binary_actionable_groups = {}
for column in binary_actionable_columns:
    question = '_'.join(column.split('_')[:-2])
    binary_actionable_groups.setdefault(question, []).append(column)

NameError: name 'binary_actionable_columns' is not defined

In [5]:
# Min-max scale every numerical column (fixed and actionable) in place,
# keeping the fitted scaler for later use.
scaler = MinMaxScaler()
numerical_columns = numerical_fixed_columns + numerical_actionable_columns
X[numerical_columns] = scaler.fit_transform(X[numerical_columns])

In [6]:
# Bundle the column groupings and the fitted scaler so they can be passed
# around as a single object.
preprocessing_info = dict(
    numerical_fixed_columns=numerical_fixed_columns,
    numerical_actionable_columns=numerical_actionable_columns,
    binary_fixed_columns=binary_fixed_columns,
    binary_actionable_groups=binary_actionable_groups,
    scaler=scaler,
)

In [7]:
# Define the neural network model with dropout layers to reduce overfitting
class JobSatisfactionNN(nn.Module):
    """Feed-forward regressor with two hidden ReLU layers, each followed by dropout.

    Args:
        input_dim: Number of input features.
        hidden_dims: Sizes of the first and second hidden layers.
            Defaults to ``(128, 64)``.
        dropout: Dropout probability applied after each hidden layer.
            Defaults to ``0.5``.
    """

    def __init__(self, input_dim, hidden_dims=(128, 64), dropout=0.5):
        super().__init__()
        first_hidden, second_hidden = hidden_dims
        # The attribute names (fc1, fc2, fc3) become the state_dict keys, so
        # they must stay unchanged for previously saved weights to load.
        self.fc1 = nn.Linear(input_dim, first_hidden)
        self.dropout1 = nn.Dropout(dropout)
        self.fc2 = nn.Linear(first_hidden, second_hidden)
        self.dropout2 = nn.Dropout(dropout)
        self.fc3 = nn.Linear(second_hidden, 1)

    def forward(self, x):
        """Return one unbounded output per input row (no final activation)."""
        x = self.dropout1(torch.relu(self.fc1(x)))
        x = self.dropout2(torch.relu(self.fc2(x)))
        return self.fc3(x)

In [9]:
# Load the model
input_dim = X.shape[1]
model = JobSatisfactionNN(input_dim)
# map_location='cpu' lets a checkpoint that was saved from a GPU load on a
# CPU-only machine; the inference code below builds CPU tensors.
# NOTE: torch.load unpickles the file, so only load checkpoints from a trusted
# source (recent PyTorch versions also accept weights_only=True).
model.load_state_dict(torch.load('final_model.pth', map_location='cpu'))
# Switch to inference mode so the dropout layers are disabled.
model.eval()


JobSatisfactionNN(
  (fc1): Linear(in_features=573, out_features=128, bias=True)
  (dropout1): Dropout(p=0.5, inplace=False)
  (fc2): Linear(in_features=128, out_features=64, bias=True)
  (dropout2): Dropout(p=0.5, inplace=False)
  (fc3): Linear(in_features=64, out_features=1, bias=True)
)

In [2]:
#function to query the impact on job satisfaction that each change in an actionable answer has
def sensitivity_analysis(model, X, row_number, preprocessing_info, y, top_n=5):
    """Rank single-answer changes for one row by their effect on the model prediction.

    Each actionable feature of the selected row is perturbed on its own and the
    model is re-evaluated:

    * every numerical actionable column is set to the minimum and to the
      maximum value that column takes in ``X``;
    * within every binary answer group, each answer other than the currently
      selected one is switched on (and the selected answer, if any, is
      switched off).

    Args:
        model: Callable that maps a float32 tensor of shape (1, n_columns) to a
            single-element tensor. The caller is expected to have put the model
            in eval mode; this function does not change the model's mode.
        X: DataFrame of model inputs, one row per respondent.
        row_number: Positional index of the row to analyse. Negative values
            count from the end, as with ``DataFrame.iloc``.
        preprocessing_info: Mapping that provides
            ``'numerical_actionable_columns'`` (list of column names) and
            ``'binary_actionable_groups'`` (mapping of question -> list of
            answer column names).
        y: Unused; kept so existing callers keep working.
        top_n: Maximum number of changes to return. ``None`` returns all of
            them. Defaults to 5.

    Returns:
        A list of tuples ``(feature, original_value, new_value,
        original_prediction, modified_prediction, impact)`` sorted by
        ``impact`` (modified minus original prediction) in descending order.

    Raises:
        IndexError: If ``row_number`` is outside the rows of ``X``.
    """
    # A list indexer always yields exactly one row, accepts negative positions
    # and raises IndexError when out of bounds. A slice
    # (row_number:row_number+1) would silently produce an empty frame instead.
    original_data = X.iloc[[row_number]].copy()

    def predict(frame):
        """Run the model on a one-row frame and return the prediction as a float."""
        features = torch.tensor(frame.values, dtype=torch.float32)
        with torch.no_grad():
            return model(features).item()

    original_prediction = predict(original_data)
    actionable_changes = []

    # Numerical Actionable Features
    for col in preprocessing_info['numerical_actionable_columns']:
        original_value = original_data[col].values[0]
        for new_value in (X[col].min(), X[col].max()):
            modified_data = original_data.copy()
            modified_data[col] = new_value
            modified_prediction = predict(modified_data)
            actionable_changes.append((
                col, original_value, new_value,
                original_prediction, modified_prediction,
                modified_prediction - original_prediction,
            ))

    # Binary Actionable Features
    for cols in preprocessing_info['binary_actionable_groups'].values():
        # First answer of the group that is currently switched on, if any.
        original_col = next(
            (col for col in cols if original_data[col].values[0] == 1), None
        )

        for col in cols:
            if col == original_col:
                continue
            modified_data = original_data.copy()
            if original_col is not None:
                modified_data[original_col] = 0
            modified_data[col] = 1
            modified_prediction = predict(modified_data)

            original_value = original_data[col].values[0] if original_col is not None else 0
            actionable_changes.append((
                col, original_value, 1,
                original_prediction, modified_prediction,
                modified_prediction - original_prediction,
            ))

    actionable_changes.sort(key=lambda change: change[5], reverse=True)
    return actionable_changes[:top_n]

In [32]:
# Positional index of the respondent to analyse (the new user)
row_number = 1001
top_changes = sensitivity_analysis(model, X, row_number, preprocessing_info, y)

# Report each of the highest-impact changes on one line
for feature, before, after, base_js, changed_js, delta in top_changes:
    print(f"Feature: {feature}, Original Answer: {before}, New Answer: {after}, "
          f"Original Job Satisfaction: {base_js:.4f}, Predicted New Job Satisfaction: {changed_js:.4f}, "
          f"Impact: {delta:.4f}")

Feature: RemoteWork_Always_A, Original Answer: 0, New Answer: 1, Original Job Satisfaction: -0.2621, Predicted New Job Satisfaction: 0.1659, Impact: 0.4280
Feature: MLMethodNextYearSelect_Factor Alysis_A, Original Answer: 0, New Answer: 1, Original Job Satisfaction: -0.2621, Predicted New Job Satisfaction: 0.0736, Impact: 0.3357
Feature: MLToolNextYearSelect_TIBCO Spotfire_A, Original Answer: 0, New Answer: 1, Original Job Satisfaction: -0.2621, Predicted New Job Satisfaction: 0.0602, Impact: 0.3223
Feature: MLToolNextYearSelect_RapidMiner (free version)_A, Original Answer: 0, New Answer: 1, Original Job Satisfaction: -0.2621, Predicted New Job Satisfaction: 0.0027, Impact: 0.2648
Feature: LanguageRecommendationSelect_Stata_A, Original Answer: 0, New Answer: 1, Original Job Satisfaction: -0.2621, Predicted New Job Satisfaction: -0.0032, Impact: 0.2588


In [3]:
#use llm api to re-write the results into natural language
# The API key is read from the environment so it never appears in the notebook.
client = anthropic.Anthropic(
    api_key=os.getenv('ANTHROPIC_API_KEY')
)

# top_changes is sorted by impact, so the first entry is the most beneficial change.
top_change = top_changes[0]
feature_name = top_change[0].replace('_', ' ').title()

if top_change[0] in preprocessing_info['numerical_actionable_columns']:
    # Numerical features hold min-max scaled values, not 0/1 flags; report the
    # numbers instead of collapsing them to Yes/No.
    original_answer = f"{top_change[1]:.4f}"
    new_answer = f"{top_change[2]:.4f}"
else:
    original_answer = "Yes" if top_change[1] == 1 else "No"
    new_answer = "Yes" if top_change[2] == 1 else "No"

original_js = top_change[3]
new_js = top_change[4]
impact = top_change[5]

message_content = f"Using the variables: feature ({feature_name}), original ({original_answer}), new ({new_answer}), initial job satisfaction ({original_js:.4f}), updated job satisfaction ({new_js:.4f}), and change impact ({impact:.4f}), construct a readable sentence that describes the most impactful change for improving job satisfaction."

message = client.messages.create(
    model="claude-3-haiku-20240307",
    max_tokens=1000,
    temperature=0.0,
    system="Rewrite in flowing English.",
    messages=[
        {"role": "user", "content": message_content}
    ]
)

# message.content is a list of content blocks; print the generated text rather
# than the repr of the block objects.
print("".join(block.text for block in message.content if block.type == "text"))

NameError: name 'anthropic' is not defined