# Talk-to-Listen Ratio Analysis
## Install the required packages

In [None]:
! pip install pandas statsmodels numpy scikit-learn scipy

## Calculate Talk-to-Listen Ratios

In [None]:
import main_ttlr

# NOTE: the full analysis takes approximately 2 hours to run.
# main_ttlr.main() is a project-local entry point; the rest of this notebook
# treats its return value as a pandas DataFrame (presumably one row per
# recorded call -- TODO confirm against main_ttlr).
talk_listen_df = main_ttlr.main()

# Show the result as the cell output.
talk_listen_df

## Clean Data
I have cleaned the data by removing rows whose AE talk ratio (`ae_talk_ratio`) is exactly 0.0 or 1.0. In these calls only one side spoke, so they provide no information about a conversation between two people.

In [14]:
# Drop rows whose ae_talk_ratio is exactly 0.0 or 1.0 (only one side spoke).
# Rows with a missing ratio are kept, exactly as before.
# .copy() makes the filtered result an independent DataFrame, so later cells can
# add columns to it without pandas raising SettingWithCopyWarning.
talk_listen_df = talk_listen_df.loc[
    ~talk_listen_df['ae_talk_ratio'].isin([0.0, 1.0])
].copy()

talk_listen_df.head()

Unnamed: 0,id,title,ae_name,ae_email,sales_outcome,date,meeting_attendees,host_email,transcript_url,video_url,audio_url,total_duration,ae_talk_duration,client_talk_duration,no_talk_duration,ae_talk_ratio,client_talk_ratio,ae_talk_ratio_duration,client_talk_ratio_duration,no_talk_ratio_duration
0,noufJNYz4sHSwwYc,Mohammad Nadeem Karim <> My Amazon Guy,Shawn Henderson,shawn.henderson@myamazonguy.com,closed_won,2023/07/24,"audray.alcordo@myamazonguy.com, shawn.henderso...",audray.alcordo@myamazonguy.com,https://app.fireflies.ai/view/noufJNYz4sHSwwYc,https://cdn.fireflies.ai/noufJNYz4sHSwwYc/vide...,https://cdn.fireflies.ai/noufJNYz4sHSwwYc/audi...,2409.928,1525.246,453.766,430.916,0.770711,0.229289,0.632901,0.18829,0.178809
2,WJ436UP28tj1eR4p,Chris Meet with My Amazon Guy,Shawn Henderson,shawn.henderson@myamazonguy.com,closed_won,2023/05/12,"heidelmanc@gmail.com, shawn.henderson@myamazon...",shawn.henderson@myamazonguy.com,https://app.fireflies.ai/view/WJ436UP28tj1eR4p,https://cdn.fireflies.ai/WJ436UP28tj1eR4p/vide...,https://cdn.fireflies.ai/WJ436UP28tj1eR4p/audi...,3682.786,1410.776,1795.782,476.228,0.439966,0.560034,0.383073,0.487615,0.129312
3,trQWJhhuamfX6Yxz,Chris Meet with My Amazon Guy,Shawn Henderson,shawn.henderson@myamazonguy.com,closed_won,2023/03/16,"chris@mymopshop.com, shawn.henderson@myamazong...",shawn.henderson@myamazonguy.com,https://app.fireflies.ai/view/trQWJhhuamfX6Yxz,https://cdn.fireflies.ai/trQWJhhuamfX6Yxz/vide...,https://cdn.fireflies.ai/trQWJhhuamfX6Yxz/audi...,2085.458,1013.148,770.778,301.532,0.567932,0.432068,0.485816,0.369597,0.144588
4,6SQGaUHHiwdrAvm1,kevin <> My Amazon Guy,John Aspinall,john.aspinall@myamazonguy.com,closed_won,2023/08/21,"sddrkevin@yahoo.com, john.aspinall@myamazonguy...",john.aspinall@myamazonguy.com,https://app.fireflies.ai/view/6SQGaUHHiwdrAvm1,https://cdn.fireflies.ai/6SQGaUHHiwdrAvm1/vide...,https://cdn.fireflies.ai/6SQGaUHHiwdrAvm1/audi...,992.984,359.47,541.352,92.162,0.399047,0.600953,0.36201,0.545177,0.092813
5,70F1yZvLPwgEn2i0,Ongaro Beauty <> My Amazon Guy,Shawn Henderson,shawn.henderson@myamazonguy.com,closed_won,2023/07/06,"airine.francisco@myamazonguy.com, brian@ongaro...",airine.francisco@myamazonguy.com,https://app.fireflies.ai/view/70F1yZvLPwgEn2i0,https://cdn.fireflies.ai/70F1yZvLPwgEn2i0/vide...,https://cdn.fireflies.ai/70F1yZvLPwgEn2i0/audi...,3388.13,2488.698,634.236,265.196,0.79691,0.20309,0.734534,0.187194,0.078272


## Correlation Analysis

In [48]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
from scipy import stats

# Encode the outcome as 1 (closed_won) / 0 (closed_lost); any other label maps
# to NaN. `assign` returns a new DataFrame, so this never writes into a slice of
# an earlier frame (no SettingWithCopyWarning).
talk_listen_df = talk_listen_df.assign(
    sales_outcome_binary=talk_listen_df['sales_outcome'].map({'closed_won': 1, 'closed_lost': 0})
)

# Select variables for analysis
variables = [
    'ae_talk_ratio', 'client_talk_ratio', 'total_duration',
    'ae_talk_duration', 'client_talk_duration'
]

# One record per variable: Pearson correlation with the binary outcome and its
# p-value.
corr_list = []

for var in variables:
    # pearsonr does not skip missing values, so a single NaN would turn the
    # whole result into NaN. Correlate only the rows where both the variable
    # and the outcome are present.
    complete_pairs = talk_listen_df[[var, 'sales_outcome_binary']].dropna()
    corr_coef, p_value = stats.pearsonr(
        complete_pairs[var], complete_pairs['sales_outcome_binary']
    )
    corr_list.append({
        'Variable': var,
        'Correlation with Sales Outcome': corr_coef,
        'P-value (Correlation)': p_value
    })

# Build the table once from the collected records
corr_df = pd.DataFrame(corr_list)


## Logistic Regression

In [50]:
import pandas as pd
import statsmodels.api as sm
from sklearn.preprocessing import StandardScaler

# Logistic regression needs complete cases: discard rows that are missing
# either predictor or the binary outcome.
talk_listen_df = talk_listen_df.dropna(
    subset=['ae_talk_ratio', 'total_duration', 'sales_outcome_binary']
)

# Target vector and predictor matrix
y = talk_listen_df['sales_outcome_binary']
X_new = talk_listen_df.loc[:, ['ae_talk_ratio', 'total_duration']]

# Standardize the predictors so their coefficients are on a comparable scale
scaler = StandardScaler()
X_scaled_array = scaler.fit_transform(X_new)

# Restore the column labels and row index lost by the scaler, then prepend the
# intercept column expected by statsmodels.
X_scaled = sm.add_constant(
    pd.DataFrame(X_scaled_array, index=X_new.index, columns=X_new.columns)
)

# Fit the logit model with Newton's method, allowing up to 200 iterations
logit_model = sm.Logit(y, X_scaled)
result = logit_model.fit(maxiter=200, method='newton')

# Keep the coefficient table (index 1 of the summary tables) for later cells
# and print the full report.
model_summary = result.summary2()
regression_summary = model_summary.tables[1]
print(model_summary)


Optimization terminated successfully.
         Current function value: 0.595977
         Iterations 5
                           Results: Logit
Model:              Logit                Method:           MLE       
Dependent Variable: sales_outcome_binary Pseudo R-squared: 0.012     
Date:               2024-10-18 14:11     AIC:              2501.9503 
No. Observations:   2094                 BIC:              2518.8908 
Df Model:           2                    Log-Likelihood:   -1248.0   
Df Residuals:       2091                 LL-Null:          -1263.4   
Converged:          1.0000               LLR p-value:      2.1022e-07
No. Iterations:     5.0000               Scale:            1.0000    
----------------------------------------------------------------------
                  Coef.   Std.Err.     z      P>|z|    [0.025   0.975]
----------------------------------------------------------------------
const            -0.9016    0.0486  -18.5338  0.0000  -0.9970  -0.8063
ae_talk_rati

## Regression Coefficients and P-values

In [52]:
# Turn the coefficient table into a tidy frame in one chain:
#   1. move the variable names out of the index into a column,
#   2. give the columns presentation-friendly labels,
#   3. discard the intercept row ('const'), which is not a predictor.
regression_df = (
    regression_summary
    .reset_index()
    .rename(columns={
        'index': 'Variable',
        'Coef.': 'Regression Coefficient',
        'P>|z|': 'P-value (Regression)'
    })
    .loc[lambda frame: frame['Variable'].ne('const')]
)


## Interpret Results

In [53]:
# Left-join the regression statistics onto the correlation table. Every
# variable in corr_df is kept; those that were not part of the regression get
# NaN in the regression columns.
merged_df = corr_df.merge(
    regression_df.loc[:, ['Variable', 'Regression Coefficient', 'P-value (Regression)']],
    how='left',
    on='Variable',
)

# Define interpretation based on p-value
def interpret_p_value(p_value, alpha=0.05):
    """Label a regression p-value as significant or not.

    Parameters
    ----------
    p_value : float or None
        P-value to classify. Missing values (None / NaN) are accepted.
    alpha : float, default 0.05
        Significance threshold. P-values strictly below ``alpha`` are
        labelled 'Good Predictor'.

    Returns
    -------
    str
        'N/A' when ``p_value`` is missing, 'Good Predictor' when
        ``p_value < alpha``, otherwise 'Not a Good Predictor'.

    Notes
    -----
    The label reflects statistical significance only; it says nothing about
    the size of the effect.
    """
    # Missing p-values occur for variables that were not in the regression.
    if pd.isnull(p_value):
        return 'N/A'
    return 'Good Predictor' if p_value < alpha else 'Not a Good Predictor'

# Classify each variable from its regression p-value, element by element
merged_df['Interpretation'] = merged_df['P-value (Regression)'].map(interpret_p_value)

## Present Results

In [55]:
def _format_p_value(p_value):
    """Format a p-value for display.

    Returns 'N/A' for a missing value and '<0.001' for values below 0.001
    (three decimals would otherwise print them as a misleading '0.000').
    Everything else is shown with three decimals.
    """
    if pd.isnull(p_value):
        return 'N/A'
    if p_value < 0.001:
        return '<0.001'
    return f"{p_value:.3f}"


# Rearranging columns for presentation. .copy() makes final_df an independent
# frame, so the column assignments below do not trigger SettingWithCopyWarning
# and merged_df keeps its raw numeric values.
final_df = merged_df[[
    'Variable',
    'Correlation with Sales Outcome',
    'P-value (Correlation)',
    'Regression Coefficient',
    'P-value (Regression)',
    'Interpretation'
]].copy()

# Format numerical values for better readability
final_df['Correlation with Sales Outcome'] = final_df['Correlation with Sales Outcome'].round(4)
final_df['P-value (Correlation)'] = final_df['P-value (Correlation)'].apply(_format_p_value)
final_df['Regression Coefficient'] = final_df['Regression Coefficient'].round(4)
final_df['P-value (Regression)'] = final_df['P-value (Regression)'].apply(_format_p_value)

# Display the final table
final_df


Unnamed: 0,Variable,Correlation with Sales Outcome,P-value (Correlation),Regression Coefficient,P-value (Regression),Interpretation
0,ae_talk_ratio,-0.0158,0.471,-0.6063,1.0,Not a Good Predictor
1,client_talk_ratio,0.0158,0.471,-0.3055,1.0,Not a Good Predictor
2,total_duration,0.121,0.0,0.0003,0.0,Good Predictor
3,ae_talk_duration,0.1099,0.0,,,
4,client_talk_duration,0.0952,0.0,,,
