In [1]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
import numpy as np
from scipy import stats
import statsmodels.api as sm
import tqdm


In [2]:
predictive_processing_dataset = pd.read_excel("All_Features_dataset.xlsx")
scales = pd.read_excel("filled_scales_BEBRASK_RETOS.xlsx")


In [3]:
scales.drop(["SUBJECT_CODE","Age"],axis=1,inplace=True)

common_values = scales['EPRIME_CODE'].unique()
predictive_processing_dataset = predictive_processing_dataset[predictive_processing_dataset['Subject'].isin(common_values)]


In [4]:
metrics_columns = scales.drop("EPRIME_CODE",axis=1).columns.values
predictive_processing_columns = predictive_processing_dataset.drop("Subject",axis=1).columns.values

In [5]:
feature_scales = pd.merge(left = predictive_processing_dataset,right=scales, left_on="Subject",right_on="EPRIME_CODE").drop(["EPRIME_CODE","Subject"],axis=1)

In [6]:
values = list(range(5, 96, 5))

In [7]:
feature_scales[predictive_processing_columns]

Unnamed: 0,Mean_Rating0,Mean_Rating0_Match,Mean_Rating0_No_Match,Mean_Rating0_Match_Happy,Mean_Rating0_No_Match_Happy,Mean_Rating0_Match_Sad,Mean_Rating0_No_Match_Sad,Mean_Rating0_Match_Fear,Mean_Rating0_No_Match_Fear,Dif_Match,...,Intercept_Match_Fear,Intercept_No_Match_Fear,Final_Value_Match,Final_Value_No_Match,Final_Value_Match_Happy,Final_Value_No_Match_Happy,Final_Value_Match_Sad,Final_Value_No_Match_Sad,Final_Value_Match_Fear,Final_Value_No_Match_Fear
0,2.888889,3.555556,1.888889,3.888889,1.500000,3.555556,2.166667,3.222222,2.000000,1.666667,...,2.972222,3.400000,3.492063,1.502924,3.622222,0.428571,3.622222,2.666667,3.422222,1.000000
1,2.533333,3.000000,1.833333,3.333333,1.333333,2.555556,1.833333,3.111111,2.333333,1.166667,...,3.444444,1.333333,2.801587,2.140351,2.933333,1.047619,2.755556,2.047619,2.844444,3.047619
2,2.311111,3.000000,1.277778,3.555556,1.166667,2.666667,1.500000,2.777778,1.166667,1.722222,...,2.944444,0.866667,3.158730,1.304094,3.822222,1.380952,3.133333,1.142857,2.644444,1.380952
3,2.555556,3.148148,1.666667,3.222222,1.000000,3.000000,2.666667,3.222222,1.333333,1.481481,...,3.305556,1.333333,3.584656,1.543860,3.822222,1.000000,3.733333,1.523810,3.155556,1.333333
4,2.266667,2.851852,1.388889,3.666667,1.166667,2.666667,1.333333,2.222222,1.666667,1.462963,...,1.222222,2.666667,3.367725,1.432749,3.200000,1.095238,3.066667,1.761905,3.022222,0.952381
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
144,2.088889,2.555556,1.388889,3.111111,1.500000,2.111111,1.333333,2.444444,1.333333,1.166667,...,2.361111,0.333333,2.722222,1.836257,3.711111,1.857143,1.644444,1.619048,2.511111,2.047619
145,2.422222,3.074074,1.444444,3.666667,1.166667,2.777778,1.333333,2.777778,1.833333,1.629630,...,2.861111,0.933333,3.256614,1.654971,3.466667,1.523810,2.511111,0.619048,2.711111,2.476190
146,2.622222,3.222222,1.722222,3.555556,1.000000,2.888889,1.833333,3.222222,2.333333,1.500000,...,3.055556,2.533333,3.063492,1.994152,2.755556,1.000000,2.488889,2.047619,3.355556,2.190476
147,2.577778,3.222222,1.611111,3.777778,1.333333,3.000000,1.666667,2.888889,1.833333,1.611111,...,3.888889,1.933333,2.904762,1.795322,4.111111,1.761905,2.600000,1.523810,2.088889,1.761905


In [9]:

dataframes = {}
all_dictionary = {}
for value in values:
    
# Assuming feature_scales is your DataFrame with features as columns and metrics as rows
    dictionary_distribution = {}

    for metric in metrics_columns:
        dictionary_distribution[metric] = {}
        
        
        for feature in predictive_processing_columns:
            percentile = np.percentile(feature_scales[feature],value)
            if value < 50:
                above_values = feature_scales[feature_scales[feature] > percentile][metric]  
                below_values = feature_scales[feature_scales[feature] <= percentile][metric]  
            else:
                above_values = feature_scales[feature_scales[feature] >= percentile][metric]  
                below_values = feature_scales[feature_scales[feature] < percentile][metric]  

            u_stat, p_value = stats.mannwhitneyu(above_values, below_values, alternative='two-sided')    
            # Calculate means
            high_mean = np.mean(above_values)
            low_mean = np.mean(below_values)
    
            dictionary_distribution[metric][feature] = {'high_mean': high_mean, 'size_high': len(above_values),
                                                       'low_mean': low_mean, 'size_low': len(below_values),
                                                       'p_value': p_value}

    all_dictionary[value] = dictionary_distribution
    df = pd.DataFrame.from_dict({(metric, feature): values
                                 for metric, features in dictionary_distribution.items()
                                 for feature, values in features.items()}, orient='index')

    # Add this DataFrame to the main dictionary of DataFrames
    dataframes[value] = df

In [10]:
p_value_comparison = {}

# Iterate through each metric
for metric in metrics_columns:
    # Initialize a list to store mean p-values per percentile value
    mean_p_values = []

    for percentile in values:
        # Extract p-values for the current percentile dataset
        p_values = dataframes[percentile].xs(metric, level=0)['p_value']
        mean_p_values.append((percentile, p_values.mean()))

    # Find the percentile dataset with the lowest mean p-value
    lowest_p_value_percentile = min(mean_p_values, key=lambda x: x[1])

    # Store the results in the dictionary
    p_value_comparison[metric] = {
        'Percentile with Lowest Mean P-Value': lowest_p_value_percentile[0],
        'Mean P-Value': lowest_p_value_percentile[1]
    }

# Display the comparison results
for metric, result in p_value_comparison.items():
    print(f"\nMetric: {metric}")
    print(f"Percentile with Lowest Mean P-Value: {result['Percentile with Lowest Mean P-Value']}")
    print(f"Mean P-Value: {result['Mean P-Value']}")


Metric: PA
Percentile with Lowest Mean P-Value: 45
Mean P-Value: 0.27726910382660724

Metric: NA.
Percentile with Lowest Mean P-Value: 30
Mean P-Value: 0.40954996442424746

Metric: ERQ_CR
Percentile with Lowest Mean P-Value: 35
Mean P-Value: 0.3501655553214898

Metric: ERQ_ES
Percentile with Lowest Mean P-Value: 85
Mean P-Value: 0.4416079683513129

Metric: UPPSP_NU
Percentile with Lowest Mean P-Value: 20
Mean P-Value: 0.45631176180307975

Metric: UPPSP_PU
Percentile with Lowest Mean P-Value: 70
Mean P-Value: 0.4190106496840595

Metric: UPPSP_SS
Percentile with Lowest Mean P-Value: 30
Mean P-Value: 0.42564935339939064

Metric: UPPSP_PMD
Percentile with Lowest Mean P-Value: 70
Mean P-Value: 0.4214795703957438

Metric: UPPSP_PSV
Percentile with Lowest Mean P-Value: 85
Mean P-Value: 0.3200573960635867

Metric: BIS
Percentile with Lowest Mean P-Value: 90
Mean P-Value: 0.3898690414559161

Metric: BAS_RR
Percentile with Lowest Mean P-Value: 40
Mean P-Value: 0.40403225362996337

Metric: BAS_D

In [11]:
# Initialize a dictionary to store the top three significant counts per metric
top_significant_counts_per_metric = {metric: [] for metric in metrics_columns}

# Iterate through each metric to determine the top three percentiles with the most significant features
for metric in metrics_columns:
    # Create a list to hold the counts for each percentile for this metric
    metric_counts = []

    for percentile, df in dataframes.items():
        # Filter only rows corresponding to the current metric
        p_values = df.xs(metric, level=0)['p_value']

        # Count the number of features with p-values below 0.05
        significant_count = (p_values < 0.05).sum()

        # Add the count to the list
        metric_counts.append((percentile, significant_count))

    # Sort the list of counts in descending order by count
    metric_counts.sort(key=lambda x: x[1], reverse=True)

    # Keep only the top three counts
    top_significant_counts_per_metric[metric] = metric_counts[:3]

# Display the results
for metric, top_counts in top_significant_counts_per_metric.items():
    print(f"\nMetric: {metric}")
    for rank, (percentile, count) in enumerate(top_counts, 1):
        print(f"  {rank}- Highest: Percentile {percentile} with {count} features having significant p-values")




Metric: PA
  1- Highest: Percentile 45 with 13 features having significant p-values
  2- Highest: Percentile 75 with 12 features having significant p-values
  3- Highest: Percentile 30 with 11 features having significant p-values

Metric: NA.
  1- Highest: Percentile 20 with 5 features having significant p-values
  2- Highest: Percentile 30 with 4 features having significant p-values
  3- Highest: Percentile 45 with 4 features having significant p-values

Metric: ERQ_CR
  1- Highest: Percentile 35 with 9 features having significant p-values
  2- Highest: Percentile 55 with 8 features having significant p-values
  3- Highest: Percentile 25 with 7 features having significant p-values

Metric: ERQ_ES
  1- Highest: Percentile 10 with 4 features having significant p-values
  2- Highest: Percentile 75 with 4 features having significant p-values
  3- Highest: Percentile 5 with 3 features having significant p-values

Metric: UPPSP_NU
  1- Highest: Percentile 40 with 3 features having signific

In [12]:
# Initialize a dictionary to store counts of significant p-values per feature across all percentiles
significant_counts_aggregate = {feature: [] for feature in predictive_processing_columns}

# Iterate through each percentile and count the significant p-values per feature
for percentile in values:
    df = dataframes[percentile]
    
    # Group by feature only (ignoring the metric)
    significant_count = df['p_value'].groupby(level=1).apply(lambda x: (x < 0.05).sum())
    
    # Append the count for each feature to its corresponding list
    for feature, count in significant_count.items():
        significant_counts_aggregate[feature].append((percentile, count))

# Print the results per feature
for feature, counts in significant_counts_aggregate.items():
    print(f"\nFeature: {feature}")
    for percentile, count in counts:
        print(f"  Percentile {percentile}: {count} significant p-values")



Feature: Mean_Rating0
  Percentile 5: 1 significant p-values
  Percentile 10: 1 significant p-values
  Percentile 15: 1 significant p-values
  Percentile 20: 3 significant p-values
  Percentile 25: 1 significant p-values
  Percentile 30: 3 significant p-values
  Percentile 35: 0 significant p-values
  Percentile 40: 2 significant p-values
  Percentile 45: 3 significant p-values
  Percentile 50: 3 significant p-values
  Percentile 55: 3 significant p-values
  Percentile 60: 1 significant p-values
  Percentile 65: 1 significant p-values
  Percentile 70: 1 significant p-values
  Percentile 75: 1 significant p-values
  Percentile 80: 1 significant p-values
  Percentile 85: 0 significant p-values
  Percentile 90: 0 significant p-values
  Percentile 95: 0 significant p-values

Feature: Mean_Rating0_Match
  Percentile 5: 1 significant p-values
  Percentile 10: 0 significant p-values
  Percentile 15: 2 significant p-values
  Percentile 20: 5 significant p-values
  Percentile 25: 1 significant

In [13]:
# Initialize a dictionary to store the top three counts of significant p-values per feature
top_significant_counts_per_feature = {feature: [] for feature in predictive_processing_columns}

# Iterate through each percentile and count the significant p-values per feature
for percentile in values:
    df = dataframes[percentile]

    # Group by feature only (ignoring the metric)
    significant_count = df['p_value'].groupby(level=1).apply(lambda x: (x < 0.05).sum())

    # Append the count for each feature to its list
    for feature, count in significant_count.items():
        top_significant_counts_per_feature[feature].append((percentile, count))

# For each feature, sort the counts and keep only the top three
for feature, counts in top_significant_counts_per_feature.items():
    # Sort by the count (second element of the tuple) in descending order
    counts.sort(key=lambda x: x[1], reverse=True)
    # Keep only the top three
    top_significant_counts_per_feature[feature] = counts[:3]

# Print the results showing the top three counts for each feature
for feature, top_counts in top_significant_counts_per_feature.items():
    print(f"\nFeature: {feature}")
    for rank, (percentile, count) in enumerate(top_counts, 1):
        print(f"  {rank}- Highest: Percentile {percentile} with {count} significant p-values")



Feature: Mean_Rating0
  1- Highest: Percentile 20 with 3 significant p-values
  2- Highest: Percentile 30 with 3 significant p-values
  3- Highest: Percentile 45 with 3 significant p-values

Feature: Mean_Rating0_Match
  1- Highest: Percentile 20 with 5 significant p-values
  2- Highest: Percentile 45 with 4 significant p-values
  3- Highest: Percentile 50 with 4 significant p-values

Feature: Mean_Rating0_No_Match
  1- Highest: Percentile 10 with 3 significant p-values
  2- Highest: Percentile 15 with 3 significant p-values
  3- Highest: Percentile 20 with 3 significant p-values

Feature: Mean_Rating0_Match_Happy
  1- Highest: Percentile 45 with 4 significant p-values
  2- Highest: Percentile 55 with 4 significant p-values
  3- Highest: Percentile 60 with 4 significant p-values

Feature: Mean_Rating0_No_Match_Happy
  1- Highest: Percentile 65 with 5 significant p-values
  2- Highest: Percentile 70 with 5 significant p-values
  3- Highest: Percentile 75 with 5 significant p-values

Fe

In [14]:
# Initialize a list to store the total count of significant p-values for each percentile
total_significant_counts_per_percentile = []

# Iterate through each percentile and count the significant p-values
for percentile in values:
    df = dataframes[percentile]
    
    # Count the number of significant p-values across all features and metrics
    total_significant_count = (df['p_value'] < 0.05).sum()
    
    # Store the total count with its corresponding percentile
    total_significant_counts_per_percentile.append((percentile, total_significant_count))

# Sort the percentiles by the total count of significant p-values in descending order
sorted_significant_counts = sorted(total_significant_counts_per_percentile, key=lambda x: x[1], reverse=True)

# Print the sorted list
print("\nOrdered list of percentiles by significant p-values count (descending):")
for rank, (percentile, count) in enumerate(sorted_significant_counts, 1):
    print(f"{rank}. Percentile {percentile}: {count} significant p-values")




Ordered list of percentiles by significant p-values count (descending):
1. Percentile 90: 107 significant p-values
2. Percentile 45: 102 significant p-values
3. Percentile 30: 100 significant p-values
4. Percentile 80: 99 significant p-values
5. Percentile 95: 98 significant p-values
6. Percentile 40: 94 significant p-values
7. Percentile 75: 91 significant p-values
8. Percentile 35: 90 significant p-values
9. Percentile 85: 90 significant p-values
10. Percentile 50: 86 significant p-values
11. Percentile 60: 86 significant p-values
12. Percentile 70: 85 significant p-values
13. Percentile 55: 84 significant p-values
14. Percentile 20: 82 significant p-values
15. Percentile 25: 80 significant p-values
16. Percentile 65: 79 significant p-values
17. Percentile 15: 65 significant p-values
18. Percentile 5: 49 significant p-values
19. Percentile 10: 48 significant p-values


In [15]:
from collections import defaultdict

# Initialize a dictionary to store feature counts and percentiles for each metric
feature_counts_and_percentiles_per_metric = {
    metric: defaultdict(lambda: {"count": 0, "percentiles": []}) for metric in metrics_columns
}

# Iterate through each percentile DataFrame to count significant features per metric
for percentile, df in dataframes.items():
    for metric in metrics_columns:
        # Extract rows corresponding to the current metric
        metric_data = df.xs(metric, level=0)

        # Increment the count and record the percentile for features where the p-value is below 0.05
        for feature, row in metric_data.iterrows():
            if row['p_value'] < 0.05:
                feature_info = feature_counts_and_percentiles_per_metric[metric][feature]
                feature_info["count"] += 1
                feature_info["percentiles"].append(percentile)

# Determine the top 5 features for each metric
top_features_per_metric = {}

for metric, feature_data in feature_counts_and_percentiles_per_metric.items():
    # Sort features by their count in descending order and get the top 5
    sorted_features = sorted(feature_data.items(), key=lambda x: x[1]["count"], reverse=True)[:5]
    top_features_per_metric[metric] = sorted_features

# Print the results
for metric, top_features in top_features_per_metric.items():
    print(f"\nTop 5 features for metric {metric}:")
    for rank, (feature, data) in enumerate(top_features, 1):
        percentiles_str = ", ".join(map(str, sorted(data["percentiles"])))
        print(f"{rank}. Feature {feature}: {data['count']} significant appearances (percentiles: {percentiles_str})")



Top 5 features for metric PA:
1. Feature Mean_Rating0: 14 significant appearances (percentiles: 5, 10, 15, 20, 30, 40, 45, 50, 55, 60, 65, 70, 75, 80)
2. Feature Final_Value_No_Match_Sad: 14 significant appearances (percentiles: 5, 10, 15, 20, 25, 35, 40, 45, 50, 65, 70, 75, 85, 90)
3. Feature Mean_Rating0_Match_Sad: 13 significant appearances (percentiles: 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 75, 80, 85)
4. Feature Mean_Rating0_Match: 11 significant appearances (percentiles: 15, 20, 40, 45, 50, 55, 60, 65, 70, 75, 80)
5. Feature Final_Value_Match_Fear: 10 significant appearances (percentiles: 30, 55, 60, 65, 70, 75, 80, 85, 90, 95)

Top 5 features for metric NA.:
1. Feature Intercept_No_Match_Happy: 12 significant appearances (percentiles: 15, 20, 25, 30, 35, 40, 45, 60, 65, 70, 75, 80)
2. Feature Intercept_Match_Fear: 7 significant appearances (percentiles: 15, 20, 25, 30, 35, 45, 50)
3. Feature Trend_No_Match_Happy: 7 significant appearances (percentiles: 20, 25, 50, 55, 60, 65,

In [16]:
dataframes[85][dataframes[85]["p_value"]<0.05]

Unnamed: 0,Unnamed: 1,high_mean,size_high,low_mean,size_low,p_value
PA,Mean_Rating0_Match_Sad,36.103448,29,32.550000,120,0.004800
PA,Mean_Rating0_No_Match_Sad,35.840000,25,32.717742,124,0.033720
PA,Dif_Happy,36.375000,24,32.640000,125,0.006288
PA,Cor_Pred_Like_Match_Happy,35.826087,23,32.769841,126,0.042310
PA,Trend_No_Match,35.695652,23,32.793651,126,0.048242
...,...,...,...,...,...,...
MSSB_NEG,Mean_Rating0_Match_Happy,0.961538,26,2.040650,123,0.013863
MSSB_NEG,Dif_Fear,2.608696,23,1.714286,126,0.009363
MSSB_NEG,Intercept_No_Match_Sad,2.347826,23,1.761905,126,0.045557
MSSB_DES,Mean_Rating0_Match_Fear,3.541667,24,2.008000,125,0.001061


In [17]:
dataframes[45][dataframes[45]["p_value"]<0.05]

Unnamed: 0,Unnamed: 1,high_mean,size_high,low_mean,size_low,p_value
PA,Mean_Rating0,34.341463,82,31.895522,67,0.018508
PA,Mean_Rating0_Match,34.527778,72,32.038961,77,0.010367
PA,Mean_Rating0_No_Match,34.368421,76,32.068493,73,0.044310
PA,Mean_Rating0_Match_Happy,34.676471,68,32.037037,81,0.009845
PA,Mean_Rating0_Match_Sad,34.308642,81,31.970588,68,0.020765
...,...,...,...,...,...,...
MSSB_NEG,Intercept_No_Match_Happy,2.150000,60,1.651685,89,0.022139
MSSB_DES,Mean_Rating0_No_Match,2.578947,76,1.917808,73,0.009391
MSSB_DES,Cor_Pred_Like_No_Match_Happy,2.600000,55,2.053191,94,0.008427
MSSB_DES,Intercept_No_Match,2.682927,82,1.731343,67,0.002016


### Bootstrapping with resampling in order to find robust cut-off against different data

In [26]:
epoch = 50
dict_bootstrap = {}


#dataframes = {}
for i in tqdm.tqdm(range(epoch),desc="Bootstraping"):
    sample_indices = np.random.choice(feature_scales.index, size=len(feature_scales), replace=True)
    sample_df = feature_scales.loc[sample_indices]
    all_dictionary = {}

    # Cluster the bootstrap sample
    for value in values:
    
        # Assuming feature_scales is your DataFrame with features as columns and metrics as rows
        dictionary_distribution = {}
    
        for metric in metrics_columns:
            dictionary_distribution[metric] = {}
    
            for feature in predictive_processing_columns:
                percentile = np.percentile(sample_df[feature], value)
                if value < 50:
                    above_values = sample_df[sample_df[feature] > percentile][metric]
                    below_values = sample_df[sample_df[feature] <= percentile][metric]
                else:
                    above_values = sample_df[sample_df[feature] >= percentile][metric]
                    below_values = sample_df[sample_df[feature] < percentile][metric]
                try:
                    u_stat, p_value = stats.mannwhitneyu(above_values, below_values, alternative='two-sided')
                    # Calculate means
                    high_mean = np.mean(above_values)
                    low_mean = np.mean(below_values)
        
                    dictionary_distribution[metric][feature] = {'high_mean': high_mean, 'size_high': len(above_values),
                                                                'low_mean': low_mean, 'size_low': len(below_values),
                                                                'p_value': p_value}
                except:
                    dictionary_distribution[metric][feature] = {'high_mean': 0, 'size_high': 0,
                                                                'low_mean': 0, 'size_low': 0,
                                                                'p_value': 1}
    
        all_dictionary[value] = dictionary_distribution
        df = pd.DataFrame.from_dict({(metric, feature): values
                                     for metric, features in dictionary_distribution.items()
                                     for feature, values in features.items()}, orient='index')
    
        # Add this DataFrame to the main dictionary of DataFrames
        #dataframes[value] = df
    
    dict_bootstrap[i] = all_dictionary

Bootstraping: 100%|██████████| 50/50 [39:30<00:00, 47.41s/it]


In [41]:
aggregate_stats = {value: {metric: defaultdict(lambda: {'p_values': [], 'high_means': [], 'low_means': [], 'size_highs': [], 'size_lows': []}) for metric in metrics_columns} for value in values}

for i in range(epoch):
    for value in values:
        for metric, features in dict_bootstrap[i][value].items():
            for feature, data in features.items():
                agg = aggregate_stats[value][metric][feature]
                agg['p_values'].append(data['p_value'])
                agg['high_means'].append(data['high_mean'])
                agg['low_means'].append(data['low_mean'])
                agg['size_highs'].append(data['size_high'])
                agg['size_lows'].append(data['size_low'])

# Compute mean and variance of p_values, and mean of other stats, then filter where mean p_value is < 0.05
significant_features = {}

for value in values:
    for metric, features in aggregate_stats[value].items():
        for feature, stats in features.items():
            mean_p = np.mean(stats['p_values'])
            var_p = np.var(stats['p_values'])
            mean_high_mean = np.mean(stats['high_means'])
            mean_low_mean = np.mean(stats['low_means'])
            mean_size_high = np.mean(stats['size_highs'])
            mean_size_low = np.mean(stats['size_lows'])
            if mean_p < 0.065:
                if metric not in significant_features:
                    significant_features[metric] = {}
                significant_features[metric][feature] = {
                    'mean_p_value': mean_p,
                    'variance': var_p,
                    'mean_high_mean': mean_high_mean,
                    'mean_low_mean': mean_low_mean,
                    'mean_size_high': mean_size_high,
                    'mean_size_low': mean_size_low
                }

# Display the results
for metric, features in significant_features.items():
    print(f"\nMetric: {metric}")
    for feature, stats in features.items():
        print(f"Feature: {feature}")
        print(f"  Mean p-value: {stats['mean_p_value']:.4f}, Variance: {stats['variance']:.4f}")
        print(f"  Mean High Mean: {stats['mean_high_mean']:.2f}, Mean Low Mean: {stats['mean_low_mean']:.2f}")
        print(f"  Mean Size High: {stats['mean_size_high']:.2f}, Mean Size Low: {stats['mean_size_low']:.2f}")




Metric: PA
Feature: Final_Value_No_Match_Sad
  Mean p-value: 0.0565, Variance: 0.0132
  Mean High Mean: 33.97, Mean Low Mean: 30.47
  Mean Size High: 117.46, Mean Size Low: 31.54
Feature: Mean_Rating0_No_Match_Fear
  Mean p-value: 0.0615, Variance: 0.0118
  Mean High Mean: 34.23, Mean Low Mean: 31.19
  Mean Size High: 99.76, Mean Size Low: 49.24
Feature: Intercept_Match
  Mean p-value: 0.0516, Variance: 0.0218
  Mean High Mean: 34.29, Mean Low Mean: 31.28
  Mean Size High: 96.34, Mean Size Low: 52.66
Feature: Trend_Match_Fear
  Mean p-value: 0.0505, Variance: 0.0110
  Mean High Mean: 34.63, Mean Low Mean: 31.77
  Mean Size High: 75.80, Mean Size Low: 73.20
Feature: Cor_Pred_Like_No_Match_Happy
  Mean p-value: 0.0605, Variance: 0.0171
  Mean High Mean: 33.69, Mean Low Mean: 29.46
  Mean Size High: 132.70, Mean Size Low: 16.30
Feature: Mean_Rating0
  Mean p-value: 0.0496, Variance: 0.0072
  Mean High Mean: 35.14, Mean Low Mean: 32.33
  Mean Size High: 47.72, Mean Size Low: 101.28
Featur