# Panas

In [15]:
import pandas as pd

# Load the data from the CSV file
file_path = './survey/panas.csv'
panas_data = pd.read_csv(file_path)

# Display the first few rows of the dataframe and its columns to understand its structure
panas_data.head(), panas_data.columns

(   uid type  Interested  Distressed  Upset  Strong  Guilty  Scared  Hostile   \
 0  u00  pre           5           4      3       4     3.0       5         5   
 1  u01  pre           4           2      2       2     5.0       1         1   
 2  u02  pre           4           1      1       2     2.0       2         1   
 3  u03  pre           4           2      2       4     5.0       5         5   
 4  u04  pre           4           2      1       3     3.0       1         1   
 
    Enthusiastic  Proud  Irritable  Alert  Inspired  Nervous  Determined   \
 0             3      3          3      4       4.0      NaN            4   
 1             3      3          2      4       3.0      3.0            4   
 2             3      2          2      3       2.0      2.0            2   
 3             3      1          1      5       3.0      1.0            3   
 4             3      3          2      3       1.0      2.0            3   
 
    Attentive  Jittery  Active   Afraid   
 0   

In [16]:
for column in panas_data.columns[2:5]:  # Check first three survey columns
    print(f"Unique values in {column}: {panas_data[column].astype(str).str.strip().str.title().unique()}")
    
print("NaN counts after mapping:")
print(panas_data.isna().sum())

panas_data.fillna(3, inplace=True)  # Assuming 3 is the neutral score

Unique values in Interested: ['5' '4' '3' '2' '1']
Unique values in Distressed: ['4' '2' '1' '3' '5']
Unique values in Upset: ['3' '2' '1' '5' '4']
NaN counts after mapping:
uid             0
type            0
Interested      0
Distressed      0
Upset           0
Strong          0
Guilty          1
Scared          0
Hostile         0
Enthusiastic    0
Proud           0
Irritable       0
Alert           0
Inspired        1
Nervous         1
Determined      0
Attentive       1
Jittery         1
Active          0
Afraid          0
dtype: int64


In [17]:
print("Unique 'type' values before adjustment:", panas_data['type'].unique())

# Normalize 'type' field for consistency
panas_data['type'] = panas_data['type'].str.strip().str.title()

# Verify 'uid' consistency and format
print("Unique 'uid' values check (sample):", panas_data['uid'].sample(5))

# Identify participants who have both Pre and Post data
pre_participants = set(panas_data[panas_data['type'] == 'Pre']['uid'])
post_participants = set(panas_data[panas_data['type'] == 'Post']['uid'])
common_participants = pre_participants & post_participants

print(f"Number of participants in Pre only: {len(pre_participants)}")
print(f"Number of participants in Post only: {len(post_participants)}")
print(f"Number of common participants: {len(common_participants)}")

# Filter datasets to only include common participants
pre_data = panas_data[(panas_data['type'] == 'Pre') & (panas_data['uid'].isin(common_participants))]
post_data = panas_data[(panas_data['type'] == 'Post') & (panas_data['uid'].isin(common_participants))]

# Save the filtered data
pre_data.to_csv('./data/Panas/Panas_Pre.csv', index=False)
post_data.to_csv('./data/Panas/Panas_Post.csv', index=False)

# Check final data counts
print("Final Pre data counts (non-NaN):")
print(pre_data.count())
print("\nFinal Post data counts (non-NaN):")
print(post_data.count())

Unique 'type' values before adjustment: ['pre' 'post']
Unique 'uid' values check (sample): 56    u15
8     u09
78    u49
16    u18
71    u36
Name: uid, dtype: object
Number of participants in Pre only: 46
Number of participants in Post only: 39
Number of common participants: 38
Final Pre data counts (non-NaN):
uid             38
type            38
Interested      38
Distressed      38
Upset           38
Strong          38
Guilty          38
Scared          38
Hostile         38
Enthusiastic    38
Proud           38
Irritable       38
Alert           38
Inspired        38
Nervous         38
Determined      38
Attentive       38
Jittery         38
Active          38
Afraid          38
dtype: int64

Final Post data counts (non-NaN):
uid             38
type            38
Interested      38
Distressed      38
Upset           38
Strong          38
Guilty          38
Scared          38
Hostile         38
Enthusiastic    38
Proud           38
Irritable       38
Alert           38
Inspired     

In [18]:
# Adjust the indices to start from index 2 in the dataframe
positive_indices = [2, 5, 9, 10, 12, 13, 15, 16, 18]  # Adjusted for 0-based indexing and shifted right by 2
negative_indices = [3, 4, 6, 7, 8, 11, 14, 17, 19]  # Adjusted for 0-based indexing and shifted right by 2

# Extract column names for the positive and negative affects
positive_columns_pre = pre_data.columns[positive_indices]
negative_columns_pre = pre_data.columns[negative_indices]

positive_columns_post = post_data.columns[positive_indices]
negative_columns_post = post_data.columns[negative_indices]

# Recalculate total scores for positive and negative affect for each user
pre_data['Positive_Total'] = pre_data[positive_columns_pre].sum(axis=1)
pre_data['Negative_Total'] = pre_data[negative_columns_pre].sum(axis=1)

post_data['Positive_Total'] = post_data[positive_columns_post].sum(axis=1)
post_data['Negative_Total'] = post_data[negative_columns_post].sum(axis=1)

pre_data.to_csv('./data/Panas/Panas_Pre.csv', index=False)
post_data.to_csv('./data/Panas/Panas_Post.csv', index=False)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pre_data['Positive_Total'] = pre_data[positive_columns_pre].sum(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pre_data['Negative_Total'] = pre_data[negative_columns_pre].sum(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  post_data['Positive_Total'] = post_data[positive_columns_post

# Pre and Post

In [2]:
import pandas as pd
from scipy import stats

# Load the pre and post survey data from separate CSV files
pre_data = pd.read_csv('./data/Panas/Panas_Pre.csv')
post_data = pd.read_csv('./data/Panas/Panas_Post.csv')

# Merge the pre and post survey data on the 'uid' column
merged_data = pd.merge(pre_data, post_data, on='uid', suffixes=('_pre', '_post'))

stat_pre, p_pre = stats.shapiro(merged_data['Positive_Total_pre'])
stat_post, p_post = stats.shapiro(merged_data['Positive_Total_post'])



print(f"Pre data normality test: statistic={stat_pre}, p-value={p_pre}")
print(f"Post data normality test: statistic={stat_post}, p-value={p_post}")


# Perform a paired t-test
t_stat, p_value = stats.ttest_rel(merged_data['Positive_Total_post'], merged_data['Positive_Total_pre'])

print(f"T-statistic: {t_stat}, p-value: {p_value}")

Pre data normality test: statistic=0.9637963175773621, p-value=0.2516540288925171
Post data normality test: statistic=0.9762231111526489, p-value=0.5842457413673401
T-statistic: -0.05351634106993908, p-value: 0.957608425249791
