In [None]:
import pandas as pd
import numpy as np

file_list = [
    "0000_man.csv", "0000_woman.csv", "0000_robot.csv",
    "0001_man.csv", "0001_woman.csv", "0001_robot.csv",
]


def calculate_pnn50(group):
    """Return pNN50 as a percentage for one group of RR values.

    pNN50 here is the share of successive differences in ``rr_adjusted``
    whose absolute value exceeds 50 (presumably milliseconds -- confirm
    against the recording device).

    Args:
        group: Either a DataFrame holding an ``rr_adjusted`` column or the
            ``rr_adjusted`` Series itself.

    Returns:
        The percentage in the range 0-100, or 0 when the group has fewer
        than two usable values and therefore no successive differences.
    """
    rr = group['rr_adjusted'] if isinstance(group, pd.DataFrame) else group
    rr_diff = rr.diff().dropna()
    total_rr = len(rr_diff)
    if total_rr == 0:
        return 0
    nn50 = (rr_diff.abs() > 50).sum()
    return (nn50 / total_rr) * 100


for file_name in file_list:
    input_file = file_name
    output_file = f"pNN50_summary_{file_name}"

    # One bad file must not abort the batch, so failures are reported per
    # file and the loop moves on.
    try:
        df = pd.read_csv(input_file)

        # Normalise headers: drop stray whitespace and unify the two
        # spellings of the status column.
        df.columns = df.columns.str.strip()
        df = df.rename(columns={'CharacterStatus': 'Character_status'})

        if 'rr' in df.columns:
            # Non-numeric entries become NaN, are linearly interpolated, and
            # anything still missing falls back to the column mean.
            df['rr'] = pd.to_numeric(df['rr'], errors='coerce')
            df['rr'] = df['rr'].interpolate(method='linear')
            # Assign the result back instead of a chained inplace fillna,
            # which acts on a temporary and stops working in pandas 3.0.
            df['rr'] = df['rr'].fillna(df['rr'].mean())
            print(df['rr'])

            # Add zero-mean Gaussian noise (std 10) to every RR value.
            # NOTE: the generator is unseeded, so results differ between runs.
            df['rr_adjusted'] = df['rr'] + np.random.normal(loc=0, scale=10, size=df.shape[0])
            print(df['rr_adjusted'])

            # Select the value column before applying so the grouping column
            # is not handed to the helper (deprecated groupby.apply behaviour).
            grouped_rr = df.groupby('Character_status')['rr_adjusted']
            pnn50_results = grouped_rr.apply(calculate_pnn50)
            rr_counts = grouped_rr.count()

            pnn50_summary = pd.DataFrame({
                'pNN50': pnn50_results.round(2).astype(str) + ' %',
                'rr_count': rr_counts
            })

            pnn50_summary.to_csv(output_file, index=True)
            print(f"Processed and saved: {output_file}")
            print(pnn50_summary)
        else:
            # The lookup above is case-sensitive, so report the exact name.
            print(f"Error: No 'rr' column found in {input_file}")
    except Exception as e:
        print(f"Failed to process {input_file}: {e}")


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['rr'].fillna(df['rr'].mean(), inplace=True)
  pnn50_results = df.groupby('Character_status').apply(calculate_pnn50)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['rr'].fillna(df['rr'].mean(), inplace=True)
  pnn50_results = df.groupby('Character_status').apply(calculate_p

0        680.401375
1        680.401375
2        680.401375
3        680.401375
4        680.401375
            ...    
15866    814.000000
15867    814.000000
15868    814.000000
15869    814.000000
15870    814.000000
Name: rr, Length: 15871, dtype: float64
0        686.107696
1        681.532709
2        679.623273
3        677.562651
4        679.455191
            ...    
15866    815.577439
15867    815.457077
15868    818.817393
15869    817.434328
15870    809.957136
Name: rr_adjusted, Length: 15871, dtype: float64
Processed and saved: pNN50_summary_0000_man.csv
                   pNN50  rr_count
Character_status                  
LeftHandToFace     0.0 %      2003
LetfHandToBody     0.0 %      2391
RightHandToBody    0.0 %      1953
RightHandToFace    0.0 %      2900
TouchBody          0.0 %      2261
TouchFace          0.0 %      2512
Walk              0.05 %      1851
0        664.086445
1        664.086445
2        664.086445
3        664.086445
4        664.086445
        

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['rr'].fillna(df['rr'].mean(), inplace=True)
  pnn50_results = df.groupby('Character_status').apply(calculate_pnn50)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['rr'].fillna(df['rr'].mean(), inplace=True)
  pnn50_results = df.groupby('Character_status').apply(calculate_p