# Prepare dataset

### Clear Solomon data

Convert all distant (2) & unclear (3) interactions to no-interaction (0) to ensure dichotomous outcome.

In [1]:
import pandas as pd

# List of dataset names
dataset_names = ['DYAD06NF', 'DYAD10NF', 'DYAD11NF', 'DYAD12NF', 'DYAD14NF',
                  'DYAD15NF', 'DYAD16NF', 'DYAD18NF', 'DYAD21NF', 'DYAD23NF', 'DYAD24NF'
                ]

# Base directory for input and output
input_dir = '/Users/ruzenkakaldenbach/Desktop/Behaviour/solomon/'
output_dir = '/Users/ruzenkakaldenbach/Desktop/Behaviour/solomon/'

# Process each dataset
for dat_name in dataset_names:
    print(f"Processing {dat_name}...")
    
    # Load the dataset
    file_path = f"{input_dir}solomon_{dat_name}.csv"
    df = pd.read_csv(file_path)
    
    # Replace `2` (distant) and `3` (unclear) with `0` (no interaction)
    df[['si_ry', 'si_by', 'si_rb']] = df[['si_ry', 'si_by', 'si_rb']].replace({2: 0, 3: 0})
    
    # Save the modified dataset
    output_file = f"{output_dir}solomon_{dat_name}_dichotomous.csv"
    df.to_csv(output_file, index=False)
    print(f"Saved processed file to {output_file}")

print("Processing complete.")


Processing DYAD06NF...
Saved processed file to /Users/ruzenkakaldenbach/Desktop/Behaviour/solomon/solomon_DYAD06NF_dichotomous.csv
Processing DYAD10NF...
Saved processed file to /Users/ruzenkakaldenbach/Desktop/Behaviour/solomon/solomon_DYAD10NF_dichotomous.csv
Processing DYAD11NF...
Saved processed file to /Users/ruzenkakaldenbach/Desktop/Behaviour/solomon/solomon_DYAD11NF_dichotomous.csv
Processing DYAD12NF...
Saved processed file to /Users/ruzenkakaldenbach/Desktop/Behaviour/solomon/solomon_DYAD12NF_dichotomous.csv
Processing DYAD14NF...
Saved processed file to /Users/ruzenkakaldenbach/Desktop/Behaviour/solomon/solomon_DYAD14NF_dichotomous.csv
Processing DYAD15NF...
Saved processed file to /Users/ruzenkakaldenbach/Desktop/Behaviour/solomon/solomon_DYAD15NF_dichotomous.csv
Processing DYAD16NF...
Saved processed file to /Users/ruzenkakaldenbach/Desktop/Behaviour/solomon/solomon_DYAD16NF_dichotomous.csv
Processing DYAD18NF...
Saved processed file to /Users/ruzenkakaldenbach/Desktop/Beh

In [2]:
df

Unnamed: 0,frame_timestamp,si_ry,si_by,si_rb
0,0.00,0,0,0
1,0.25,0,0,0
2,0.50,0,0,0
3,0.75,0,0,0
4,1.00,0,0,0
...,...,...,...,...
2414,603.50,0,0,0
2415,603.75,0,0,0
2416,604.00,0,0,0
2417,604.25,0,0,0


### Create a common dataset for Loopy and Solomon data

The resulting dataset will contain Loopy data as predictor (distance, angle, facing) and Solomon data as outcome (social interaction). All dyads will be listed one below the other. All spreadsheets will then be listed one below the other.

In [4]:
import pandas as pd
import numpy as np

# List of dataset names
dataset_names = ['DYAD06NF', 'DYAD10NF', 'DYAD11NF', 'DYAD12NF', 'DYAD14NF',
                  'DYAD15NF', 'DYAD16NF', 'DYAD18NF', 'DYAD21NF', 'DYAD23NF', 'DYAD24NF'
                ]

# Directories for input and output
solomon_dir = '/Users/ruzenkakaldenbach/Desktop/Behaviour/solomon/'
loopy_dir = '/Users/ruzenkakaldenbach/Desktop/Behaviour/Loopy_preprocessed_data/'
output_file = '/Users/ruzenkakaldenbach/Desktop/Behaviour/common_dataset.csv'

# Initialize an empty DataFrame for the combined dataset
common_dataset = pd.DataFrame()

# Loop through each dataset
for dataset in dataset_names:
    print(f"Processing {dataset}...")

    # Load Solomon data
    solomon_file = f'{solomon_dir}solomon_{dataset}_dichotomous.csv'
    solomon_data = pd.read_csv(solomon_file)

    # Load Loopy data
    loopy_file = f'{loopy_dir}Loopy_{dataset}__processed.csv'
    loopy_data = pd.read_csv(loopy_file)

    # Extract relevant dyad columns from Loopy data
    loopy_data_expanded = pd.DataFrame()
    for dyad, columns in [
        ('red-yellow', ['dist_c_ry', 'dist_f_ry', 'deg_ry', 'facing_ry']),
        ('blue-yellow', ['dist_c_by', 'dist_f_by', 'deg_by', 'facing_by']),
        ('red-blue', ['dist_c_rb', 'dist_f_rb', 'deg_rb', 'facing_rb']),
    ]:
        dyad_data = loopy_data[columns].copy()  # Extract and copy relevant columns
        dyad_data['dyad'] = dyad
        dyad_data['frame_timestamp'] = loopy_data['frame_timestamp']
        dyad_data['video'] = dataset  # Add a column with the dataset name
        dyad_data.columns = ['distance_central', 'distance_front', 'angle', 'facing', 'dyad', 'frame_timestamp', 'video']
        loopy_data_expanded = pd.concat([loopy_data_expanded, dyad_data], ignore_index=True)

    # Extract relevant dyad columns from Solomon data
    solomon_data_expanded = pd.DataFrame()
    for dyad, column in [
        ('red-yellow', 'si_ry'),
        ('blue-yellow', 'si_by'),
        ('red-blue', 'si_rb'),
    ]:
        dyad_data = solomon_data[['frame_timestamp', column]].copy()
        dyad_data['dyad'] = dyad
        dyad_data['video'] = dataset  # Add a column with the dataset name
        dyad_data.columns = ['frame_timestamp', 'interaction', 'dyad', 'video']
        solomon_data_expanded = pd.concat([solomon_data_expanded, dyad_data], ignore_index=True)

    # Merge Loopy and Solomon data
    merged_data = pd.merge(loopy_data_expanded, solomon_data_expanded, on=['frame_timestamp', 'dyad', 'video'], how='inner')

    # Append to the common dataset
    common_dataset = pd.concat([common_dataset, merged_data], ignore_index=True)

# Save the combined dataset to a CSV file
common_dataset.to_csv(output_file, index=False)
print(f"Saved combined dataset to {output_file}")

common_dataset


Processing DYAD06NF...
Processing DYAD10NF...
Processing DYAD11NF...
Processing DYAD12NF...
Processing DYAD14NF...
Processing DYAD15NF...
Processing DYAD16NF...
Processing DYAD18NF...
Processing DYAD21NF...
Processing DYAD23NF...
Processing DYAD24NF...
Saved combined dataset to /Users/ruzenkakaldenbach/Desktop/Behaviour/common_dataset.csv


Unnamed: 0,distance_central,distance_front,angle,facing,dyad,frame_timestamp,video,interaction
0,790.687264,800.025973,0.938036,0,red-yellow,0.00,DYAD06NF,0
1,792.303958,804.337355,1.308301,0,red-yellow,0.25,DYAD06NF,0
2,783.932649,793.611577,6.627138,0,red-yellow,0.50,DYAD06NF,0
3,853.480357,782.492790,22.068184,1,red-yellow,0.75,DYAD06NF,0
4,865.821415,780.327547,21.733946,1,red-yellow,1.00,DYAD06NF,0
...,...,...,...,...,...,...,...,...
79681,2164.464169,1912.349103,153.181946,1,red-blue,602.75,DYAD24NF,0
79682,2237.461605,2001.133668,153.815582,1,red-blue,603.00,DYAD24NF,0
79683,2160.783724,1892.775810,168.084441,1,red-blue,603.25,DYAD24NF,0
79684,2063.770212,1721.512237,167.615933,1,red-blue,603.50,DYAD24NF,0


In [5]:
# Create the new column `distance` as the mean of `distance_front` and `distance_central`
common_dataset['distance'] = common_dataset[['distance_front', 'distance_central']].mean(axis=1)

# Drop the old columns `distance_front` and `distance_central`
common_dataset = common_dataset.drop(columns=['distance_front', 'distance_central'])

# Reorder the columns to place `distance` at the position of the original columns
columns_order = ['distance'] + [col for col in common_dataset.columns if col != 'distance']
common_dataset = common_dataset[columns_order]

# Save the updated dataset if needed
common_dataset.to_csv("/Users/ruzenkakaldenbach/Desktop/common_dataset.csv", index=False)

common_dataset

Unnamed: 0,distance,angle,facing,dyad,frame_timestamp,video,interaction
0,795.356618,0.938036,0,red-yellow,0.00,DYAD06NF,0
1,798.320657,1.308301,0,red-yellow,0.25,DYAD06NF,0
2,788.772113,6.627138,0,red-yellow,0.50,DYAD06NF,0
3,817.986573,22.068184,1,red-yellow,0.75,DYAD06NF,0
4,823.074481,21.733946,1,red-yellow,1.00,DYAD06NF,0
...,...,...,...,...,...,...,...
79681,2038.406636,153.181946,1,red-blue,602.75,DYAD24NF,0
79682,2119.297637,153.815582,1,red-blue,603.00,DYAD24NF,0
79683,2026.779767,168.084441,1,red-blue,603.25,DYAD24NF,0
79684,1892.641224,167.615933,1,red-blue,603.50,DYAD24NF,0


# Apply ML

In [9]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import itertools

# Load the dataset
file_path = '/Users/ruzenkakaldenbach/Desktop/common_dataset_mean_distance.csv'
common_dataset = pd.read_csv(file_path)

# List of dataset names (videos)
dataset_names = ['DYAD06NF', 'DYAD10NF', 'DYAD11NF', 'DYAD12NF', 'DYAD14NF',
                  'DYAD15NF', 'DYAD16NF', 'DYAD18NF', 'DYAD21NF', 'DYAD23NF', 'DYAD24NF'
                ]


# Lists to store accuracies for averaging later
training_accuracies = []
testing_accuracies = []

# Loop through each possible test video
for test_video in dataset_names:
    print(f"Processing with test video: {test_video}...")

    # Divide into training and test datasets
    # Generate all combinations of 3 test videos (leave 8 for training)
    test_combinations = list(itertools.combinations(dataset_names, 3))

    # Loop through each test set combination
    for test_videos in test_combinations:  
        train_videos = [video for video in dataset_names if video not in test_videos] # All except selected 3 test videos

    train_data = common_dataset[common_dataset['video'].isin(train_videos)]
    test_data = common_dataset[common_dataset['video'].isin(test_videos)]


   
    # Shuffle datasets after train-test split to prevent biases
    train_data_balanced = train_data.sample(frac=1, random_state=42).reset_index(drop=True)
    test_data_balanced = test_data.sample(frac=1, random_state=42).reset_index(drop=True)


    # Logistic Regression
    # Select predictors (distance, angle, facing) and outcome (interaction)
    predictors = ['distance', 'angle', 'facing']
    outcome = 'interaction'

    X_train = train_data_balanced[predictors]
    y_train = train_data_balanced[outcome]
    X_test = test_data_balanced[predictors]
    y_test = test_data_balanced[outcome]

    # Train the logistic regression model
    model = LogisticRegression(random_state=42)
    model.fit(X_train, y_train)

    # Evaluate on the training set
    y_train_pred = model.predict(X_train) # Once the model is fit, this only writes out the existing rows for how well the model fits the data to predict outcome from predictor
    train_accuracy = accuracy_score(y_train, y_train_pred) * 100 # Built-in function imported, compares the true values (y_train) with the predicted values (y_train_pred) and calculates the proportion of correct predictions
    training_accuracies.append(train_accuracy) # Store for averaging across videos later
    print("\nTraining Set Performance:")
    # Format and display the training confusion matrix
    cm_train = confusion_matrix(y_train, y_train_pred)
    cm_train_df = pd.DataFrame(
        cm_train,
        index=["Actual 0", "Actual 1"],
        columns=["Predicted 0", "Predicted 1"]
    )
    # True Positives (TP): Correctly predicted 1.
    # True Negatives (TN): Correctly predicted 0.
    # False Positives (FP): Predicted 1 when the true value was 0.
    # False Negatives (FN): Predicted 0 when the true value was 1.
    print("\nConfusion Matrix (Training):")
    print(cm_train_df)
    #print(confusion_matrix(y_train, y_train_pred))
    print("\nClassification Report (Training):")
    # Precision: Proportion of positive predictions (1) that were correct, TP/(TP+FP)
    # Recall or sensitivity: Proportion of actual positives (1) that were identified, TP/(TP+FN)
    # F1-Score: Harmonic mean of precision and recall, 2x(precision*recall)/(precision-recall)
    # Support: Total number of actual occurences within the 0 and 1 category
    print(classification_report(y_train, y_train_pred))
    print(f"\nAccuracy Score (Training): {train_accuracy:.2f}%")

    # Evaluate on the test set
    y_test_pred = model.predict(X_test)
    test_accuracy = accuracy_score(y_test, y_test_pred) * 100
    testing_accuracies.append(test_accuracy)
    print("\nTest Set Performance:")
    # Format and display the training confusion matrix
    cm_test = confusion_matrix(y_test, y_test_pred)
    cm_test_df = pd.DataFrame(
        cm_test,
        index=["Actual 0", "Actual 1"],
        columns=["Predicted 0", "Predicted 1"]
    )
    print("\nConfusion Matrix (Test):")
    print(cm_test_df)
    #print(confusion_matrix(y_test, y_test_pred))
    print("\nClassification Report (Test):")
    print(classification_report(y_test, y_test_pred))
    print(f"\nAccuracy Score (Test): {test_accuracy:.2f}%")

    # Save the balanced datasets for this split
    train_output_path = f'/Users/ruzenkakaldenbach/Desktop/train_dataset_balanced_{",".join(test_videos)}.csv'
    test_output_path = f'/Users/ruzenkakaldenbach/Desktop/test_dataset_balanced_{",".join(test_videos)}.csv'
    train_data_balanced.to_csv(train_output_path, index=False)
    test_data_balanced.to_csv(test_output_path, index=False)

    print(f"Training dataset for test video {test_video} saved to {train_output_path}")
    print(f"Test dataset for test video {test_video} saved to {test_output_path}")

# Calculate average accuracies
average_train_accuracy = sum(training_accuracies) / len(training_accuracies)
average_test_accuracy = sum(testing_accuracies) / len(testing_accuracies)

print("\nOverall Performance:")
print(f"Average Training Accuracy: {average_train_accuracy:.2f}%")
print(f"Average Testing Accuracy: {average_test_accuracy:.2f}%")
print("Processing complete.")


Processing with test video: DYAD06NF...

Training Set Performance:

Confusion Matrix (Training):
          Predicted 0  Predicted 1
Actual 0        19572          272
Actual 1         1134          700

Classification Report (Training):
              precision    recall  f1-score   support

           0       0.95      0.99      0.97     19844
           1       0.72      0.38      0.50      1834

    accuracy                           0.94     21678
   macro avg       0.83      0.68      0.73     21678
weighted avg       0.93      0.94      0.93     21678


Accuracy Score (Training): 93.51%

Test Set Performance:

Confusion Matrix (Test):
          Predicted 0  Predicted 1
Actual 0         8715          159
Actual 1          277          119

Classification Report (Test):
              precision    recall  f1-score   support

           0       0.97      0.98      0.98      8874
           1       0.43      0.30      0.35       396

    accuracy                           0.95      927

### Include interaction between preditors

In [10]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import itertools

# Function to add interaction terms
def add_interaction_terms(data):
    data = data.copy()
    data['distance_angle'] = data['distance'] * data['angle']
    data['distance_facing'] = data['distance'] * data['facing']
    data['angle_facing'] = data['angle'] * data['facing']
    data['distance_angle_facing'] = data['distance'] * data['angle'] * data['facing']
    return data

# Load the dataset
file_path = '/Users/ruzenkakaldenbach/Desktop/common_dataset_mean_distance.csv'
common_dataset = pd.read_csv(file_path)

# List of dataset names (videos)
dataset_names = ['DYAD06NF', 'DYAD10NF', 'DYAD11NF', 'DYAD12NF', 'DYAD14NF',
                  'DYAD15NF', 'DYAD16NF', 'DYAD18NF', 'DYAD21NF', 'DYAD23NF', 'DYAD24NF'
                ]


# Lists to store accuracies for averaging later
training_accuracies = []
testing_accuracies = []

# Loop through each possible test video
for test_video in dataset_names:
    print(f"Processing with test video: {test_video}...")

    # Divide into training and test datasets
    # Generate all combinations of 3 test videos (leave 8 for training)
    test_combinations = list(itertools.combinations(dataset_names, 3))

    # Loop through each test set combination
    for test_videos in test_combinations:  
        train_videos = [video for video in dataset_names if video not in test_videos] # All except selected 3 test videos

    train_data = common_dataset[common_dataset['video'].isin(train_videos)]
    test_data = common_dataset[common_dataset['video'].isin(test_videos)]


   
    # Shuffle datasets after train-test split to prevent biases
    train_data_balanced = train_data.sample(frac=1, random_state=42).reset_index(drop=True)
    test_data_balanced = test_data.sample(frac=1, random_state=42).reset_index(drop=True)

    # Add interaction terms to both train and test sets
    train_data_balanced = add_interaction_terms(train_data_balanced)
    test_data_balanced = add_interaction_terms(test_data_balanced)

    # Logistic Regression
    # Update predictors list to include interaction terms
    predictors = ['distance', 'angle', 'facing', 'distance_angle', 'distance_facing', 'angle_facing', 'distance_angle_facing']
    outcome = 'interaction'

    X_train = train_data_balanced[predictors]
    y_train = train_data_balanced[outcome]
    X_test = test_data_balanced[predictors]
    y_test = test_data_balanced[outcome]

    # Train the logistic regression model
    model = LogisticRegression(random_state=42)
    model.fit(X_train, y_train)

    # Evaluate on the training set
    y_train_pred = model.predict(X_train) # Once the model is fit, this only writes out the existing rows for how well the model fits the data to predict outcome from predictor
    train_accuracy = accuracy_score(y_train, y_train_pred) * 100 # Built-in function imported, compares the true values (y_train) with the predicted values (y_train_pred) and calculates the proportion of correct predictions
    training_accuracies.append(train_accuracy) # Store for averaging across videos later
    print("\nTraining Set Performance:")
    # Format and display the training confusion matrix
    cm_train = confusion_matrix(y_train, y_train_pred)
    cm_train_df = pd.DataFrame(
        cm_train,
        index=["Actual 0", "Actual 1"],
        columns=["Predicted 0", "Predicted 1"]
    )
    # True Positives (TP): Correctly predicted 1.
    # True Negatives (TN): Correctly predicted 0.
    # False Positives (FP): Predicted 1 when the true value was 0.
    # False Negatives (FN): Predicted 0 when the true value was 1.
    print("\nConfusion Matrix (Training):")
    print(cm_train_df)
    #print(confusion_matrix(y_train, y_train_pred))
    print("\nClassification Report (Training):")
    # Precision: Proportion of positive predictions (1) that were correct, TP/(TP+FP)
    # Recall or sensitivity: Proportion of actual positives (1) that were identified, TP/(TP+FN)
    # F1-Score: Harmonic mean of precision and recall, 2x(precision*recall)/(precision-recall)
    # Support: Total number of actual occurences within the 0 and 1 category
    print(classification_report(y_train, y_train_pred))
    print(f"\nAccuracy Score (Training): {train_accuracy:.2f}%")

    # Evaluate on the test set
    y_test_pred = model.predict(X_test)
    test_accuracy = accuracy_score(y_test, y_test_pred) * 100
    testing_accuracies.append(test_accuracy)
    print("\nTest Set Performance:")
    # Format and display the training confusion matrix
    cm_test = confusion_matrix(y_test, y_test_pred)
    cm_test_df = pd.DataFrame(
        cm_test,
        index=["Actual 0", "Actual 1"],
        columns=["Predicted 0", "Predicted 1"]
    )
    print("\nConfusion Matrix (Test):")
    print(cm_test_df)
    #print(confusion_matrix(y_test, y_test_pred))
    print("\nClassification Report (Test):")
    print(classification_report(y_test, y_test_pred))
    print(f"\nAccuracy Score (Test): {test_accuracy:.2f}%")

    # Save the balanced datasets for this split
    train_output_path = f'/Users/ruzenkakaldenbach/Desktop/train_dataset_balanced_{",".join(test_videos)}.csv'
    test_output_path = f'/Users/ruzenkakaldenbach/Desktop/test_dataset_balanced_{",".join(test_videos)}.csv'
    train_data_balanced.to_csv(train_output_path, index=False)
    test_data_balanced.to_csv(test_output_path, index=False)

    print(f"Training dataset for test video {test_video} saved to {train_output_path}")
    print(f"Test dataset for test video {test_video} saved to {test_output_path}")

# Calculate average accuracies
average_train_accuracy = sum(training_accuracies) / len(training_accuracies)
average_test_accuracy = sum(testing_accuracies) / len(testing_accuracies)

print("\nOverall Performance:")
print(f"Average Training Accuracy: {average_train_accuracy:.2f}%")
print(f"Average Testing Accuracy: {average_test_accuracy:.2f}%")
print("Processing complete.")


Processing with test video: DYAD06NF...

Training Set Performance:

Confusion Matrix (Training):
          Predicted 0  Predicted 1
Actual 0        19647          197
Actual 1         1255          579

Classification Report (Training):
              precision    recall  f1-score   support

           0       0.94      0.99      0.96     19844
           1       0.75      0.32      0.44      1834

    accuracy                           0.93     21678
   macro avg       0.84      0.65      0.70     21678
weighted avg       0.92      0.93      0.92     21678


Accuracy Score (Training): 93.30%

Test Set Performance:

Confusion Matrix (Test):
          Predicted 0  Predicted 1
Actual 0         8741          133
Actual 1          290          106

Classification Report (Test):
              precision    recall  f1-score   support

           0       0.97      0.99      0.98      8874
           1       0.44      0.27      0.33       396

    accuracy                           0.95      927

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Training dataset for test video DYAD06NF saved to /Users/ruzenkakaldenbach/Desktop/train_dataset_balanced_DYAD21NF,DYAD23NF,DYAD24NF.csv
Test dataset for test video DYAD06NF saved to /Users/ruzenkakaldenbach/Desktop/test_dataset_balanced_DYAD21NF,DYAD23NF,DYAD24NF.csv
Processing with test video: DYAD10NF...

Training Set Performance:

Confusion Matrix (Training):
          Predicted 0  Predicted 1
Actual 0        19647          197
Actual 1         1255          579

Classification Report (Training):
              precision    recall  f1-score   support

           0       0.94      0.99      0.96     19844
           1       0.75      0.32      0.44      1834

    accuracy                           0.93     21678
   macro avg       0.84      0.65      0.70     21678
weighted avg       0.92      0.93      0.92     21678


Accuracy Score (Training): 93.30%

Test Set Performance:

Confusion Matrix (Test):
          Predicted 0  Predicted 1
Actual 0         8741          133
Actual 1     

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Training dataset for test video DYAD10NF saved to /Users/ruzenkakaldenbach/Desktop/train_dataset_balanced_DYAD21NF,DYAD23NF,DYAD24NF.csv
Test dataset for test video DYAD10NF saved to /Users/ruzenkakaldenbach/Desktop/test_dataset_balanced_DYAD21NF,DYAD23NF,DYAD24NF.csv
Processing with test video: DYAD11NF...

Training Set Performance:

Confusion Matrix (Training):
          Predicted 0  Predicted 1
Actual 0        19647          197
Actual 1         1255          579

Classification Report (Training):
              precision    recall  f1-score   support

           0       0.94      0.99      0.96     19844
           1       0.75      0.32      0.44      1834

    accuracy                           0.93     21678
   macro avg       0.84      0.65      0.70     21678
weighted avg       0.92      0.93      0.92     21678


Accuracy Score (Training): 93.30%

Test Set Performance:

Confusion Matrix (Test):
          Predicted 0  Predicted 1
Actual 0         8741          133
Actual 1     

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Training dataset for test video DYAD11NF saved to /Users/ruzenkakaldenbach/Desktop/train_dataset_balanced_DYAD21NF,DYAD23NF,DYAD24NF.csv
Test dataset for test video DYAD11NF saved to /Users/ruzenkakaldenbach/Desktop/test_dataset_balanced_DYAD21NF,DYAD23NF,DYAD24NF.csv
Processing with test video: DYAD12NF...

Training Set Performance:

Confusion Matrix (Training):
          Predicted 0  Predicted 1
Actual 0        19647          197
Actual 1         1255          579

Classification Report (Training):
              precision    recall  f1-score   support

           0       0.94      0.99      0.96     19844
           1       0.75      0.32      0.44      1834

    accuracy                           0.93     21678
   macro avg       0.84      0.65      0.70     21678
weighted avg       0.92      0.93      0.92     21678


Accuracy Score (Training): 93.30%

Test Set Performance:

Confusion Matrix (Test):
          Predicted 0  Predicted 1
Actual 0         8741          133
Actual 1     

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Training dataset for test video DYAD12NF saved to /Users/ruzenkakaldenbach/Desktop/train_dataset_balanced_DYAD21NF,DYAD23NF,DYAD24NF.csv
Test dataset for test video DYAD12NF saved to /Users/ruzenkakaldenbach/Desktop/test_dataset_balanced_DYAD21NF,DYAD23NF,DYAD24NF.csv
Processing with test video: DYAD14NF...

Training Set Performance:

Confusion Matrix (Training):
          Predicted 0  Predicted 1
Actual 0        19647          197
Actual 1         1255          579

Classification Report (Training):
              precision    recall  f1-score   support

           0       0.94      0.99      0.96     19844
           1       0.75      0.32      0.44      1834

    accuracy                           0.93     21678
   macro avg       0.84      0.65      0.70     21678
weighted avg       0.92      0.93      0.92     21678


Accuracy Score (Training): 93.30%

Test Set Performance:

Confusion Matrix (Test):
          Predicted 0  Predicted 1
Actual 0         8741          133
Actual 1     

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Training dataset for test video DYAD14NF saved to /Users/ruzenkakaldenbach/Desktop/train_dataset_balanced_DYAD21NF,DYAD23NF,DYAD24NF.csv
Test dataset for test video DYAD14NF saved to /Users/ruzenkakaldenbach/Desktop/test_dataset_balanced_DYAD21NF,DYAD23NF,DYAD24NF.csv
Processing with test video: DYAD15NF...

Training Set Performance:

Confusion Matrix (Training):
          Predicted 0  Predicted 1
Actual 0        19647          197
Actual 1         1255          579

Classification Report (Training):
              precision    recall  f1-score   support

           0       0.94      0.99      0.96     19844
           1       0.75      0.32      0.44      1834

    accuracy                           0.93     21678
   macro avg       0.84      0.65      0.70     21678
weighted avg       0.92      0.93      0.92     21678


Accuracy Score (Training): 93.30%

Test Set Performance:

Confusion Matrix (Test):
          Predicted 0  Predicted 1
Actual 0         8741          133
Actual 1     

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Training dataset for test video DYAD15NF saved to /Users/ruzenkakaldenbach/Desktop/train_dataset_balanced_DYAD21NF,DYAD23NF,DYAD24NF.csv
Test dataset for test video DYAD15NF saved to /Users/ruzenkakaldenbach/Desktop/test_dataset_balanced_DYAD21NF,DYAD23NF,DYAD24NF.csv
Processing with test video: DYAD16NF...

Training Set Performance:

Confusion Matrix (Training):
          Predicted 0  Predicted 1
Actual 0        19647          197
Actual 1         1255          579

Classification Report (Training):
              precision    recall  f1-score   support

           0       0.94      0.99      0.96     19844
           1       0.75      0.32      0.44      1834

    accuracy                           0.93     21678
   macro avg       0.84      0.65      0.70     21678
weighted avg       0.92      0.93      0.92     21678


Accuracy Score (Training): 93.30%

Test Set Performance:

Confusion Matrix (Test):
          Predicted 0  Predicted 1
Actual 0         8741          133
Actual 1     

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Training dataset for test video DYAD16NF saved to /Users/ruzenkakaldenbach/Desktop/train_dataset_balanced_DYAD21NF,DYAD23NF,DYAD24NF.csv
Test dataset for test video DYAD16NF saved to /Users/ruzenkakaldenbach/Desktop/test_dataset_balanced_DYAD21NF,DYAD23NF,DYAD24NF.csv
Processing with test video: DYAD18NF...

Training Set Performance:

Confusion Matrix (Training):
          Predicted 0  Predicted 1
Actual 0        19647          197
Actual 1         1255          579

Classification Report (Training):
              precision    recall  f1-score   support

           0       0.94      0.99      0.96     19844
           1       0.75      0.32      0.44      1834

    accuracy                           0.93     21678
   macro avg       0.84      0.65      0.70     21678
weighted avg       0.92      0.93      0.92     21678


Accuracy Score (Training): 93.30%

Test Set Performance:

Confusion Matrix (Test):
          Predicted 0  Predicted 1
Actual 0         8741          133
Actual 1     

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Training dataset for test video DYAD18NF saved to /Users/ruzenkakaldenbach/Desktop/train_dataset_balanced_DYAD21NF,DYAD23NF,DYAD24NF.csv
Test dataset for test video DYAD18NF saved to /Users/ruzenkakaldenbach/Desktop/test_dataset_balanced_DYAD21NF,DYAD23NF,DYAD24NF.csv
Processing with test video: DYAD21NF...

Training Set Performance:

Confusion Matrix (Training):
          Predicted 0  Predicted 1
Actual 0        19647          197
Actual 1         1255          579

Classification Report (Training):
              precision    recall  f1-score   support

           0       0.94      0.99      0.96     19844
           1       0.75      0.32      0.44      1834

    accuracy                           0.93     21678
   macro avg       0.84      0.65      0.70     21678
weighted avg       0.92      0.93      0.92     21678


Accuracy Score (Training): 93.30%

Test Set Performance:

Confusion Matrix (Test):
          Predicted 0  Predicted 1
Actual 0         8741          133
Actual 1     

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Training dataset for test video DYAD21NF saved to /Users/ruzenkakaldenbach/Desktop/train_dataset_balanced_DYAD21NF,DYAD23NF,DYAD24NF.csv
Test dataset for test video DYAD21NF saved to /Users/ruzenkakaldenbach/Desktop/test_dataset_balanced_DYAD21NF,DYAD23NF,DYAD24NF.csv
Processing with test video: DYAD23NF...

Training Set Performance:

Confusion Matrix (Training):
          Predicted 0  Predicted 1
Actual 0        19647          197
Actual 1         1255          579

Classification Report (Training):
              precision    recall  f1-score   support

           0       0.94      0.99      0.96     19844
           1       0.75      0.32      0.44      1834

    accuracy                           0.93     21678
   macro avg       0.84      0.65      0.70     21678
weighted avg       0.92      0.93      0.92     21678


Accuracy Score (Training): 93.30%

Test Set Performance:

Confusion Matrix (Test):
          Predicted 0  Predicted 1
Actual 0         8741          133
Actual 1     

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Training dataset for test video DYAD23NF saved to /Users/ruzenkakaldenbach/Desktop/train_dataset_balanced_DYAD21NF,DYAD23NF,DYAD24NF.csv
Test dataset for test video DYAD23NF saved to /Users/ruzenkakaldenbach/Desktop/test_dataset_balanced_DYAD21NF,DYAD23NF,DYAD24NF.csv
Processing with test video: DYAD24NF...

Training Set Performance:

Confusion Matrix (Training):
          Predicted 0  Predicted 1
Actual 0        19647          197
Actual 1         1255          579

Classification Report (Training):
              precision    recall  f1-score   support

           0       0.94      0.99      0.96     19844
           1       0.75      0.32      0.44      1834

    accuracy                           0.93     21678
   macro avg       0.84      0.65      0.70     21678
weighted avg       0.92      0.93      0.92     21678


Accuracy Score (Training): 93.30%

Test Set Performance:

Confusion Matrix (Test):
          Predicted 0  Predicted 1
Actual 0         8741          133
Actual 1     

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
