In [13]:
import numpy as np

# Load data from text files and compare them
def compare_sequences(features_file, labels_file):
    """Count the positions at which two integer text files disagree.

    Both files are read with ``np.loadtxt(..., dtype=int)`` and compared
    element by element.

    Parameters
    ----------
    features_file : str or path-like
        Path of the first file to load.
    labels_file : str or path-like
        Path of the second file to load.

    Returns
    -------
    int
        Number of positions whose values differ between the two files.

    Raises
    ------
    ValueError
        If the loaded arrays have different lengths, or the same length
        but a different shape (e.g. a different number of columns).
    """
    # np.loadtxt collapses a one-value file into a 0-d array, on which
    # len() raises TypeError; promote to 1-D so such files can be compared.
    X = np.atleast_1d(np.loadtxt(features_file, dtype=int))
    y = np.atleast_1d(np.loadtxt(labels_file, dtype=int))

    # Check if the lengths of the two arrays are equal
    if len(X) != len(y):
        raise ValueError("The number of lines in both files must be equal.")

    # Same length but different layout would make `X != y` broadcast the
    # arrays against each other and yield a meaningless count.
    if X.shape != y.shape:
        raise ValueError(
            f"The two files must have the same layout, got shapes {X.shape} and {y.shape}."
        )

    # Count differing positions; int() returns a plain Python integer
    # rather than a NumPy scalar.
    return int(np.sum(X != y))

# Main function
def main(features_file, labels_file):
    """Compare the two files and print how many values differ.

    Delegates the counting to ``compare_sequences`` and writes a single
    summary line to standard output.
    """
    mismatch_total = compare_sequences(features_file, labels_file)
    print(f"Number of lines with different values: {mismatch_total}")

# Example usage
if __name__ == "__main__":
    # The two files handed to main() for comparison; judging by the file
    # names both appear to be prediction outputs (TODO confirm).
    features_file, labels_file = 'pred_textseq.txt', 'pred_combined.txt'
    main(features_file, labels_file)


Number of lines with different values: 557


In [15]:
def count_differences(file1, file2):
    """Return how many line positions differ between two text files.

    Lines are compared after stripping surrounding whitespace. When one
    file is longer, each of its surplus lines is compared against an empty
    string, so it counts as a difference unless it is blank.
    """
    with open(file1, 'r') as left_handle, open(file2, 'r') as right_handle:
        left_rows = [row.strip() for row in left_handle]
        right_rows = [row.strip() for row in right_handle]

    shared = min(len(left_rows), len(right_rows))

    # Positions present in both files.
    mismatches = 0
    for idx in range(shared):
        if left_rows[idx] != right_rows[idx]:
            mismatches += 1

    # Positions present in only one file: the missing side is treated as ''.
    for leftover in left_rows[shared:] + right_rows[shared:]:
        if leftover != '':
            mismatches += 1

    return mismatches

def compare_files(file_names):
    """Run ``count_differences`` on every unordered pair of the given files.

    Returns a dict keyed by ``"<file1> vs <file2>"`` (in the order the
    files appear in ``file_names``) whose values are the difference counts.
    """
    total = len(file_names)

    # Every (earlier, later) pairing, in the same order a nested index
    # loop would visit them.
    pairings = [
        (file_names[first], file_names[second])
        for first in range(total)
        for second in range(first + 1, total)
    ]

    return {
        f"{left} vs {right}": count_differences(left, right)
        for left, right in pairings
    }

# The four prediction files to compare against each other
file_names = [
    'pred_emoticon.txt',
    'pred_deepfeat.txt',
    'pred_textseq.txt',
    'pred_combined.txt',
]

# Pairwise difference counts, keyed by "<file1> vs <file2>"
differences = compare_files(file_names)

# Report one line per pair
for comparison in differences:
    count = differences[comparison]
    print(f"Differences between {comparison}: {count}")


Differences between pred_emoticon.txt vs pred_deepfeat.txt: 840
Differences between pred_emoticon.txt vs pred_textseq.txt: 107
Differences between pred_emoticon.txt vs pred_combined.txt: 528
Differences between pred_deepfeat.txt vs pred_textseq.txt: 867
Differences between pred_deepfeat.txt vs pred_combined.txt: 380
Differences between pred_textseq.txt vs pred_combined.txt: 557
