In [6]:
import csv

def replace_values(filename):
    """Recode COHORT labels and write a filtered copy of a long-format CSV.

    Reads ``filename`` (which must have a header row), maps the COHORT value
    ``'Control'`` to ``'0'`` and ``'PD'`` to ``'1'``, keeps only the rows whose
    recoded COHORT is ``'0'`` or ``'1'``, and writes the columns PATNO, COHORT,
    TESTNAME and TESTVALUE to a new file. Any other input columns are dropped.

    The output name is the input name with a trailing ``.csv`` (any letter
    case) replaced by ``_modified.csv``; if the input has no ``.csv`` suffix,
    ``_modified.csv`` is appended. The output path therefore never equals the
    input path, so the source file is never overwritten.

    Two summary lines are printed: the output path and the number of distinct
    PATNO values seen in the input. That count covers every input row,
    including rows that are filtered out of the output.

    Args:
        filename: Path to the input CSV file.

    Raises:
        KeyError: If a processed row lacks one of the required columns.
    """
    fieldnames = ['PATNO', 'COHORT', 'TESTNAME', 'TESTVALUE']
    cohort_codes = {'Control': '0', 'PD': '1'}
    kept_codes = ('0', '1')

    # Only the trailing extension is rewritten. A blanket str.replace would
    # also touch '.csv' inside directory names and, when nothing matched,
    # would hand back the input path as the output path.
    if filename.lower().endswith('.csv'):
        stem = filename[:-len('.csv')]
    else:
        stem = filename
    new_filename = stem + '_modified.csv'

    unique_values = set()
    filtered_data = []

    # newline='' lets the csv module do its own line-ending handling, so
    # quoted fields containing line breaks are read back intact.
    with open(filename, 'r', newline='') as file:
        for row in csv.DictReader(file):
            cohort = cohort_codes.get(row['COHORT'], row['COHORT'])
            unique_values.add(row['PATNO'])

            if cohort in kept_codes:
                kept_row = {field: row[field] for field in fieldnames}
                kept_row['COHORT'] = cohort
                filtered_data.append(kept_row)

    with open(new_filename, 'w', newline='') as file:
        writer = csv.DictWriter(file, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(filtered_data)

    print(f"Modified CSV file saved as {new_filename}")
    print(f"Number of unique values in PATNO column: {len(unique_values)}")

# Change 'Test2.csv' to the path of the CSV file you want to recode;
# the result is written alongside it as 'Test2_modified.csv'.
replace_values('Test2.csv')


Modified CSV file saved as Test2_modified.csv
Number of unique values in PATNO column: 218


In [8]:
import csv

def restructure_data(filename):
    """Pivot a long-format test CSV into one row per patient.

    Reads ``filename``, which must have a header row with the columns PATNO,
    COHORT, TESTNAME and TESTVALUE, and writes a wide CSV whose columns are
    ``PD`` followed by every distinct TESTNAME in sorted order. There is one
    output row per distinct PATNO, in order of first appearance. PATNO itself
    is not written to the output.

    Details that follow from how rows are accumulated:

    * ``PD`` holds the COHORT value of the first row seen for that patient.
    * If a patient has several rows for the same TESTNAME, the last one wins.
    * Tests a patient has no row for are written as empty cells.
    * A TESTNAME that is literally ``'PD'`` would collide with the cohort
      column and overwrite it.

    The output name is the input name with a trailing ``_modified.csv`` (or,
    failing that, a trailing ``.csv``; any letter case) replaced by
    ``_restructured.csv``. With neither suffix, ``_restructured.csv`` is
    appended. The output path therefore never equals the input path, so the
    source file is never overwritten.

    Args:
        filename: Path to the long-format CSV file.

    Raises:
        KeyError: If a row lacks one of the required columns.
    """
    # Strip only a trailing suffix. A blanket str.replace leaves the name
    # unchanged when '_modified.csv' is absent, which made the output path
    # identical to the input path.
    lowered = filename.lower()
    for suffix in ('_modified.csv', '.csv'):
        if lowered.endswith(suffix):
            stem = filename[:-len(suffix)]
            break
    else:
        stem = filename
    new_filename = stem + '_restructured.csv'

    patients = {}  # PATNO -> output row; dict order keeps first-seen order
    testnames = set()  # every distinct TESTNAME, used to build the header

    # newline='' lets the csv module do its own line-ending handling, so
    # quoted fields containing line breaks are read back intact.
    with open(filename, 'r', newline='') as file:
        for row in csv.DictReader(file):
            patno = row['PATNO']
            cohort = row['COHORT']
            testname = row['TESTNAME']
            testvalue = row['TESTVALUE']

            testnames.add(testname)

            # First sighting of a patient seeds the record with its cohort;
            # later rows only add or overwrite test columns.
            record = patients.setdefault(patno, {'PD': cohort})
            record[testname] = testvalue

    fieldnames = ['PD'] + sorted(testnames)

    with open(new_filename, 'w', newline='') as file:
        writer = csv.DictWriter(file, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(patients.values())

    print(f"Restructured CSV file saved as {new_filename}")

# Change 'Test2_modified.csv' to the path of your recoded long-format CSV;
# the result is written alongside it as 'Test2_restructured.csv'.
restructure_data('Test2_modified.csv')


Restructured CSV file saved as Test2_restructured.csv
