In [4]:
import pandas as pd

# Build the modelling table: join per-phase temperature and HRV features with
# per-donor general data, keep Phases 1 and 2, drop incomplete rows and write
# the result to 'End_dataframe.csv'.

# Load the datasets
file_path1 = 'HRV_Normalized.csv'
file_path2 = 'Max_Temp_Normalized.csv'
file_path3 = 'General_data.csv'

processed_videos = pd.read_csv(file_path1)
max_temperatures = pd.read_csv(file_path2)
general_data = pd.read_csv(file_path3)

# Convert IDs to the same type for all dataframes so the merge keys compare
# equal regardless of how each CSV was parsed.
processed_videos['ID'] = processed_videos['ID'].astype(str)
max_temperatures['ID'] = max_temperatures['ID'].astype(str)
general_data['ID'] = general_data['ID'].astype(str)

# Merge the dataframes on ID and Phase. HRV rows labelled 'Phase 3' are
# excluded up front; the outer join keeps rows present in only one source
# (they surface later as rows with missing values).
merged_data = pd.merge(
    max_temperatures,
    processed_videos[processed_videos['Phase'] != 'Phase 3'],
    on=['ID', 'Phase'],
    how='outer'
)

# Merge General_data on ID, retaining every row and column of merged_data
# (left join); the general columns are repeated for each phase of a donor.
final_data = pd.merge(
    merged_data,
    general_data[['ID', 'Age', 'Gender', 'Prior_Donation', 'VVR_sum', 'VVR_Encoded']],
    on='ID',
    how='left'
)

# Convert ID back to integers so sorting is numeric rather than lexicographic
# ('10' would otherwise sort before '2'), then sort.
# NOTE(review): this requires every ID string to be a plain integer literal.
final_data['ID'] = final_data['ID'].astype(int)
final_data_sorted = final_data.sort_values(by=['ID', 'Phase']).reset_index(drop=True)

# Filter only Phases 1 and 2. Filter the *sorted* frame so the ordering
# established above is carried through to the saved file.
final_data_filtered = final_data_sorted[
    final_data_sorted['Phase'].isin(['Phase 1', 'Phase 2'])
]

# Identify rows with missing values (kept for inspection; not written out)
rows_with_missing_values = final_data_filtered[final_data_filtered.isnull().any(axis=1)]

# Remove rows with missing values. The explicit copy detaches the result from
# the filtered slice so the modifications below act on an independent frame.
final_data_cleaned = final_data_filtered.dropna().copy()

# Convert specific columns to integers. astype() with a per-column mapping
# returns a frame whose columns really have integer dtype; assigning through
# .loc[:, cols] can write the values into the existing float columns instead.
columns_to_int = ['Age', 'Gender', 'Prior_Donation', 'VVR_sum', 'VVR_Encoded']
final_data_cleaned = final_data_cleaned.astype({col: int for col in columns_to_int})

# Rearrange columns as specified
columns_order = [
    'ID', 'Phase', 'Age', 'Gender', 'Prior_Donation',
    'Lcheek_max', 'Rcheek_max', 'nose_max', 'chin_max', 'below_nose_max',
    'HRV_minmax', 'VVR_sum', 'VVR_Encoded'
]
final_data_cleaned = final_data_cleaned[columns_order]

# Save the cleaned dataset
final_ordered_file_path = 'End_dataframe.csv'
final_data_cleaned.to_csv(final_ordered_file_path, index=False)


