In [6]:
import pandas as pd
import csv

# Read a CSV file into a list of rows, skipping empty lines
def read_csv(file_path):
    """Read a CSV file and return its non-empty records.

    Args:
        file_path: Path of the UTF-8 encoded CSV file to read.

    Returns:
        A list with one list of field strings per record, in file order.
        Records that parse to an empty list (blank lines) are skipped.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the content is not valid UTF-8.
        csv.Error: If the csv module cannot parse the content.
    """
    # newline='' disables universal-newline translation so the csv module
    # handles line endings itself; line breaks embedded in quoted fields are
    # then returned exactly as they appear in the file.
    with open(file_path, 'r', encoding='utf-8', newline='') as file:
        return [row for row in csv.reader(file) if row]

def _load_frame(path):
    """Build a DataFrame from a CSV file, using its first row as the header."""
    # Parse the file once and split header from records, rather than reading
    # the same file twice.
    rows = read_csv(path)
    return pd.DataFrame(rows[1:], columns=rows[0])


def _prepare_events(events, prefix):
    """Drop 'End Date', rename 'Start Date' to 'Date', and prefix other columns."""
    events = events.drop('End Date', axis=1).rename(columns={'Start Date': 'Date'})
    # 'Date' is the join key, so it must keep the same name in every frame.
    events.columns = [col if col == 'Date' else prefix + col for col in events.columns]
    return events


# Load the three input files; quoted fields are handled by the csv module
# inside read_csv.
merged_data = _load_frame('merged_data.csv')
english_events = _prepare_events(_load_frame('english_events.csv'), 'eng-')
urdu_events = _prepare_events(_load_frame('urdu_events.csv'), 'urdu-')
output_file = 'combined_data.csv'

# Merge the three dataframes on 'Date'; an inner join keeps only dates that
# are present in all three inputs.
combined_data = pd.merge(merged_data, english_events, on='Date', how='inner')
combined_data = pd.merge(combined_data, urdu_events, on='Date', how='inner')

# Fill missing values by forward-fill, then backward-fill for any leading gaps
# (this is not interpolation).
# NOTE(review): fields come from csv.reader as strings, so empty cells are ''
# rather than NaN and are presumably left untouched here — confirm intent.
combined_data = combined_data.ffill().bfill()

# Save the combined data to a new CSV file
combined_data.to_csv(output_file, index=False)

print(f'Combined data saved to {output_file}')


Combined data saved to combined_data.csv
