In [None]:
import pandas as pd
import os
from datetime import datetime

# Merge the cleaned AirlineQuality and TripAdvisor review files into a single
# chronologically ordered, de-duplicated CSV.

# Step 1: Input / output locations (relative to the notebook's working dir)
file_airline = "../data_cleaning/cleaned_airlinequality_ethiopian_airlines_reviews.csv"
file_trip = "../data_cleaning/cleaned_tripadvisor_ethiopian_airlines_reviews.csv"
final_output = "../data_merging/merged_cleaned_ethiopian_airlines_reviews.csv"

# Three-letter month prefix -> month number, used only to build the sort key.
month_map = {
    'jan': 1, 'feb': 2, 'mar': 3, 'apr': 4, 'may': 5, 'jun': 6,
    'jul': 7, 'aug': 8, 'sep': 9, 'oct': 10, 'nov': 11, 'dec': 12
}


def _standardize_columns(df):
    """Return a copy of *df* whose column names are stripped and lower-cased."""
    out = df.copy()
    out.columns = out.columns.str.strip().str.lower()
    return out


def _align_columns(df_airline, df_trip):
    """Return *df_trip* with the same column names and order as *df_airline*.

    When both frames already carry the same (unique) names, the columns are
    matched by NAME, so a different column order in the TripAdvisor file can
    no longer put values under the wrong header. Only when the names differ
    are the TripAdvisor columns relabelled by position.

    Raises:
        ValueError: if the names differ and the column counts do not match,
            so a positional relabel is impossible.
    """
    airline_cols = list(df_airline.columns)
    trip_cols = list(df_trip.columns)

    names_are_unique = len(set(airline_cols)) == len(airline_cols)
    if names_are_unique and sorted(trip_cols) == sorted(airline_cols):
        return df_trip[airline_cols]

    if len(trip_cols) != len(airline_cols):
        raise ValueError(
            "Cannot align review files: AirlineQuality has "
            f"{len(airline_cols)} columns {airline_cols} but TripAdvisor has "
            f"{len(trip_cols)} columns {trip_cols}"
        )

    out = df_trip.copy()
    out.columns = df_airline.columns
    return out


def _month_numbers(months):
    """Map a month column to numbers 1-12 (NaN where unrecognised).

    Accepts month names/abbreviations in any case ("January", "feb") as well
    as numeric months (3, "3", 3.0), so a numeric column does not break the
    `.str` accessor.
    """
    text = months.astype(str).str.strip()
    by_name = pd.to_numeric(text.str[:3].str.lower().map(month_map), errors="coerce")
    by_digit = pd.to_numeric(text, errors="coerce")
    by_digit = by_digit.where(by_digit.between(1, 12))
    return by_name.fillna(by_digit)


def merge_review_frames(df_airline, df_trip, current_year=None):
    """Combine both review frames into one sorted, de-duplicated frame.

    Args:
        df_airline: AirlineQuality reviews; must contain 'month' and 'year'
            columns (after name normalisation).
        df_trip: TripAdvisor reviews with the same columns.
        current_year: Latest year to keep. Defaults to the system's current
            year.

    Returns:
        A new DataFrame sorted by year then month, without duplicate rows,
        with a fresh 0..n-1 index. The inputs are not modified.

    Raises:
        ValueError: if the two frames cannot be aligned column-for-column.
    """
    if current_year is None:
        current_year = datetime.now().year

    # Step 3: Standardize column names and align structure
    airline = _standardize_columns(df_airline)
    trip = _align_columns(airline, _standardize_columns(df_trip))

    # Step 4: Merge the datasets
    merged = pd.concat([airline, trip], ignore_index=True)

    # Step 5: Normalize date columns
    merged['month_num'] = _month_numbers(merged['month'])
    merged['year'] = pd.to_numeric(merged['year'], errors='coerce')

    # Step 6: Filter out future years. Rows whose year could not be parsed
    # are NaN, fail the comparison, and are dropped as well.
    merged = merged[merged['year'] <= current_year].copy()

    # Coercion may have turned the column into float64; restore integers so
    # the CSV contains "2019" rather than "2019.0".
    if (merged['year'] % 1 == 0).all():
        merged['year'] = merged['year'].astype('int64')

    # Step 7: Sort chronologically and remove the helper sort key
    merged = merged.sort_values(by=['year', 'month_num']).drop(columns='month_num')

    # Step 8: Drop duplicate rows, then renumber so the index has no gaps
    return merged.drop_duplicates().reset_index(drop=True)


# The guard is true when this runs as a notebook cell or a script, so the
# same globals as before are defined; it only keeps the file I/O from running
# when the helpers above are imported elsewhere.
if __name__ == "__main__":
    # Step 2: Read both datasets
    df_airline = _standardize_columns(pd.read_csv(file_airline))
    df_trip = _align_columns(df_airline, _standardize_columns(pd.read_csv(file_trip)))

    current_year = datetime.now().year
    merged_df = merge_review_frames(df_airline, df_trip, current_year=current_year)

    # Step 9: Save the cleaned and merged output file
    os.makedirs(os.path.dirname(final_output), exist_ok=True)
    merged_df.to_csv(final_output, index=False, encoding='utf-8')

    # Step 10: Summary preview
    print("Final cleaned review dataset saved to:", final_output)
    print("Total records:", len(merged_df))


Final cleaned review dataset saved to: C:\Users\abro27\OneDrive\Desktop\Mak\Education\3.Data_Analytics\Final Project\Capstone Projects\Datas\Final_DataSet\merged_cleaned_ethiopian_airlines_reviews.csv
Total records: 4738
