# Merge the emissions CSV (built from the API data) with the concatenated web-scraped reviews

In [1]:
import pandas as pd

# --- File locations ---------------------------------------------------
# Emissions table (created with the API data).
emissions_csv = 'filtered_emissions_with_links.csv'
# Review data; this file is read with ';' as the field separator.
reviews_csv = 'tripadvisor_reviews_automated.csv'  # Replace with your filename
# Where the combined table is written.
output_csv = 'emissions_with_reviews.csv'

# --- Load both inputs -------------------------------------------------
emissions_df = pd.read_csv(emissions_csv)
reviews_df = pd.read_csv(reviews_csv, sep=';')


def _join_review_texts(texts):
    """Return the non-missing values of *texts* joined by ' || '.

    Missing entries are dropped first and the remaining values are cast
    to ``str``, so a group with no usable text yields an empty string.
    """
    present = texts.dropna()
    return ' || '.join(present.astype(str))


# --- One row per destination -------------------------------------------
# 'reviews' holds every review text of the destination in a single string;
# 'average_rating' is the mean of the destination's 'Rating' values.
reviews_by_destination = reviews_df.groupby('Destination')
aggregated_reviews = reviews_by_destination.agg(
    reviews=('Content', _join_review_texts),
    average_rating=('Rating', 'mean'),
)
aggregated_reviews = aggregated_reviews.reset_index()

# --- Attach the review summary to the emissions rows --------------------
# Left join: every emissions row is kept, and rows whose destination has
# no reviews get missing values in the two new columns.
merged_df = emissions_df.merge(
    aggregated_reviews,
    how='left',
    left_on='destination',
    right_on='Destination',
)
# The join keeps both key columns; discard the one from the reviews side.
merged_df = merged_df.drop(columns=['Destination'])

# --- Write the result ----------------------------------------------------
merged_df.to_csv(output_csv, index=False, encoding='utf-8')

print(f"Merged CSV saved as '{output_csv}'")

Merged CSV saved as 'emissions_with_reviews.csv'
