In [None]:
import pandas as pd

# Filter the main article list against the remove lists, then restore the
# articles that were excluded by mistake.
#
# Matching is done on the 'DOI' column. Missing DOIs are kept as missing
# values (never turned into the text "nan"), so articles without a DOI are
# neither matched against each other nor collapsed as "duplicates".


def clean_doi_column(frame):
    """Return a copy of *frame* with whitespace-stripped string DOIs.

    Missing or blank DOIs are stored as NaN instead of the strings "nan" or
    "", so they can never match a DOI from another table.
    """
    cleaned = frame.copy()
    raw = cleaned['DOI']
    stripped = raw.astype(str).str.strip()
    # `where` keeps the stripped text only for real, non-empty DOIs and
    # leaves NaN everywhere else.
    cleaned['DOI'] = stripped.where(raw.notna() & (stripped != ""))
    return cleaned


def drop_repeated_dois(frame):
    """Drop rows whose DOI already appeared earlier in *frame*.

    Rows without a DOI are always kept: a missing DOI says nothing about
    whether two rows describe the same article.
    """
    # Work on plain arrays so the mask is positional and does not depend on
    # the index being unique (concatenated frames repeat index labels).
    has_doi = frame['DOI'].notna().to_numpy()
    seen_before = frame.duplicated(subset='DOI').to_numpy()
    return frame.loc[~(has_doi & seen_before)]


def apply_exclusions(main_df, remove_df, restore_dois=()):
    """Remove the articles listed in *remove_df* and restore *restore_dois*.

    Both frames are expected to have a cleaned 'DOI' column (see
    ``clean_doi_column``).

    Returns a tuple ``(filtered_df, restored_df, final_df)``:
    * filtered_df -- rows of *main_df* whose DOI is not in *remove_df*
    * restored_df -- rows of *main_df* whose DOI is in *restore_dois*
    * final_df    -- filtered rows followed by restored rows, with repeated
                     DOIs dropped (first occurrence wins)
    """
    # Drop missing DOIs from the key set: `isin` would otherwise match NaN
    # against NaN and remove every article that has no DOI.
    remove_keys = remove_df['DOI'].dropna()
    is_removed = main_df['DOI'].isin(remove_keys).to_numpy()
    filtered_df = main_df.loc[~is_removed]

    is_restored = main_df['DOI'].isin(list(restore_dois)).to_numpy()
    restored_df = main_df.loc[is_restored]

    final_df = drop_repeated_dois(pd.concat([filtered_df, restored_df]))
    return filtered_df, restored_df, final_df


# The guard lets the helpers above be imported without touching the
# filesystem. In a notebook cell or when run as a script, __name__ is
# "__main__", so everything below runs and all names stay global.
if __name__ == "__main__":
    # Load main article list
    main_df = clean_doi_column(pd.read_csv("200_finals/papers_included_505.csv"))

    # Files with articles to remove
    remove_files = [
        "200_finals/remove-list/1 (1).csv",
        "200_finals/remove-list/1 (2).csv",
        "200_finals/remove-list/1 (3).csv",
        "200_finals/remove-list/1 (4).csv",
    ]

    # Collect all articles to remove (with full details)
    all_remove_articles = []
    for file in remove_files:
        df = pd.read_csv(file)
        all_remove_articles.append(df)
        print(f"Added {len(df)} articles from {file}")

    # Combine all remove articles, drop repeated DOIs and save the remove list.
    # NOTE: this file is written before the patch below, so it still contains
    # any article that is restored afterwards.
    remove_df = drop_repeated_dois(clean_doi_column(pd.concat(all_remove_articles)))
    remove_df.to_csv("removed_articles.csv", index=False)

    # PATCH: Add back mistakenly excluded DOIs
    misexcluded_dois = [
        "10.1016/j.buildenv.2020.106741",
        "10.1016/j.jobe.2023.106272",
        "10.1016/j.autcon.2024.105823",
        "10.1016/j.autcon.2020.103277",
        "10.1016/j.enbuild.2023.113291",
        "10.1016/j.aei.2023.102239",
        "10.1016/j.buildenv.2023.110595",
        # NOTE(review): same DOI as the first entry -- harmless for matching,
        # but possibly a typo for a different article; confirm.
        "10.1016/j.buildenv.2020.106741",
    ]

    # Filter the main list and add the mistakenly removed articles back
    filtered_df, misexcluded_articles, final_df = apply_exclusions(
        main_df, remove_df, misexcluded_dois
    )

    # A DOI that matches nothing in the main list is most likely a typo;
    # report it instead of ignoring it silently.
    unmatched_dois = sorted(set(misexcluded_dois) - set(main_df['DOI'].dropna()))
    if unmatched_dois:
        print(
            f"Warning: {len(unmatched_dois)} DOIs to restore were not found "
            f"in the main list: {unmatched_dois}"
        )

    # Save results
    final_df.to_csv("papers_included_filtered.csv", index=False)

    # Show summary
    print(f"\nOriginal: {len(main_df)} articles")
    print(f"Removed: {len(remove_df)} articles")
    print(f"Mistakenly excluded: {len(misexcluded_articles)} articles")
    print(f"Final after patch: {len(final_df)} articles")
    print("Saved: 'papers_included_filtered.csv' and 'removed_articles.csv'")

Added 4 articles from 200_finals/remove-list/1 (1).csv
Added 30 articles from 200_finals/remove-list/1 (2).csv
Added 6 articles from 200_finals/remove-list/1 (3).csv
Added 4 articles from 200_finals/remove-list/1 (4).csv

Original: 505 articles
Removed: 36 articles
Final: 478 articles
Saved: 'papers_included_filtered.csv' and 'removed_articles.csv'
