In [1]:
import pandas as pd
import icartt
import os
import warnings
import re
from datetime import datetime
import csv
from datetime import datetime, timedelta
from netCDF4 import Dataset
import numpy as np
from scipy import stats
import glob
from math import pi
import ast

In [2]:
DOE_CAMPAIGNS = ["ACE-ENA", "ACMEV", "BBOP", "CACTI", "CARES", "GOAMAZON",
                 "ISDAC", "TCAP2012", "TCAP2013"]

RESTRICTED_PATH = rf"C:\Users\haika\Desktop\May_Research\may_datasets\restricted_campaigns"
COMPREHENSIVE_PATH = rf"C:\Users\haika\Desktop\May_Research\may_datasets\comprehensive_campaigns"

RESTRICTED_COMBINED_PATH = rf"C:\Users\haika\Desktop\May_Research\may_datasets\restricted_combined"
COMPREHENSIVE_COMBINED_PATH = rf"C:\Users\haika\Desktop\May_Research\may_datasets\comprehensive_combined"

In [3]:

def combine_doe_campaigns(source_path, output_path, output_filename):
    """
    Combine all CSV files containing DOE campaign names from source_path
    and save to output_path with given filename
    """
    # Create output directory if it doesn't exist
    os.makedirs(output_path, exist_ok=True)
    
    # Find all CSV files that contain any DOE campaign name
    matching_files = []
    
    for campaign in DOE_CAMPAIGNS:
        # Look for CSV files containing the campaign name
        pattern = os.path.join(source_path, f"*{campaign}*.csv")
        campaign_files = glob.glob(pattern)
        matching_files.extend(campaign_files)
    
    # Remove duplicates (in case a file matches multiple campaign names)
    matching_files = list(set(matching_files))
    
    if not matching_files:
        print(f"No CSV files found containing DOE campaign names in {source_path}")
        return
    
    print(f"Found {len(matching_files)} files to combine:")
    for file in matching_files:
        print(f"  - {os.path.basename(file)}")
    
    # Read and combine all CSV files
    combined_df = pd.DataFrame()
    
    for file_path in matching_files:
        try:
            df = pd.read_csv(file_path)
            combined_df = pd.concat([combined_df, df], ignore_index=True)
            print(f"Added {len(df)} rows from {os.path.basename(file_path)}")
        except Exception as e:
            print(f"Error reading {os.path.basename(file_path)}: {e}")
    
    # Save combined data
    output_file_path = os.path.join(output_path, output_filename)
    combined_df.to_csv(output_file_path, index=False)
    
    print(f"\nCombined data saved to: {output_file_path}")
    print(f"Total rows: {len(combined_df)}")
    print(f"Total columns: {len(combined_df.columns)}")
    
    return combined_df

# 1. Combine restricted campaigns
print("=== Combining Restricted Campaigns ===")
restricted_combined = combine_doe_campaigns(
    RESTRICTED_PATH, 
    RESTRICTED_COMBINED_PATH, 
    "DOE_restricted.csv"
)

print("\n" + "="*50 + "\n")

# 2. Combine comprehensive campaigns  
print("=== Combining Comprehensive Campaigns ===")
comprehensive_combined = combine_doe_campaigns(
    COMPREHENSIVE_PATH, 
    COMPREHENSIVE_COMBINED_PATH, 
    "DOE_comprehensive.csv"
)

=== Combining Restricted Campaigns ===
Found 9 files to combine:
  - CARES_restricted.csv
  - TCAP2012_restricted.csv
  - ACMEV_restricted.csv
  - ACE-ENA_restricted.csv
  - ISDAC_restricted.csv
  - TCAP2013_restricted.csv
  - BBOP_restricted.csv
  - GOAMAZON_restricted.csv
  - CACTI_restricted.csv
Added 222 rows from CARES_restricted.csv
Added 0 rows from TCAP2012_restricted.csv
Added 6968 rows from ACMEV_restricted.csv
Added 0 rows from ACE-ENA_restricted.csv
Added 0 rows from ISDAC_restricted.csv
Added 8644 rows from TCAP2013_restricted.csv


  combined_df = pd.concat([combined_df, df], ignore_index=True)
  combined_df = pd.concat([combined_df, df], ignore_index=True)
  combined_df = pd.concat([combined_df, df], ignore_index=True)


Added 7686 rows from BBOP_restricted.csv
Added 0 rows from GOAMAZON_restricted.csv
Added 1242 rows from CACTI_restricted.csv


  combined_df = pd.concat([combined_df, df], ignore_index=True)



Combined data saved to: C:\Users\haika\Desktop\May_Research\may_datasets\restricted_combined\DOE_restricted.csv
Total rows: 24762
Total columns: 32


=== Combining Comprehensive Campaigns ===
Found 9 files to combine:
  - TCAP2013_comprehensive.csv
  - GOAMAZON_comprehensive.csv
  - ACMEV_comprehensive.csv
  - ISDAC_comprehensive.csv
  - BBOP_comprehensive.csv
  - CARES_comprehensive.csv
  - ACE-ENA_comprehensive.csv
  - TCAP2012_comprehensive.csv
  - CACTI_comprehensive.csv
Added 18901 rows from TCAP2013_comprehensive.csv
Added 331893 rows from GOAMAZON_comprehensive.csv
Added 55849 rows from ACMEV_comprehensive.csv
Added 441569 rows from ISDAC_comprehensive.csv
Added 64847 rows from BBOP_comprehensive.csv
Added 2838 rows from CARES_comprehensive.csv
Added 546787 rows from ACE-ENA_comprehensive.csv
Added 15523 rows from TCAP2012_comprehensive.csv
Added 38596 rows from CACTI_comprehensive.csv

Combined data saved to: C:\Users\haika\Desktop\May_Research\may_datasets\comprehensive_combi