In [3]:
import pandas as pd
from datetime import datetime, timedelta
from typing import Union

In [None]:

def read_csv_file(file_path: str, delimiter: str = ',', header: Union[int, str, None] = 'infer', encoding: str = 'utf-8') -> pd.DataFrame:
    """
    Reads a CSV file and returns it as a pandas DataFrame.

    Thin wrapper around ``pd.read_csv`` that forwards the four arguments unchanged.

    :param file_path: The path to the CSV file.
    :param delimiter: Character that separates the fields in the CSV file. Default is ','.
    :param header: Index of the row containing the header (column names), or 'infer' to infer it
        as the first line, or None if the file has no header row. Default is 'infer'.
    :param encoding: The encoding of the CSV file. Default is 'utf-8'.
    :return: A pandas DataFrame containing the content of the CSV file.
    """
    # `header` is annotated to accept None because the documented "no header" mode requires it.
    return pd.read_csv(file_path, delimiter=delimiter, header=header, encoding=encoding)


In [16]:
def combine_files(cme_file: str, electron_file: str, output_file: str,
                  window_hours: float = 2, n_samples: int = 24) -> None:
    """
    Combines data from CME file and electron data file.
    For every row in the CME file, finds the electron and electron_high values inside the
    look-back window that ends at the row's ``donki_date`` (both ends inclusive) and appends
    them as additional columns. Writes the combined data to the specified output file.

    Column layout: for k = n_samples .. 1 the output gains ``e_t-k`` and ``eh_t-k``.
    The newest sample in the window is stored under the highest suffix (``e_t-{n_samples}``),
    the one before it under the next lower suffix, and so on. Slots without a sample stay empty.
    NOTE(review): if "t-1" was intended to be the sample closest to the CME, this mapping is
    inverted. It is kept as-is because changing it alters the output schema - confirm first.

    :param cme_file: Path to the CME CSV file. Must contain a 'donki_date' column formatted
        as "%Y-%m-%d %H:%M:%S".
    :param electron_file: Path to the electron data CSV file. Must contain 'time'
        ("%Y-%m-%dT%H:%M:%S.%f"), 'electron' and 'electron_high' columns.
    :param output_file: Path to the output CSV file.
    :param window_hours: Length of the look-back window in hours. Default is 2.
    :param n_samples: Maximum number of samples copied per CME row (and number of
        e_/eh_ column pairs created). Default is 24.
    :raises ValueError: If a timestamp does not match the expected format.
    :raises KeyError: If a required column is missing.
    """
    print("Reading CME and electron data...")
    cme_data = pd.read_csv(cme_file)
    print(cme_data.head())
    electron_data = pd.read_csv(electron_file)
    print(electron_data.head())
    electron_data['time'] = pd.to_datetime(electron_data['time'], format="%Y-%m-%dT%H:%M:%S.%f")
    print(electron_data.head())

    # "Newest sample" is only meaningful on chronologically ordered data, and a sorted
    # time column lets each window be located by binary search instead of a full scan.
    # Rows without a timestamp can never fall inside a window, so they are dropped here.
    electron_data = (
        electron_data.dropna(subset=['time'])
        .sort_values('time', kind='mergesort')  # stable: ties keep their file order
        .reset_index(drop=True)
    )
    electron_times = electron_data['time']
    electron_values = electron_data['electron'].to_numpy()
    electron_high_values = electron_data['electron_high'].to_numpy()

    print("Creating new columns for electron data...")
    for i in range(n_samples):
        cme_data[f'e_t-{n_samples - i}'] = None
        cme_data[f'eh_t-{n_samples - i}'] = None

    print("Iterating through CME data to combine with electron data...")
    # Parse into a separate Series so the 'donki_date' column is written back untouched.
    cme_times = pd.to_datetime(cme_data['donki_date'], format="%Y-%m-%d %H:%M:%S")
    window = timedelta(hours=window_hours)
    total_rows = len(cme_data)
    rows_with_data = 0

    for position, (label, cme_time) in enumerate(cme_times.items(), start=1):
        print(f"Processing CME row {position} of {total_rows}")
        if pd.isna(cme_time):
            print("Skipping CME row with missing donki_date")
            continue
        print(f"Processing CME at {cme_time}")
        start_time = cme_time - window
        print(f"Searching for electron data between {start_time} and {cme_time}")

        # Half-open slice [first, stop) of the sorted samples with start_time <= time <= cme_time.
        first = int(electron_times.searchsorted(start_time, side='left'))
        stop = int(electron_times.searchsorted(cme_time, side='right'))
        found = stop - first
        print(f"Found {found} corresponding electron data points for the {window_hours}-hour window")
        if found:
            rows_with_data += 1

        # Walk backwards from the newest sample and write cell by cell, so the other
        # columns of the row are never reassigned.
        for i in range(min(n_samples, found)):
            source = stop - 1 - i
            cme_data.at[label, f'e_t-{n_samples - i}'] = electron_values[source]
            cme_data.at[label, f'eh_t-{n_samples - i}'] = electron_high_values[source]

    if total_rows and rows_with_data == 0:
        print("Warning: no electron data fell inside any CME window - "
              "check that both files cover the same time range.")

    print("Writing combined data to the output file...")
    cme_data.to_csv(output_file, index=False)
    print("Operation completed successfully!")


In [17]:
# Input files (CME catalogue and electron measurements) and the destination for the merged table.
cme_file_path = "../cme_and_electron/DONKI_CDAW_CMEs.csv"
electron_file_path = "../cme_and_electron/electron_data.csv"
output_file_path = "../cme_and_electron/combined.csv"

# Keyword arguments make it explicit which path plays which role.
combine_files(
    cme_file=cme_file_path,
    electron_file=electron_file_path,
    output_file=output_file_path,
)

Reading CME and electron data...
            donki_date            cdaw_date  ESP  2nd_order_speed_20R  \
0  2010-04-03 09:54:00  2010-04-03 10:33:00    0                  661   
1  2010-06-13 07:32:00  2010-06-13 06:30:00    0                  435   
2  2010-06-20 03:18:00  2010-06-20 02:06:00    0                  548   
3  2010-07-03 01:30:00  2010-07-03 01:30:00    0                  447   
4  2010-08-01 02:42:00  2010-08-01 00:30:00    0                    0   

   latitude  longitude  donki_ha  donki_speed  solar_wind_speed  Type_2_Area  \
0         7        8.0        26          620             487.3            0   
1        17      117.0        30          500             427.5            0   
2       -13      -80.0        20          570             387.2            0   
3        -4       35.0        23          625             602.6            0   
4         7      -22.0        48          760             509.8            0   

   ...  Max_speed_past_day  CMEs_over_1000_past

In [21]:
# Load the electron measurements and parse the 'T'-separated 'time' strings into timestamps
electron_data = pd.read_csv(electron_file_path).assign(
    time=lambda frame: pd.to_datetime(frame['time'], format="%Y-%m-%dT%H:%M:%S.%f")
)

# Earliest and latest electron timestamps
electron_min_time, electron_max_time = electron_data['time'].min(), electron_data['time'].max()

In [22]:
# Load the CME catalogue and parse its 'donki_date' strings into timestamps
cme_data = pd.read_csv(cme_file_path).assign(
    donki_date=lambda frame: pd.to_datetime(frame['donki_date'], format="%Y-%m-%d %H:%M:%S")
)

# Earliest and latest CME timestamps
cme_min_time, cme_max_time = cme_data['donki_date'].min(), cme_data['donki_date'].max()


In [23]:
# Show the time span covered by each dataset, one item per line.
# combine_files only matches electron samples that fall inside a CME's look-back
# window, so the two spans have to overlap for it to fill anything in.
print(
    "Electron Data:",
    f"Minimum Time: {electron_min_time.strftime('%Y-%m-%d %H:%M:%S')}",
    f"Maximum Time: {electron_max_time.strftime('%Y-%m-%d %H:%M:%S')}",
    "\nCME Data:",
    f"Minimum Time: {cme_min_time.strftime('%Y-%m-%d %H:%M:%S')}",
    f"Maximum Time: {cme_max_time.strftime('%Y-%m-%d %H:%M:%S')}",
    sep="\n",
)

Electron Data:
Minimum Time: 1995-12-07 17:42:31
Maximum Time: 2002-01-31 23:57:26

CME Data:
Minimum Time: 2010-04-03 09:54:00
Maximum Time: 2017-09-04 19:39:00
