In [12]:
import pandas as pd
import datetime
import math
import numpy as np

class CoordinateConverter:
    def __init__(self, filename):
        """
        Initialize a CoordinateConverter object with the given filename and a fixed epoch date.
        
        :param filename: The name of the CSV file containing data to be processed.
        """
        self.filename = filename
        self.epoch = datetime.datetime(2000, 1, 1)
        self.grbs_error = []

    def convert_to_xyz(self, longitude, latitude, altitude):
        """
        Convert longitude, latitude, and altitude to Cartesian coordinates (x, y, z).
        
        :param longitude: The longitude in degrees.
        :param latitude: The latitude in degrees.
        :param altitude: The altitude in meters.
        :return: Tuple of Cartesian coordinates (x, y, z).
        """
        longitude_rad = math.radians(longitude)
        latitude_rad = math.radians(latitude)
        
        x = math.cos(latitude_rad) * math.cos(longitude_rad)
        y = math.cos(latitude_rad) * math.sin(longitude_rad)
        z = math.sin(latitude_rad)
        
        return x, y, z

    def process_data(self):
        """
        Read, process, and transform data from the CSV file.
        
        :return: A DataFrame containing processed data with added Cartesian coordinates.
        """
        # Read data from CSV file
        df = pd.read_csv(self.filename)
        
        # Filter rows where exposure(s) is 1
        filtered_df = df[df["exposure(s)"].fillna(0) == 1]
        
        # Drop unnecessary columns
        filtered_df = filtered_df.drop(columns=["sat", "exposure(s)"])
        
        # Convert "direction" column to binary (1 for "north-to-south", 0 for others)
        filtered_df["direction"] = filtered_df["direction"].apply(lambda x: 1 if x == "north-to-south" else 0)
        
        # Convert "time" column to days since epoch
        filtered_df["time"] = pd.to_datetime(filtered_df["time"])
        filtered_df["time"] = (filtered_df["time"] - self.epoch).dt.days + (filtered_df["time"] - self.epoch).dt.seconds / (24 * 60 * 60)
        filtered_df["time"] = filtered_df["time"] - min(filtered_df["time"])
        
        # Convert longitude, latitude, and altitude to Cartesian coordinates
        xyz_coordinates = filtered_df.apply(lambda row: self.convert_to_xyz(row["longitude"], row["latitude"], row["altitude"]), axis=1)
        xyz_df = pd.DataFrame(xyz_coordinates.tolist(), columns=["x", "y", "z"], index=xyz_coordinates.index)
        
        # Concatenate Cartesian coordinates with the filtered DataFrame
        self.result_df = pd.concat([filtered_df, xyz_df], axis=1)
        
        return self.result_df
    
    
    
    def data_split_GONG(self, list_of_GRB_peak_time):
        grbs = []
        df = self.process_data().copy()
        for value in list_of_GRB_peak_time:
            self.grbs_error.append(abs(value - df["time"]))
            if min(list(abs(value - df["time"]))) < 0.1:
                i = np.argmin(abs(value - df["time"]))
                start = max(0, i - (128 + 128))
                end = min(len(df["time"]), i + 128)
                grbs.append(df.iloc[start:end].to_numpy())
                df.drop(df.index[start:end], inplace=True)
        return grbs, df

    def run_pipeline(self, list_of_GRB_peak_time):
        grbs, remaining_data = self.data_split_GONG(list_of_GRB_peak_time)
        return grbs, remaining_data
    

filename = 'all_cubesat_data_newest.csv'
converter = CoordinateConverter(filename)
list_of_GRB_peak_time = [114.91921875020489,
 129.88771018525586,
 147.94674768531695,
 187.43627245351672,
 187.74966655112803,
 488.10759710660204,
 488.5059300926514,
 498.9314782405272,
 502.10961004626006,
 508.0093171298504,
 518.717834490817,
 529.9432841436937,
 530.7335758102126,
 543.053472106345,
 547.800332986284,
 548.2275435994379,
 553.7249170136638,
 556.4554271991365,
 562.5454549770802,
 571.5575516205281,
 575.7461495371535,
 575.9857468632981,
 576.0758231482469,
 576.7625504629686,
 584.1270097224042,
 586.6135028935969,
 591.852205092553,
 601.0158259258606,
 624.2627044213004,
 627.5154278934933,
 661.4058703705668,
 663.6949682869017,
 666.6253009261563,
 668.1562620485201,
 668.233470717445,
 679.0737339700572,
 689.1082076388411,
 690.0697833332233,
 692.1556327547878,
 693.4455982637592,
 696.7549721063115,
 705.3836290510371,
 713.1214045137167,
 732.7726436341181,
 747.0471399538219]
%time grbs, remaining_data = converter.data_split_GONG(list_of_GRB_peak_time)

CPU times: user 31.9 s, sys: 1.59 s, total: 33.5 s
Wall time: 32.3 s


In [3]:
len(converter.result_df)

2055588

In [15]:
min(list(converter.grbs_error[-1]))

0.32180430567404983

In [14]:
len(remaining_data)

2051748

In [13]:
len(grbs)

10