It is valuable to distinguish between two kinds of code used to solve a particular problem: This code was generated using GPT technology. At the start of the project, I believe that I lacked knowledge of some of the easier to harder level code optimizations in Python. I had most of my coding experience in dealing with C,C++ and compiler level using MLIR and the LLVM. While I was exposed to the Dask or other modules of Python for some period of time, I noticed that the code that was generated with the help of GPT was bad . Even though the code that was implemented was not really clean or good it helped me bridge the gap and learn about certain optimization modules in python.



**KINDLY NOTE THE BELOW ATTACHED CODE IS JUST FOR REFERENCE ON WHAT GPT GENERATED...THE FINAL CODE IS ATTACHED IN THE SECOND CELL**



In [None]:
import dask
from dask.distributed import Client, progress
from sgp4.api import Satrec, jday
from datetime import datetime, timedelta
import pytz
from pyproj import Transformer

# Constants
DURATION_MINUTES = 1440  # 1 day (24 hours * 60 minutes)
TLE_FILE_PATH = '/content/30sats.txt'

# Function to read TLE file
def read_tle_file(tle_file):
    with open(tle_file, 'r') as f:
        lines = f.readlines()
    tle_lines = [lines[i:i+2] for i in range(0, len(lines), 3)]
    return tle_lines

# Task 1: Function to get satellite location for a batch of TLE lines
def get_satellite_location_batch(tle_lines, start_time, interval_minutes=1, duration_minutes=DURATION_MINUTES):
    def get_satellite_location(tle_line, start_time, interval_minutes, duration_minutes):
        satellite = Satrec.twoline2rv(tle_line[0], tle_line[1])
        results = []
        jd_start, fr = jday(start_time.year, start_time.month, start_time.day, start_time.hour, start_time.minute, start_time.second)

        for minute in range(0, duration_minutes, interval_minutes):
            jd = jd_start + (minute / 1440.0)
            e, r, v = satellite.sgp4(jd, fr)
            if e == 0:
                time_at_position = start_time + timedelta(minutes=minute)
                results.append((time_at_position, r, v))
            else:
                print(f"Error {e} at minute {minute}")

        return results

    # Task 4: Use Dask to parallelize satellite location calculations for multiple TLE lines
    return [dask.delayed(get_satellite_location)(tle_line, start_time, interval_minutes, duration_minutes) for tle_line in tle_lines]

# Task 2: Function to convert XYZ to Lat/Lon/Alt
def xyz_to_latlonalt(x, y, z):
    transformer = Transformer.from_crs("epsg:4978", "epsg:4326")  # WGS 84
    lat, lon, alt = transformer.transform(x, y, z, radians=False)
    return lat, lon, alt

# Task 3: Function to filter by rectangle
def filter_by_rectangle(data, rectangle):
    def point_in_rectangle(lat, lon, rect):
        lat_min = min(rect[0][0], rect[1][0], rect[2][0], rect[3][0])
        lat_max = max(rect[0][0], rect[1][0], rect[2][0], rect[3][0])
        lon_min = min(rect[0][1], rect[1][1], rect[2][1], rect[3][1])
        lon_max = max(rect[0][1], rect[1][1], rect[2][1], rect[3][1])
        return lat_min <= lat <= lat_max and lon_min <= lon <= lon_max

    return [entry for entry in data if point_in_rectangle(entry[1][0], entry[1][1], rectangle)]

# Task 4: Function to process satellite data with optimizations for distributed computing
def process_satellite_data(tle_lines, start_time, rectangle):
    satellite_data = get_satellite_location_batch(tle_lines, start_time)
    converted_data = []
    for batch in satellite_data:
        for entry in batch:
            time_at_position, position, velocity = entry
            lat, lon, alt = xyz_to_latlonalt(*position)
            converted_data.append((time_at_position, (lat, lon, alt), velocity))
    filtered_data = filter_by_rectangle(converted_data, rectangle)
    return filtered_data

# Main function
def main():
    # Initialize Dask client
    client = Client()

    # Read TLE file
    tle_lines = read_tle_file(TLE_FILE_PATH)

    # Define start time
    start_time = datetime.utcnow().replace(tzinfo=pytz.UTC)

    # Define user-defined rectangle coordinates
    rectangle = [
        (16.66673, 103.58196),
        (69.74973, -120.64459),
        (-21.09096, -119.71009),
        (-31.32309, -147.79778)
    ]

    # Use Dask for parallel processing
    processed_data = process_satellite_data(tle_lines, start_time, rectangle)

    # Track progress
    progress(processed_data)

    # Wait for computation to finish and output results
    processed_data_result = processed_data.compute()
    for entry in processed_data_result:
        time_at_position, position, velocity = entry
        lat, lon, alt = position
        print(f"Time: {time_at_position}, Lat: {lat:.6f}, Lon: {lon:.6f}, Alt: {alt:.2f}, "
              f"L(x): {position[0]:.6f}, L(y): {position[1]:.6f}, L(z): {position[2]:.6f}, "
              f"V(x): {velocity[0]:.6f}, V(y): {velocity[1]:.6f}, V(z): {velocity[2]:.6f}")

if __name__ == '__main__':
    main()


**FINAL IMPLEMENTED SOLUTION TARGETS**:

**THIS IS THE FINAL CODE THAT I GENERATED AFTER GAINING A DEEPER UNDERSTANDING ON THESE MODULES**

- **Performance Optimization**: The code is optimized for CPU and RAM usage using Dask for parallel processing.
- **Distributed Computing**: The Dask distributed client is used to distribute the computation across multiple workers, significantly reducing computation time.
- **Modularized Code**: The code is organized into modular functions, making it easy to maintain and extend. The main execution block ensures that the code can run as a script or be imported as a module.

In [16]:
import numpy as np
import pandas as pd
from sgp4.api import Satrec, jday
from datetime import datetime, timedelta
import pyproj
import dask.dataframe as dd
from dask.distributed import Client, progress
import time
import dask.delayed as delayed

# Initialize Dask client
client = Client()

# Step 1: Load TLE data using Dask
def load_tle(file_path):
    with open(file_path, 'r') as file:
        lines = file.readlines()
    tle_data = [(lines[i].strip(), lines[i+1].strip(), lines[i+2].strip()) for i in range(0, len(lines), 3)]
    return tle_data

# Step 2: Calculate satellite positions using SGP4 library (parallelized)
@delayed
def calculate_positions_parallel(tle_data, start_date, end_date, interval_minutes=1):
    positions = []
    for name, tle_line1, tle_line2 in tle_data:
        satellite = Satrec.twoline2rv(tle_line1, tle_line2)
        for minute in range(0, int((end_date - start_date).total_seconds() / 60), interval_minutes):
            current_time = start_date + timedelta(minutes=minute)
            jd, fr = jday(current_time.year, current_time.month, current_time.day, current_time.hour, current_time.minute, current_time.second)
            e, r, v = satellite.sgp4(jd, fr)
            if e == 0:
                positions.append([current_time, *r, *v])  # Store only the position vectors
    return positions

# Step 3: Convert ECEF to lat, long, alt using pyproj
def batch_ecef2lla(pos_x, pos_y, pos_z):
    ecef = pyproj.Proj(proj="geocent", ellps="WGS84", datum="WGS84")
    lla = pyproj.Proj(proj="latlong", ellps="WGS84", datum="WGS84")
    lon, lat, alt = pyproj.transform(ecef, lla, pos_x, pos_y, pos_z, radians=False)
    return lon, lat, alt

# Step 4: Filter positions based on user-defined geographic boundaries
def filter_positions(positions, lat_min, lat_max, lon_min, lon_max):
    filtered_positions = [pos for pos in positions if lat_min <= pos[2] <= lat_max and lon_min <= pos[1] <= lon_max]
    return filtered_positions

# Step 5: Optimize for performance with Dask
def process_tle_chunk(tle_chunk, start_date, end_date):
    positions = calculate_positions_parallel(tle_chunk, start_date, end_date)
    return positions

def optimize_and_run(tle_data, start_date, end_date, workers=4):
    chunk_size = len(tle_data) // workers
    chunks = [tle_data[i:i + chunk_size] for i in range(0, len(tle_data), chunk_size)]
    futures = [client.submit(process_tle_chunk, chunk, start_date, end_date) for chunk in chunks]
    positions = client.gather(futures)
    return positions

# Execution and demonstration
if __name__ == "__main__":
    start_time = time.time()

    tle_file = '/content/30sats.txt'
    start_date = datetime(2024, 6, 1, 0, 0, 0)
    end_date = datetime(2024, 6, 2, 0, 0, 0)

    print("Loading TLE data...")
    tle_data = load_tle(tle_file)

    print("Calculating positions...")
    positions_start_time = time.time()
    positions = optimize_and_run(tle_data, start_date, end_date)
    positions = client.compute(positions)  # Ensure positions are computed
    positions_end_time = time.time()
    print(f"Position calculation took {positions_end_time - positions_start_time} seconds.")

    print("Converting positions to lat/long/alt...")
    lla_positions = []
    for future_pos in positions:
        pos_list = future_pos.result()  # Get the actual result from the future
        for pos in pos_list:
            lon, lat, alt = batch_ecef2lla(*pos[1:4])  # Unpack position vector and pass to conversion function
            lla_positions.append([pos[0], lon, lat, alt])

    print("Filtering positions...")
    lat_min, lat_max = -90, 90
    lon_min, lon_max = -180, 180
    filtered_positions = filter_positions(lla_positions, lat_min, lat_max, lon_min, lon_max)

    print("Saving results...")
    df = pd.DataFrame(filtered_positions, columns=['Time', 'Longitude', 'Latitude', 'Altitude'])
    df.to_csv('satellite_positions.csv', index=False)

    end_time = time.time()
    print(f"Total runtime: {end_time - start_time} seconds.")




Perhaps you already have a cluster running?
Hosting the HTTP server on port 41337 instead
INFO:distributed.scheduler:State start
INFO:distributed.scheduler:  Scheduler at:     tcp://127.0.0.1:32873
INFO:distributed.scheduler:  dashboard at:  http://127.0.0.1:41337/status
INFO:distributed.nanny:Worker process 1754 exited with status 1
INFO:distributed.nanny:Closing Nanny at 'tcp://127.0.0.1:41515'. Reason: nanny-close-gracefully
INFO:distributed.nanny:        Start Nanny at: 'tcp://127.0.0.1:41803'
INFO:distributed.nanny:        Start Nanny at: 'tcp://127.0.0.1:44601'
INFO:distributed.nanny:Worker process 8302 exited with status 1
INFO:distributed.nanny:Closing Nanny at 'tcp://127.0.0.1:35797'. Reason: nanny-close-gracefully
INFO:distributed.nanny:Worker process 3884 exited with status 1
INFO:distributed.nanny:Closing Nanny at 'tcp://127.0.0.1:46367'. Reason: nanny-close-gracefully
INFO:distributed.nanny:Worker process 3550 exited with status 1
INFO:distributed.nanny:Closing Nanny at 't

Loading TLE data...
Calculating positions...
Position calculation took 0.09366416931152344 seconds.
Converting positions to lat/long/alt...


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  lon, lat, alt = pyproj.transform(ecef, lla, pos_x, pos_y, pos_z, radians=False)
  lon, lat, alt = pyproj.transform(ecef, lla, pos_x, pos_y, pos_z, radians=False)
  lon, lat, alt = pyproj.transform(ecef, lla, pos_x, pos_y, pos_z, radians=False)
  lon, lat, alt = pyproj.transform(ecef, lla, pos_x, pos_y, pos_z, radians=False)
  lon, lat, alt = pyproj.transform(ecef, lla, pos_x, pos_y, pos_z, radians=False)
  lon, lat, alt = pyproj.transform(ecef, lla, pos_x, pos_y, pos_z, radians=False)
  lon, lat, alt = pyproj.transform(ecef, lla, pos_x, pos_y, pos_z, radians=False)
  lon, lat, alt = pyproj.transform(ecef, lla, pos_x, pos_y, pos_z, radians=False)
  lon, lat, alt = pyproj.transform(ecef, lla, pos_x, pos_y, pos_z, radians=False)
  lon, lat, alt = pyproj.transform(ecef, lla, pos_x, pos_y, pos_z, radians=False)
  lon, lat, alt = pyproj.transform(ecef, lla, pos_x, pos_y, pos_z, radians=False)
  lon, lat, alt = pyproj.transfor

Filtering positions...
Saving results...
Total runtime: 147.6827940940857 seconds.


**KINDLY NOTE THAT THE RESULTS ARE STORED IN satellite_positions.csv

All in all it was a great learning opportunity.

Thank You



In [2]:
pip install sgp4

Collecting sgp4
  Downloading sgp4-2.23-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (232 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/232.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m92.2/232.3 kB[0m [31m2.6 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m232.3/232.3 kB[0m [31m3.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: sgp4
Successfully installed sgp4-2.23
