Use this code to explore the CelesTrak CSV file.

In [None]:
import pandas as pd


# Read the CelesTrak CSV into a DataFrame; later cells reuse `df`.
df = pd.read_csv('ionosphere_central/celestrak/SW-All.csv')

# Show every column name so we know what is available to plot.
print(list(df.columns))

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Plot one user-selected column of `df` against its 'DATE' column.
# Relies on `df` (loaded above) and `plt` (matplotlib.pyplot) already being
# in scope from earlier cells.

# Parse 'DATE' as datetime (assumes YYYY-MM-DD or a similarly parseable
# format) so matplotlib draws a real time axis.
df['DATE'] = pd.to_datetime(df['DATE'])

# Every column except 'DATE' is a candidate for the y-axis.
columns_to_plot = [col for col in df.columns if col != 'DATE']

# --- SIMPLE SELECTION ---
# To skip the prompt, replace the input() call below with a fixed name,
# e.g. value_to_plot = "KP_SUM"
raw_choice = input(f"Select a column to plot {columns_to_plot}: ")

# Accept the answer verbatim when it matches; otherwise retry with the
# surrounding whitespace removed, so an accidental trailing space does not
# reject an otherwise valid column name.
value_to_plot = raw_choice if raw_choice in columns_to_plot else raw_choice.strip()

if value_to_plot not in columns_to_plot:
    raise ValueError(f"{value_to_plot} not found in columns: {columns_to_plot}")

# Plot
plt.figure(figsize=(12, 6))
plt.plot(df['DATE'], df[value_to_plot], marker='o')
plt.xlabel('Date')
plt.ylabel(value_to_plot)
plt.title(f"{value_to_plot} over Time")
plt.tight_layout()
plt.show()

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Plot the eight KP1..KP8 values of a single day, one point per column.

# Load CSV and parse the DATE column so it can be compared against a date.
csv_file = 'ionosphere_central/celestrak/SW-All.csv'
df = pd.read_csv(csv_file)
# Fixed: the 'DATE' key was previously split across two lines inside the
# string literal, which is a SyntaxError and stopped the cell from running.
df['DATE'] = pd.to_datetime(df['DATE'])

# --- SELECT DATE TO PLOT (edit this) ---
selected_date = "2024-05-10"  # <-- change to your desired date (YYYY-MM-DD format)

# Keep only the row(s) whose DATE equals the selected date.
row = df[df['DATE'] == selected_date]

if row.empty:
    raise ValueError(f"No data for selected date: {selected_date}")

# KP1..KP8 columns; only the first matching row is used.
kp_cols = [f'KP{i}' for i in range(1, 9)]
kp_values = row[kp_cols].iloc[0].values.astype(float)

# Tick labels pair each KP column with the time label shown under it.
time_labels = [
    "00:00", "03:00", "06:00", "09:00", "12:00", "15:00", "18:00", "21:00"
]
xticks = [f"{kp}\n{t}" for kp, t in zip(kp_cols, time_labels)]

colors = plt.cm.tab10.colors

plt.figure(figsize=(10, 5))
plt.plot(range(8), kp_values, color='black', marker='o', linewidth=2, label="Kp Index")
# Overlay a differently coloured marker on each point. Only the first marker
# carries a label; labels are shown only if plt.legend() is called, which
# this cell does not do.
for i, val in enumerate(kp_values):
    plt.scatter(i, val, color=colors[i], s=100, label=kp_cols[i] if i==0 else "")
plt.xticks(range(8), xticks)
plt.xlabel('Kp Interval and Time (UTC)')
plt.ylabel('Kp Index Value')
plt.title(f'Kp Index Values for {selected_date}')
# Leave one unit of headroom above the largest value.
plt.ylim(0, max(kp_values) + 1)
plt.grid(True, linestyle='--', alpha=0.6)
plt.tight_layout()
plt.show()

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Draw the KP1..KP8 values of one chosen day as a line with coloured markers.

# Read the table and turn DATE into datetimes for the date comparison below.
csv_file = 'ionosphere_central/celestrak/SW-All.csv'
df = pd.read_csv(csv_file)
df['DATE'] = pd.to_datetime(df['DATE'])

# --- SELECT DATE TO PLOT (edit this) ---
selected_date = "2024-05-11"  # <-- change to your desired date (YYYY-MM-DD format)

# Rows matching the chosen day; fail loudly when there are none.
is_selected_day = df['DATE'] == selected_date
row = df.loc[is_selected_day]
if row.empty:
    raise ValueError(f"No data for selected date: {selected_date}")

# Column names KP1..KP8 and their values from the first matching row.
kp_cols = ['KP' + str(n) for n in range(1, 9)]
first_match = row[kp_cols].iloc[0]
kp_values = first_match.to_numpy().astype(float)

# One "HH:00" label per column, stepping by three hours from 00:00 to 21:00.
time_labels = [f"{hour:02d}:00" for hour in range(0, 24, 3)]
xticks = [f"{name}\n{clock}" for name, clock in zip(kp_cols, time_labels)]

colors = plt.cm.tab10.colors
positions = range(8)

plt.figure(figsize=(10, 5))
plt.plot(positions, kp_values, color='black', marker='o', linewidth=2, label="Kp Index")
# One coloured marker per value; only the first marker is given a label.
for slot, (name, value) in enumerate(zip(kp_cols, kp_values)):
    marker_label = name if slot == 0 else ""
    plt.scatter(slot, value, color=colors[slot], s=100, label=marker_label)
plt.xticks(positions, xticks)
plt.xlabel('Kp Interval and Time (UTC)')
plt.ylabel('Kp Index Value')
plt.title(f'Kp Index Values for {selected_date}')
# Y-axis runs from zero to one unit above the largest plotted value.
upper_limit = max(kp_values) + 1
plt.ylim(0, upper_limit)
plt.grid(True, linestyle='--', alpha=0.6)
plt.tight_layout()
plt.show()

Now combine the Kp and Ap indices into flat 1D arrays.

In [None]:
import pandas as pd
import xarray as xr

# Flatten the per-day KP1..KP8 / AP1..AP8 columns into two 1D series and
# write them to 'kp_ap_flat.nc'.

# Load the CSV
csv_file = 'ionosphere_central/celestrak/SW-All.csv'
df = pd.read_csv(csv_file)
df['DATE'] = pd.to_datetime(df['DATE'])

# KP and AP columns
kp_cols = [f'KP{i}' for i in range(1, 9)]
ap_cols = [f'AP{i}' for i in range(1, 9)]

# Check that all required columns exist
for cols in [kp_cols, ap_cols]:
    for col in cols:
        if col not in df.columns:
            raise ValueError(f"Missing column: {col}")

# Stack KP and AP values as flat vectors (row-major order: day1 KP1..KP8, day2 KP1..KP8, ...)
kp_flat = df[kp_cols].to_numpy().ravel()
ap_flat = df[ap_cols].to_numpy().ravel()

# Build one timestamp per flattened value: each day's DATE plus 0h, 3h, ...,
# 21h, in the same row-major order as the values above. Storing these as the
# coordinate of the flat dimension means readers of the file no longer have
# to guess the start date. The kp/ap variables themselves are unchanged.
interval_offsets = pd.to_timedelta([3 * i for i in range(len(kp_cols))], unit='h')
flat_times = (
    df['DATE'].to_numpy()[:, None] + interval_offsets.to_numpy()[None, :]
).ravel()

# Create xarray Dataset
ds_flat = xr.Dataset(
    data_vars=dict(
        kp=(["datetime_index"], kp_flat),
        ap=(["datetime_index"], ap_flat),
    ),
    coords=dict(
        datetime_index=flat_times,
    ),
    attrs=dict(
        description="Flattened KP and AP indices (all dates, all intervals)",
    ),
)

# Save or inspect
print(ds_flat)
ds_flat.to_netcdf('kp_ap_flat.nc')


Now try combining the data into a 2D array instead; maybe there will be a difference later.

In [None]:
import pandas as pd
import xarray as xr

# Store KP1..KP8 and AP1..AP8 as (time x interval) arrays in 'kp_ap_2d.nc'.

# Read the table and parse the dates used as the time coordinate.
csv_file = 'ionosphere_central/celestrak/SW-All.csv'
df = pd.read_csv(csv_file)
df['DATE'] = pd.to_datetime(df['DATE'])

# Names of the eight KP and eight AP columns.
kp_cols = ['KP' + str(n) for n in range(1, 9)]
ap_cols = ['AP' + str(n) for n in range(1, 9)]

# Stop at the first required column that is absent (KP columns are checked
# before AP columns).
missing = [name for name in kp_cols + ap_cols if name not in df.columns]
if missing:
    raise ValueError(f"Missing column: {missing[0]}")

# One row per date, one column per interval: shape (n_dates, 8).
kp_2d = df[kp_cols].to_numpy()
ap_2d = df[ap_cols].to_numpy()

# Interval labels "00:00", "03:00", ..., "21:00".
interval_names = [f"{hour:02d}:00" for hour in range(0, 24, 3)]

# Assemble the Dataset with both variables sharing the same two dimensions.
dims = ["time", "interval"]
ds_2d = xr.Dataset(
    data_vars={
        "kp": (dims, kp_2d),
        "ap": (dims, ap_2d),
    },
    coords={
        "time": df['DATE'].to_numpy(),
        "interval": interval_names,
    },
    attrs={
        "description": "KP and AP indices as 2D arrays (time x 3-hour interval)",
    },
)

# Show a summary, then write the file.
print(ds_2d)
ds_2d.to_netcdf('kp_ap_2d.nc')


Plot the 1D array to see if it worked.

In [None]:
import xarray as xr
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Plot the full flattened Kp and Ap series from 'kp_ap_flat.nc'.

# Fallback start of the time axis, used only when the file does not carry
# its own timestamps (see below).
start_date = "2023-01-01"  # <-- Change as needed

# Read everything needed inside a context manager so the file handle is
# released afterwards; `.values` copies the data into memory, so the arrays
# stay usable once the dataset is closed.
with xr.open_dataset('kp_ap_flat.nc') as ds:
    kp = ds['kp'].values
    ap = ds['ap'].values
    stored_times = (
        ds['datetime_index'].values if 'datetime_index' in ds.coords else None
    )

# Prefer timestamps stored on the 'datetime_index' dimension when they exist,
# are datetimes and line up with the data; otherwise rebuild a 3-hourly axis
# starting at `start_date`.
has_stored_times = (
    stored_times is not None
    and np.issubdtype(stored_times.dtype, np.datetime64)
    and len(stored_times) == len(kp)
)
if has_stored_times:
    times = pd.to_datetime(stored_times)
else:
    # A Timedelta step replaces the '3H' alias, which recent pandas releases
    # deprecate in favour of lowercase 'h'.
    times = pd.date_range(
        start=start_date, periods=len(kp), freq=pd.Timedelta(hours=3)
    )

# Plotting
plt.figure(figsize=(15, 5))
plt.plot(times, kp, label='Kp Index', color='tab:blue')
plt.plot(times, ap, label='Ap Index', color='tab:orange')
plt.xlabel('Time')
plt.ylabel('Index Value')
plt.title('Kp and Ap Indices (3-hour intervals)')
plt.legend()
plt.tight_layout()
plt.show()


Plot for only 3 days to see if it really worked. 

In [None]:
import xarray as xr
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Plot the first few days of the flattened Kp and Ap series.

intervals_per_day = 8
num_days_to_plot = 3
num_points = intervals_per_day * num_days_to_plot

# Fallback start of the time axis, used only when the file does not carry
# its own timestamps.
# NOTE(review): this only labels the axis. The values plotted are always the
# first `num_points` entries of the file, whatever date is given here.
start_date = "2024-5-11"  # <-- edit as needed

# Read inside a context manager so the file handle is released afterwards;
# `.values` copies the data into memory first.
with xr.open_dataset('kp_ap_flat.nc') as ds:
    kp = ds['kp'].values
    ap = ds['ap'].values
    stored_times = (
        ds['datetime_index'].values if 'datetime_index' in ds.coords else None
    )

# Slice for the first days
kp_slice = kp[:num_points]
ap_slice = ap[:num_points]

# Prefer timestamps stored on the 'datetime_index' dimension when they exist,
# are datetimes and line up with the data; otherwise rebuild a 3-hourly axis.
has_stored_times = (
    stored_times is not None
    and np.issubdtype(stored_times.dtype, np.datetime64)
    and len(stored_times) == len(kp)
)
if has_stored_times:
    times_slice = pd.to_datetime(stored_times[:num_points])
else:
    # The axis length follows the data actually available, so a file with
    # fewer than `num_points` values still plots. A Timedelta step replaces
    # the '3H' alias, which recent pandas releases deprecate.
    times_slice = pd.date_range(
        start=start_date, periods=len(kp_slice), freq=pd.Timedelta(hours=3)
    )

# Plot
plt.figure(figsize=(12, 5))
plt.plot(times_slice, kp_slice, marker='o', label='Kp Index', color='tab:blue')
plt.plot(times_slice, ap_slice, marker='s', label='Ap Index', color='tab:orange')
plt.xlabel('Time')
plt.ylabel('Index Value')
plt.title(f'Kp and Ap Indices (First {num_days_to_plot} Days)')
plt.legend()
plt.tight_layout()
plt.show()


Now plot the 2D array for 3 days.

In [None]:
import xarray as xr
import matplotlib.pyplot as plt
import pandas as pd

# Plot Kp and Ap for a run of consecutive days taken from the 2D file.

# Read the dataset written by the 2D cell.
ds = xr.open_dataset('kp_ap_2d.nc')  # update filename as needed

# User settings: first day of the window and how many days it spans.
selected_start_date = "2024-05-10"   # <-- set to your desired date (YYYY-MM-DD)
num_days_to_plot = 3                 # number of days to plot

# The 'time' coordinate as a pandas DatetimeIndex, so it supports get_loc().
all_dates = pd.to_datetime(ds['time'].values)

# Position of the first day; a missing date is reported as a ValueError.
try:
    idx_start = all_dates.get_loc(pd.to_datetime(selected_start_date))
except KeyError:
    raise ValueError(f"Start date {selected_start_date} not found in dataset!")

idx_end = idx_start + num_days_to_plot
day_window = slice(idx_start, idx_end)

# Values inside the window, one row per day and one column per interval.
kp = ds['kp'].isel(time=day_window).values
ap = ds['ap'].isel(time=day_window).values

# Labels stored on the 'interval' coordinate (e.g. '00:00', '03:00', ...).
interval_labels = ds['interval'].values

# One timestamp per (day, interval) pair, day-major, built by parsing
# "<date> <interval label>".
time_index = pd.to_datetime([
    pd.to_datetime(f"{day.date()} {interval}")
    for day in all_dates[day_window]
    for interval in interval_labels
])

# Unroll the 2D windows in the same day-major order as the timestamps.
kp_flat = kp.ravel()
ap_flat = ap.ravel()

# Draw both series on a shared axis.
plt.figure(figsize=(12, 5))
series_to_draw = [
    (kp_flat, 'o', 'Kp Index', 'tab:blue'),
    (ap_flat, 's', 'Ap Index', 'tab:orange'),
]
for values, marker_shape, legend_text, line_color in series_to_draw:
    plt.plot(time_index, values, marker=marker_shape, label=legend_text, color=line_color)
plt.xlabel('Time')
plt.ylabel('Index Value')
plt.title(f'Kp and Ap Indices ({selected_start_date} + {num_days_to_plot-1} days)')
plt.legend()
plt.tight_layout()
plt.show()


Now plot Kp together with log10(Ap).

import xarray as xr
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# Plot Kp (left axis) and log10(Ap) (right axis) for a run of consecutive
# days taken from the 2D file.

# Read the dataset written by the 2D cell.
ds = xr.open_dataset('kp_ap_2d.nc')  # update filename as needed

# --- Parameters ---
selected_start_date = "2024-05-10"  # <-- set to your desired date (YYYY-MM-DD)
num_days_to_plot = 3

# --- Locate the window ---
# A start date missing from the 'time' coordinate is reported as a ValueError.
all_dates = pd.to_datetime(ds['time'].values)
try:
    idx_start = all_dates.get_loc(pd.to_datetime(selected_start_date))
except KeyError:
    raise ValueError(f"Start date {selected_start_date} not found in dataset!")
idx_end = idx_start + num_days_to_plot
day_window = slice(idx_start, idx_end)

# --- Extract the window: one row per day, one column per interval ---
kp = ds['kp'].isel(time=day_window).values
ap = ds['ap'].isel(time=day_window).values
interval_labels = ds['interval'].values

# --- Timestamps: one per (day, interval) pair, day-major ---
time_index = pd.to_datetime([
    pd.to_datetime(f"{day.date()} {interval}")
    for day in all_dates[day_window]
    for interval in interval_labels
])

# --- Unroll to 1D in the same order as the timestamps ---
kp_flat = kp.ravel()
ap_flat = ap.ravel()
# The small offset keeps log10 finite where Ap is zero; adjust as needed.
log_ap_flat = np.log10(ap_flat + 1e-6)

# --- Plot ---
color_kp = 'tab:blue'
color_ap = 'tab:orange'

fig, ax1 = plt.subplots(figsize=(12, 5))

# Left y-axis: Kp.
ax1.plot(time_index, kp_flat, marker='o', color=color_kp, label='Kp Index')
ax1.set_xlabel('Time')
ax1.set_ylabel('Kp Index', color=color_kp)
ax1.tick_params(axis='y', labelcolor=color_kp)

# Right y-axis: log10(Ap), sharing the x-axis with the Kp axes.
ax2 = ax1.twinx()
ax2.plot(time_index, log_ap_flat, marker='s', color=color_ap, label='log₁₀(Ap)')
ax2.set_ylabel('log₁₀(Ap)', color=color_ap)
ax2.tick_params(axis='y', labelcolor=color_ap)

# Title, then tighten the layout.
plt.title(f'Kp and log₁₀(Ap) Indices ({selected_start_date} + {num_days_to_plot-1} days)')
fig.tight_layout()

# Merge the legend entries of both axes into one legend on the left axes.
kp_handles, kp_labels = ax1.get_legend_handles_labels()
ap_handles, ap_labels = ax2.get_legend_handles_labels()
ax1.legend(kp_handles + ap_handles, kp_labels + ap_labels, loc='upper left')

plt.show()
