# In [None]:
import os
import numpy as np
import pandas as pd
from datetime import datetime, timedelta
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import load_model, Sequential
from tensorflow.keras.layers import Dense, BatchNormalization, Dropout, LeakyReLU
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.losses import CategoricalCrossentropy
import ast  

# Restore both networks from disk: the stage-1 predictor and the
# neighbour-aware stage-2 model that is trained further below.
model_1 = load_model('stage_1_model.h5')
model_2 = load_model('stage_2_neigh_model.h5')

# Feature scaler. NOTE: this is a brand-new, unfitted MinMaxScaler -- nothing
# is restored from disk here; it only becomes usable once it is fitted later
# in this file.
scaler = MinMaxScaler()

# CSV log that receives one row per satellite per time step
csv_file = 'satellite_predictions_per_timestep.csv'

# On the first run, write a header-only file so that later writes can simply
# append rows without repeating the header
if not os.path.exists(csv_file):
    pd.DataFrame(
        columns=['timestamp', 'satellite', 'predicted_gateways'],
    ).to_csv(csv_file, index=False)

# Function to generate filenames
def generate_time_file_names(start_time_str, end_time_str, folder_path,
                             prefix='optimal_file_data_', step_seconds=20):
    """Yield the expected snapshot file paths between two times of day.

    Paths are generated purely from the arguments; no check is made that the
    files exist.

    Args:
        start_time_str: First time to emit, formatted ``HH_MM_SS``.
        end_time_str: Last time to emit (inclusive), formatted ``HH_MM_SS``.
        folder_path: Directory the generated file names are joined onto.
        prefix: Text placed in front of the time in every file name.
        step_seconds: Spacing between consecutive files, in seconds.
            Defaults to 20, the interval that used to be hard-coded.

    Yields:
        ``<folder_path>/<prefix><HH_MM_SS>.csv`` for every step from the start
        time up to and including the end time. Nothing is yielded when the end
        time is earlier than the start time.

    Raises:
        ValueError: If ``step_seconds`` is not positive or a time string does
            not match ``%H_%M_%S``. Because this is a generator, the error
            surfaces when iteration starts, not when the function is called.
    """
    # A zero or negative step would never reach end_time and loop forever.
    if step_seconds <= 0:
        raise ValueError(f"step_seconds must be positive, got {step_seconds!r}")

    time_format = '%H_%M_%S'
    current_time = datetime.strptime(start_time_str, time_format)
    end_time = datetime.strptime(end_time_str, time_format)
    step = timedelta(seconds=step_seconds)

    while current_time <= end_time:
        file_name = f"{prefix}{current_time.strftime(time_format)}.csv"
        yield os.path.join(folder_path, file_name)
        current_time += step

# Function to process each file
def process_file(file_name):
    """Turn one snapshot CSV into the per-satellite inputs for stage 2.

    The file is read and de-duplicated on ``feed_sat``. Each satellite's
    (Latitude, Longitude, Altitude) is padded, min-max scaled and passed to
    ``model_1`` through ``get_top_3_predictions``. For every satellite, the
    positions of its nearest other satellites in the same file are collected.

    Args:
        file_name: Path to a CSV containing the columns ``feed_sat``,
            ``Latitude``, ``Longitude``, ``Altitude`` and
            ``optimal_gateway_matrix``. The name is expected to end in
            ``HH_MM_SS.csv``.

    Returns:
        A 5-tuple ``(sat_positions, sat_neighbors, sat_top_3_map, sat_labels,
        timestamp)`` where

        * ``sat_positions`` maps satellite -> float32 array ``[lat, lon, alt]``,
        * ``sat_neighbors`` maps satellite -> list of the position arrays of
          its (at most four) nearest other satellites, nearest first; the
          list is shorter when the file holds fewer than five satellites,
        * ``sat_top_3_map`` maps satellite -> its row from
          ``get_top_3_predictions``,
        * ``sat_labels`` maps satellite -> float32 array parsed from
          ``optimal_gateway_matrix``,
        * ``timestamp`` is the ``HH_MM_SS`` part of the file name.

        All five entries are ``None`` when the file does not exist or holds
        no rows.
    """
    skipped = (None, None, None, None, None)
    if not os.path.exists(file_name):
        return skipped

    # File names look like "<prefix>HH_MM_SS.csv". Keep all three time fields:
    # taking only the last "_"-separated piece would keep just the seconds.
    stem = os.path.splitext(os.path.basename(file_name))[0]
    timestamp = '_'.join(stem.split('_')[-3:])

    optimal_data = pd.read_csv(
        file_name,
        usecols=['feed_sat', 'Latitude', 'Longitude', 'Altitude', 'optimal_gateway_matrix'],
    )
    optimal_data.drop_duplicates(subset=['feed_sat'], inplace=True)

    if optimal_data.empty:
        return skipped  # Skip empty files

    coord_cols = ['Latitude', 'Longitude', 'Altitude']
    optimal_data[coord_cols] = optimal_data[coord_cols].astype(float)

    sat_positions = {
        feed_sat: np.array([lat, lon, alt], dtype=np.float32)
        for feed_sat, lat, lon, alt in zip(optimal_data['feed_sat'],
                                           optimal_data['Latitude'],
                                           optimal_data['Longitude'],
                                           optimal_data['Altitude'])
    }

    if not sat_positions:
        return skipped  # Skip empty datasets

    # The label column stores each matrix as a Python-literal string.
    sat_labels = {
        feed_sat: np.array(ast.literal_eval(raw), dtype=np.float32)
        for feed_sat, raw in zip(optimal_data['feed_sat'],
                                 optimal_data['optimal_gateway_matrix'])
    }

    sat_list = list(sat_positions)
    X_input = np.array(list(sat_positions.values()), dtype=np.float32)

    # Pad the three coordinates with a zero column.
    # NOTE(review): presumably model_1 expects four input features -- confirm.
    if X_input.shape[1] == 3:
        X_input = np.hstack([X_input, np.zeros((X_input.shape[0], 1))])

    # Normalize input for Model 1 with a throwaway scaler. Refitting the
    # shared module-level `scaler` here would overwrite whatever it was last
    # fitted on (the 21-feature stage-2 data) and make later
    # `scaler.transform` calls fail on a feature-count mismatch.
    # NOTE(review): the scaling is fitted per file, not restored from stage-1
    # training -- confirm this matches how model_1 was trained.
    X_input = MinMaxScaler().fit_transform(X_input)

    top_3_preds = get_top_3_predictions(model_1, X_input)
    sat_top_3_map = dict(zip(sat_list, top_3_preds))

    # Nearest neighbours by Euclidean distance between the raw
    # (lat, lon, alt) vectors; a satellite is never its own neighbour.
    sat_neighbors = {}
    for sat, pos in sat_positions.items():
        distances = [(other_sat, np.linalg.norm(pos - sat_positions[other_sat]))
                     for other_sat in sat_list if other_sat != sat]
        nearest_neighbors = sorted(distances, key=lambda pair: pair[1])[:4]
        sat_neighbors[sat] = [sat_positions[name] for name, _ in nearest_neighbors]

    return sat_positions, sat_neighbors, sat_top_3_map, sat_labels, timestamp

# Function to get top-3 predictions
def get_top_3_predictions(model, X):
    """Return the three highest-scoring classes per row, with their scores.

    Args:
        model: Object exposing ``predict(X, batch_size=..., verbose=...)``
            that returns one row of scores per input row.
        X: Input rows handed to ``model.predict`` unchanged.

    Returns:
        A 2-D array with one row per input row: first the three class
        indices, then their three scores. Within each half the entries are
        ordered from the lowest to the highest of the three scores.
    """
    scores = model.predict(X, batch_size=256, verbose=0)
    # argsort is ascending, so the last three columns are the best three.
    ranking = np.argsort(scores, axis=1)
    best_idx = ranking[:, -3:]
    best_scores = np.take_along_axis(scores, best_idx, axis=1)
    return np.concatenate([best_idx, best_scores], axis=1)

# Training settings
folder_path = r"C:\Users\aruna\Desktop\MS Thesis\Real Data\Files with position"
train_files = list(generate_time_file_names('16_00_00', '19_00_00', folder_path))
# Both ranges are inclusive, so the test range starts one 20-second step after
# the last training file; starting at 19_00_00 would put that file in both the
# training and the validation set.
test_files = list(generate_time_file_names('19_00_20', '20_30_00', folder_path))

# Set mode flag to track whether training or testing is being processed
processing_mode = None

# Stage-2 feature width: own position (3) + four neighbour positions (4 * 3)
# + the stage-1 top-3 indices and probabilities (6).
N_STAGE2_FEATURES = 21


def _collect_samples(file_names, label, log_path=None):
    """Build the stage-2 feature matrix and labels from snapshot files.

    Args:
        file_names: Paths passed one by one to ``process_file``; files it
            reports as unusable are skipped.
        label: Name of the data split, used only in error messages.
        log_path: When given, one row per satellite (timestamp, satellite,
            stage-1 top-3 output) is appended to this CSV in batches.

    Returns:
        ``(X, y)``: the unscaled feature matrix and the vertically stacked
        label arrays.

    Raises:
        ValueError: If no file yields any sample, or the feature rows are not
            all ``N_STAGE2_FEATURES`` wide.
    """
    features, labels, pending_rows = [], [], []

    def flush_log():
        # Append without a header: the header row is written when the log
        # file is first created.
        pd.DataFrame(pending_rows).to_csv(log_path, mode='a', header=False, index=False)
        pending_rows.clear()

    for i, file_name in enumerate(file_names):
        sat_positions, sat_neighbors, sat_top_3_map, sat_labels, timestamp = process_file(file_name)
        if sat_positions is None:
            continue

        for sat, sat_pos in sat_positions.items():
            top_3_preds = sat_top_3_map[sat]
            features.append(np.concatenate([sat_pos] + sat_neighbors[sat] + [top_3_preds]))
            labels.append(sat_labels[sat])
            if log_path is not None:
                pending_rows.append({'timestamp': timestamp,
                                     'satellite': sat,
                                     'predicted_gateways': top_3_preds.tolist()})

        # Write the log in batches instead of once per file.
        if i % 50 == 0 and pending_rows:
            flush_log()

    if pending_rows:
        flush_log()

    if not features:
        raise ValueError(f"{label} data is empty: no usable files were found!")

    X = np.array(features)
    y = np.vstack(labels)
    # Explicit check instead of `assert`, which is stripped under `python -O`.
    if X.ndim != 2 or X.shape[1] != N_STAGE2_FEATURES:
        raise ValueError(
            f"{label} data should have {N_STAGE2_FEATURES} features but has shape {X.shape}!"
        )
    return X, y


# Process training data (stage-1 predictions are logged to the CSV)
print("\n🟢 Processing Training Files...\n")
X_train, y_train = _collect_samples(train_files, 'Training', log_path=csv_file)

# Process testing data
print("\n🔵 Processing Testing Files...\n")
X_test, y_test = _collect_samples(test_files, 'Test')

# Fit the scaler only after every file has been processed, so that no later
# call into process_file() can leave `scaler` fitted on a different number of
# features between this fit and the transforms below.
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Train Model 2
history = model_2.fit(
    X_train, y_train,
    epochs=50,
    batch_size=256,
    validation_data=(X_test, y_test),
    verbose=1
)

# Save Model (overwrites the file model_2 was loaded from)
model_2.save('stage_2_neigh_model.h5')

print("\n✅ Model training and satellite predictions saved efficiently!")
