In [None]:
# A deeper NN with about 7M parameters, trained on a dataset of 1.6M (coefficients, roots) pairs. The input coefficients are integers from -5 to 5; each polynomial has 5 roots whose real and imaginary parts are stored separately, giving 10 output values.

import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import joblib

# Read the coefficient/root table from disk.
# NOTE(review): absolute, machine-specific path - adjust before running elsewhere.
file_path = r"C:\Users\Akshay Patil\Desktop\Roots\R.csv"
df = pd.read_csv(file_path)

# Quick look at what was loaded: dimensions, column names and leading rows.
print(f"Dataset shape: {df.shape}")
print(f"Dataset columns: {df.columns.to_list()}")
print("\nFirst few rows:", df.head(), sep="\n")

# Model inputs: the six polynomial coefficients a..f, one row per polynomial.
input_cols = ['a', 'b', 'c', 'd', 'e', 'f']
X = df.loc[:, input_cols].to_numpy()

# Helper function to safely extract complex parts
def extract_complex_parts(complex_value):
    """
    Split a complex-like value into its real and imaginary parts.

    Accepts anything the built-in ``complex()`` accepts (numbers, complex
    objects, strings such as "1+2j" or "(1+2j)"). Strings are stripped of
    surrounding whitespace first; if parsing still fails, blanks adjacent to
    '+' / '-' signs are removed and parsing is retried, so "1 + 2j" is read
    as 1+2j instead of being rejected.

    Returns:
        (real, imag) as a tuple of floats. When the value cannot be parsed,
        a warning is printed and (0.0, 0.0) is returned.
    """
    try:
        if isinstance(complex_value, str):
            complex_value = complex_value.strip()
            try:
                complex_num = complex(complex_value)
            except ValueError:
                # complex() rejects interior whitespace; tighten the text
                # around each sign and try once more. Text without a sign
                # (e.g. "1 2") is left unchanged and still fails below.
                compact = complex_value
                for sign in "+-":
                    compact = sign.join(
                        piece.strip() for piece in compact.split(sign)
                    )
                complex_num = complex(compact)
        else:
            # int, float, complex and NumPy scalars all convert directly.
            complex_num = complex(complex_value)
    except (ValueError, TypeError):
        print(f"Warning: Could not parse complex number: {complex_value}")
        return 0.0, 0.0

    return complex_num.real, complex_num.imag

# Build the regression targets: 5 complex roots per polynomial, flattened to
# 10 real numbers per row (columns 0-4 real parts, columns 5-9 imaginary parts).
#
# Method 1 (disabled): roots stored as separate real/imaginary columns.
# With the loop below commented out, root_cols stays empty and Method 2 runs.
root_cols = []
# for i in range(1, 6):  # 5 roots
#     for part in ['real', 'imag']:
#         col_name = f'root_{i}_{part}'
#         if col_name in df.columns:
#             root_cols.append(col_name)

# Extract outputs based on column structure
if len(root_cols) == 10:
    y = df[root_cols].values
    print("Using separate real/imaginary columns")
else:
    # Method 2: roots stored as complex numbers in columns root_1 .. root_5
    print("Processing complex number columns")
    y = np.zeros((len(df), 10))

    for i in range(5):
        root_col = f'root_{i+1}'
        if root_col not in df.columns:
            # A missing column used to be skipped without notice, leaving
            # all-zero targets for this root; make that visible.
            print(f"Warning: column '{root_col}' not found; its targets remain 0")
            continue

        # Parse the whole column, then write both target columns at once.
        # reshape(-1, 2) keeps the indexing valid for an empty dataframe.
        parts = np.array(
            [extract_complex_parts(value) for value in df[root_col].values],
            dtype=float,
        ).reshape(-1, 2)
        y[:, i] = parts[:, 0]        # Real parts (columns 0-4)
        y[:, i + 5] = parts[:, 1]    # Imaginary parts (columns 5-9)

# Report the array shapes produced by the extraction steps.
print(f"Input shape: {X.shape}")
print(f"Output shape: {y.shape}")

# Count NaN entries first, then infinite entries, for inputs and targets.
for _kind, _detector in (("NaN", np.isnan), ("Infinite", np.isinf)):
    for _array_name, _array in (("X", X), ("y", y)):
        print(f"{_kind} values in {_array_name}: {_detector(_array).sum()}")

# Keep only rows where every input and every target is finite
# (finite means neither NaN nor +/-inf).
valid_indices = np.isfinite(X).all(axis=1) & np.isfinite(y).all(axis=1)
X = X[valid_indices]
y = y[valid_indices]

print(f"After cleaning - X shape: {X.shape}, y shape: {y.shape}")

# Two-stage split: hold out 10% of all rows as the test set, then 10% of the
# remaining rows as the validation set (about 81% / 9% / 10% overall).
_split_options = {'test_size': 0.1, 'random_state': 42}
X_temp, X_test, y_temp, y_test = train_test_split(X, y, **_split_options)
X_train, X_val, y_train, y_val = train_test_split(X_temp, y_temp, **_split_options)

for _label, _features in (
    ("Training", X_train),
    ("Validation", X_val),
    ("Test", X_test),
):
    print(f"{_label} set: {_features.shape[0]} samples")

# Standardization is switched off: the model below is fed the raw arrays.
# The scaler code is kept for reference.
# scaler_X = StandardScaler()
# scaler_y = StandardScaler()

# X_train_scaled = scaler_X.fit_transform(X_train)
# X_val_scaled = scaler_X.transform(X_val)
# X_test_scaled = scaler_X.transform(X_test)

# y_train_scaled = scaler_y.fit_transform(y_train)
# y_val_scaled = scaler_y.transform(y_val)
# y_test_scaled = scaler_y.transform(y_test)

print("Data preprocessing completed successfully!")

# Widths of the 20 hidden Dense layers, listed from the input side to the
# output side. Each Dense layer uses ReLU and is followed by BatchNormalization.
hidden_widths = [
    512, 768, 1024, 1024, 1024,
    768, 768, 768, 512, 512,
    512, 384, 384, 384, 256,
    256, 256, 128, 128, 64,
]

network_layers = []
for position, width in enumerate(hidden_widths, start=1):
    dense_options = {'activation': 'relu', 'name': f'hidden_layer_{position}'}
    if position == 1:
        # Only the first layer declares the input: the 6 polynomial coefficients.
        dense_options['input_shape'] = (6,)
    network_layers.append(keras.layers.Dense(width, **dense_options))
    network_layers.append(keras.layers.BatchNormalization())

# Output layer (10 outputs internally: 5 real parts + 5 imaginary parts)
network_layers.append(
    keras.layers.Dense(10, activation='linear', name='output_layer')
)

model = keras.Sequential(network_layers)

# Configure training: Adam with learning rate 0.001, mean-squared-error loss,
# and mean absolute error tracked as an extra metric.
adam_optimizer = keras.optimizers.Adam(learning_rate=0.001)
model.compile(optimizer=adam_optimizer, loss='mse', metrics=['mae'])

# Print the layer-by-layer summary of the network.
model.summary()

# Callbacks passed to model.fit().
# Early stopping is commented out, so training is not cut short by it:
# keras.callbacks.EarlyStopping(
#     monitor='val_loss',
#     patience=5,
#     restore_best_weights=True,
#     verbose=1
# )

# Multiply the learning rate by 0.5 when val_loss has not improved for
# 7 epochs, with 1e-7 as the lower bound.
lr_on_plateau = keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=7,
    min_lr=1e-7,
    verbose=1,
)

# Write the model to disk only when val_loss improves on the best seen so far.
best_checkpoint = keras.callbacks.ModelCheckpoint(
    'best_polynomial_roots_model.keras',
    monitor='val_loss',
    save_best_only=True,
    verbose=1,
)

callbacks = [lr_on_plateau, best_checkpoint]

# Fit on the training split, evaluating against the validation split each epoch.
print("Starting training...")
history = model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_val, y_val),
    epochs=100,
    batch_size=10000,
    callbacks=callbacks,
    verbose=1,
)

print("Training completed!")

# Replace the in-memory model with the checkpoint written during training.
model = keras.models.load_model('best_polynomial_roots_model.keras')

# Score the reloaded model on the held-out test split; evaluate() yields the
# loss followed by the configured metric.
test_scores = model.evaluate(X_test, y_test, verbose=0)
test_loss, test_mae = test_scores
print(f"Test Loss (MSE): {test_loss:.6f}")
print(f"Test MAE: {test_mae:.6f}")

# Create prediction function
def predict_polynomial_roots(coefficients):
    """
    Predict roots for a polynomial given coefficients [a,b,c,d,e,f]
    where polynomial is: a*x^5 + b*x^4 + c*x^3 + d*x^2 + e*x + f = 0

    The prediction comes from the module-level ``model``. Inputs and outputs
    are used unscaled, because the scaler steps in this file are disabled.

    Args:
        coefficients: sequence (or array) holding exactly 6 numbers.

    Returns:
        (roots, real_parts, imag_parts): ``roots`` is an array of 5 complex
        values built from the first 5 model outputs (real parts) and the
        last 5 model outputs (imaginary parts), which are also returned.

    Raises:
        ValueError: if ``coefficients`` does not contain exactly 6 values.
    """
    # Ensure input is a single-row 2-D numpy array
    coeffs = np.array(coefficients).reshape(1, -1)

    # Fail early with a clear message instead of a shape error inside Keras.
    if coeffs.shape[1] != 6:
        raise ValueError(
            f"Expected 6 coefficients [a, b, c, d, e, f], got {coeffs.shape[1]}"
        )

    # Scaling is disabled:
    # coeffs_scaled = scaler_X.transform(coeffs)

    prediction = model.predict(coeffs, verbose=0)

    # Inverse scaling is disabled:
    # prediction = scaler_y.inverse_transform(prediction)

    # First half of the output row holds real parts, second half imaginary parts
    real_parts = prediction[0][:5]
    imag_parts = prediction[0][5:]

    # Combine into complex roots
    roots = real_parts + 1j * imag_parts

    return roots, real_parts, imag_parts

# Test the prediction function on one sample polynomial
sample_coeffs = [1, -5, 8, -4, 0, 0]
predicted_roots, real_parts, imag_parts = predict_polynomial_roots(sample_coeffs)

print(f"Input polynomial coefficients: {sample_coeffs}")
print("Predicted roots:")
for i, root in enumerate(predicted_roots):
    print(f"  Root {i+1}: {root:.4f}")

# Scaler persistence is disabled, so no scaler files are written:
# joblib.dump(scaler_X, 'scaler_X.pkl')
# joblib.dump(scaler_y, 'scaler_y.pkl')

# Only the model checkpoint exists on disk, so the messages mention only that;
# the previous text told users to load scaler files that were never saved.
print("Model saved successfully!")
print("\nTo use the trained model in a new session:")
print("1. model = keras.models.load_model('best_polynomial_roots_model.keras')")

# Additional utility functions for comprehensive usage
def load_trained_model():
    """Return the Keras model stored in 'best_polynomial_roots_model.keras'.

    Only the model is returned; the scaler loading is disabled in this file.
    """
    # scaler_X = joblib.load('scaler_X.pkl')
    # scaler_y = joblib.load('scaler_y.pkl')
    checkpoint_path = 'best_polynomial_roots_model.keras'
    return keras.models.load_model(checkpoint_path)

def batch_predict_roots(coefficient_list):
    """Predict roots for multiple polynomials at once.

    Uses the module-level ``model``. Inputs and outputs are used unscaled,
    because the scaler steps in this file are disabled.

    Args:
        coefficient_list: sequence of coefficient rows, one per polynomial.

    Returns:
        A list with one dict per input row, holding 'coefficients' (the input
        row as given), 'roots' (complex array), 'real_parts' (first 5 model
        outputs) and 'imaginary_parts' (last 5 model outputs). An empty input
        yields an empty list without calling the model.
    """
    coeffs_array = np.array(coefficient_list)

    # Nothing to predict: return early rather than handing an empty array
    # to model.predict().
    if coeffs_array.size == 0:
        return []

    # coeffs_scaled = scaler_X.transform(coeffs_array)
    predictions = model.predict(coeffs_array, verbose=0)
    # predictions = scaler_y.inverse_transform(predictions)

    results = []
    for coefficients, prediction in zip(coefficient_list, predictions):
        real_parts = prediction[:5]
        imag_parts = prediction[5:]
        results.append({
            'coefficients': coefficients,
            'roots': real_parts + 1j * imag_parts,
            'real_parts': real_parts,
            'imaginary_parts': imag_parts,
        })

    return results

def verify_polynomial_roots(coeffs, roots, tolerance=1e-6):
    """
    Verify that the predicted roots actually satisfy the polynomial equation.

    The polynomial is evaluated with Horner's scheme, so ``coeffs`` may have
    any length >= 1 (highest-degree coefficient first); the six-coefficient
    quintic [a, b, c, d, e, f] used elsewhere in this file is one case.

    Args:
        coeffs: polynomial coefficients, highest degree first.
        roots: iterable of candidate roots (real or complex).
        tolerance: a root counts as valid when |p(root)| < tolerance.

    Returns:
        A list with one dict per root containing 'root_index' (1-based),
        'root_value', 'polynomial_value', 'error' (absolute value of the
        polynomial at the root) and 'is_valid_root'.

    Raises:
        ValueError: if ``coeffs`` is empty.
    """
    coeffs = list(coeffs)
    if not coeffs:
        raise ValueError("coeffs must contain at least one coefficient")

    verification_results = []

    for index, root in enumerate(roots, start=1):
        # Horner evaluation: (((a*x + b)*x + c)*x + ...) + last coefficient
        poly_value = 0
        for coefficient in coeffs:
            poly_value = poly_value * root + coefficient
        error = abs(poly_value)

        verification_results.append({
            'root_index': index,
            'root_value': root,
            'polynomial_value': poly_value,
            'error': error,
            'is_valid_root': error < tolerance,
        })

    return verification_results

# Demo: predict roots for several polynomials in one call and check how well
# each predicted root satisfies its polynomial.
sample_batch = [
    [1, -5, 8, -4, 0, 0],
    [10, 20, -31, 0, 11, -2],
    [12, -51, 82, -4, 2, 2],
    [-2, 1, -3, 4, 1, -2],
    [1, -5, 8, -4, 0, 0],
    [2, 1, -3, 0, 1, -2],
    [1, -5, 8, -4, 0, 0],
    [2, 1, -3, 0, 1, -2],
    [1, 0, 0, 0, 0, -1]
]

batch_results = batch_predict_roots(sample_batch)
print("\nBatch prediction results:")
for number, entry in enumerate(batch_results, start=1):
    print(f"Polynomial {number}: {entry['coefficients']}")
    print(f"Roots: {entry['roots']}")

    # Evaluate the polynomial at every predicted root
    checks = verify_polynomial_roots(entry['coefficients'], entry['roots'])
    print("Root verification:")
    for check in checks:
        mark = "✓" if check['is_valid_root'] else "✗"
        print(f"  {mark} Root {check['root_index']}: Error = {check['error']:.2e}")
    print()

print("Code execution completed successfully!")

# The deep NN's errors when predicting the roots range from about 0.02 to 2.
# This implies that either the architecture needs updating or the training data needs more
# coefficient samples, possibly including decimal coefficients rather than only the integer coefficients currently in the data.