# Planetary Eclipse Prediction Analysis

This notebook analyzes the performance of our planetary eclipse prediction model, visualizes historical and predicted eclipses for several planets, and explores patterns in planetary eclipse occurrences.

In [None]:
import sys
sys.path.append('../')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta
from sklearn.metrics import classification_report, confusion_matrix

from src.data_processing.data_loader import DataLoader
from src.models.planetary_eclipse_predictor import PlanetaryEclipsePredictor
from src.evaluation.prediction_evaluator import evaluate_planetary_eclipse_predictions

%matplotlib inline
# Matplotlib renamed its bundled 'seaborn' style to 'seaborn-v0_8' in 3.6 and
# dropped the old name in 3.8, so use whichever name this installation ships.
SEABORN_STYLE = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
plt.style.use(SEABORN_STYLE)

# List of planets we're analyzing
PLANETS = ['mercury', 'venus', 'mars', 'jupiter', 'saturn']

## 1. Load and Prepare Data

In [None]:
# Fetch each planet's eclipse table through the project DataLoader.
loader = DataLoader()
planetary_data = {}
for planet in PLANETS:
    planetary_data[planet] = loader.load_planetary_eclipse_data(planet)

# Split data into training and testing sets: rows dated before 2020-01-01
# are used for training, everything from that date onward for testing.
# NOTE(review): relies on the 'date' column comparing correctly against an
# ISO date string — confirm the dtype produced by DataLoader.
train_data = {
    name: frame[frame['date'] < '2020-01-01']
    for name, frame in planetary_data.items()
}
test_data = {
    name: frame[frame['date'] >= '2020-01-01']
    for name, frame in planetary_data.items()
}

# Report the size of both partitions per planet.
for name, train_frame in train_data.items():
    print(f"{name.capitalize()}:")
    print(f"  Training data shape: {train_frame.shape}")
    print(f"  Testing data shape: {test_data[name].shape}")

# Display sample of the data
print("\nSample of planetary eclipse data (Mercury):")
mercury_preview = planetary_data['mercury'].head()
print(mercury_preview)

## 2. Explore Historical Planetary Eclipse Data

In [None]:
def plot_eclipse_distribution(data, planet):
    """Show a count plot of the ``eclipse_type`` column for one planet.

    Args:
        data: Table handed to ``sns.countplot``; must contain ``eclipse_type``.
        planet: Planet name; capitalized and embedded in the chart title.
    """
    chart_title = f'Distribution of Eclipse Types for {planet.capitalize()}'

    plt.figure(figsize=(10, 6))
    sns.countplot(data=data, x='eclipse_type')
    plt.xlabel('Eclipse Type')
    plt.ylabel('Count')
    plt.title(chart_title)
    plt.xticks(rotation=45)
    plt.show()

# For every planet: chart the eclipse-type counts, then print each type's
# relative share of that planet's records.
for planet in PLANETS:
    history = planetary_data[planet]
    plot_eclipse_distribution(history, planet)

    # Calculate and display eclipse frequency
    eclipse_frequency = history['eclipse_type'].value_counts(normalize=True)
    print(f"\nEclipse frequency for {planet.capitalize()}:")
    print(eclipse_frequency)
    print("\n" + "-" * 50)

## 3. Train Planetary Eclipse Prediction Model

In [None]:
# A single predictor instance is trained on each planet's training split in turn.
planetary_model = PlanetaryEclipsePredictor()

for planet in PLANETS:
    training_frame = train_data[planet]
    planetary_model.train(training_frame, planet)
    print(f"Model trained successfully for {planet.capitalize()}.")

## 4. Evaluate Model Performance

In [None]:
def evaluate_and_visualize(model, test_data, planet):
    """Score the model on one planet's test split and plot its confusion matrix.

    Prints every metric in the mapping returned by
    ``evaluate_planetary_eclipse_predictions``, prints scikit-learn's
    classification report, and shows a confusion-matrix heatmap whose axes
    are labelled with the eclipse-type names.

    Args:
        model: Object exposing ``predict(dates, planet)``.
        test_data: Table with ``date`` and ``eclipse_type`` columns.
        planet: Planet name; forwarded to ``model.predict`` and used in the
            printed and plotted titles.
    """
    if len(test_data) == 0:
        # Nothing to score or plot for this planet.
        print(f"No test data available for {planet.capitalize()}; skipping evaluation.")
        return

    actual = test_data['eclipse_type']
    predictions = model.predict(test_data['date'], planet)
    evaluation_results = evaluate_planetary_eclipse_predictions(actual, predictions)

    print(f"Evaluation Results for {planet.capitalize()}:")
    for metric, value in evaluation_results.items():
        print(f"{metric}: {value}")

    print(f"\nClassification Report for {planet.capitalize()}:")
    print(classification_report(actual, predictions))

    # Fix the class order explicitly so the heatmap ticks can carry the class
    # names instead of bare 0..n-1 indices. The sorted union of true and
    # predicted labels is the order confusion_matrix uses by default.
    labels = sorted(set(actual) | set(predictions))
    cm = confusion_matrix(actual, predictions, labels=labels)

    plt.figure(figsize=(8, 6))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=labels, yticklabels=labels)
    plt.xlabel('Predicted')
    plt.ylabel('Actual')
    plt.title(f'Confusion Matrix for {planet.capitalize()} Eclipse Types')
    plt.show()

# Evaluate the trained model on each planet's test split, with a divider
# line printed after every planet's report.
for planet in PLANETS:
    held_out = test_data[planet]
    evaluate_and_visualize(planetary_model, held_out, planet)
    print("\n" + "-" * 50)

## 5. Visualize Predictions vs Actual

In [None]:
def plot_predictions_vs_actual(model, test_data, planet):
    """Overlay actual and predicted eclipse types on a date axis.

    Args:
        model: Object exposing ``predict(dates, planet)``.
        test_data: Table with ``date`` and ``eclipse_type`` columns.
        planet: Planet name; forwarded to ``model.predict`` and used in the title.
    """
    predicted = model.predict(test_data['date'], planet)

    plt.figure(figsize=(15, 8))
    # Draw the ground truth first, then the model output, as two
    # semi-transparent point clouds sharing the same x values.
    layers = (
        (test_data['eclipse_type'], 'Actual'),
        (predicted, 'Predicted'),
    )
    for values, legend_name in layers:
        plt.scatter(test_data['date'], values, alpha=0.5, label=legend_name)

    plt.title(f'Actual vs Predicted Eclipse Types for {planet.capitalize()}')
    plt.xlabel('Date')
    plt.ylabel('Eclipse Type')
    plt.legend()
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.show()

# Render the actual-vs-predicted scatter for each planet's test split.
for planet in PLANETS:
    held_out = test_data[planet]
    plot_predictions_vs_actual(planetary_model, held_out, planet)

## 6. Analyze Prediction Errors

In [None]:
def analyze_errors(model, test_data, planet):
    """List the misclassified test rows and chart which true types they belong to.

    Prints the number of wrong predictions and a sample of the offending
    rows (with an added ``predicted`` column), then plots, for each true
    eclipse type, the fraction of all errors that fall on that type.

    Args:
        model: Object exposing ``predict(dates, planet)``.
        test_data: Table with ``date`` and ``eclipse_type`` columns.
        planet: Planet name; forwarded to ``model.predict`` and used in the output.
    """
    # Compare by position rather than by index label: ``model.predict`` may
    # return a list, an array, or a Series whose index differs from
    # ``test_data``'s, and only the row order is known to correspond.
    predicted = np.asarray(model.predict(test_data['date'], planet))
    is_wrong = test_data['eclipse_type'].to_numpy() != predicted

    errors = test_data[is_wrong].copy()
    errors['predicted'] = predicted[is_wrong]

    print(f"Number of errors for {planet.capitalize()}: {len(errors)}")
    if errors.empty:
        # No misclassifications: there is nothing to sample or to plot.
        return

    print("\nSample of prediction errors:")
    print(errors.head())

    # Analyze errors by eclipse type. normalize=True yields each type's share
    # of the total error count, not a per-type error rate, so the y-axis is
    # labelled accordingly.
    error_by_type = errors['eclipse_type'].value_counts(normalize=True)
    plt.figure(figsize=(10, 6))
    error_by_type.plot(kind='bar')
    plt.title(f'Distribution of Errors by Eclipse Type for {planet.capitalize()}')
    plt.xlabel('Eclipse Type')
    plt.ylabel('Share of Errors')
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.show()

# Run the error breakdown per planet, printing a divider after each one.
for planet in PLANETS:
    held_out = test_data[planet]
    analyze_errors(planetary_model, held_out, planet)
    print("\n" + "-" * 50)

## 7. Predict Future Planetary Eclipses

In [None]:
def predict_future_eclipses(model, planet, start_date, end_date):
    """Predict eclipses for every day in a date range, then print and plot them.

    A prediction is requested for each calendar day from ``start_date`` to
    ``end_date`` inclusive. Days whose prediction equals ``'No Eclipse'`` are
    discarded; the rest are printed and drawn as one scatter series per
    predicted eclipse type.

    Args:
        model: Object exposing ``predict(dates, planet)``.
        planet: Planet name; forwarded to ``model.predict`` and used in the output.
        start_date: First day of the range (passed to ``pd.date_range``).
        end_date: Last day of the range (passed to ``pd.date_range``).
    """
    future_dates = pd.date_range(start=start_date, end=end_date, freq='D')
    future_predictions = model.predict(future_dates, planet)

    future_eclipses = pd.DataFrame({'date': future_dates, 'predicted_eclipse': future_predictions})
    future_eclipses = future_eclipses[future_eclipses['predicted_eclipse'] != 'No Eclipse']

    print(f"Predicted Eclipses for {planet.capitalize()} ({start_date} to {end_date}):")
    print(future_eclipses)

    if future_eclipses.empty:
        # The printed empty table already reports the result; skip the blank
        # figure and the legend() call that would have no labelled artists.
        return

    plt.figure(figsize=(15, 8))
    for eclipse_type in future_eclipses['predicted_eclipse'].unique():
        subset = future_eclipses[future_eclipses['predicted_eclipse'] == eclipse_type]
        plt.scatter(subset['date'], subset['predicted_eclipse'], label=eclipse_type)
    plt.xlabel('Date')
    plt.ylabel('Eclipse Type')
    plt.title(f'Predicted Eclipses for {planet.capitalize()} ({start_date} to {end_date})')
    plt.legend()
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.show()

# Forecast window shared by all planets (both bounds inclusive).
start_date, end_date = '2025-01-01', '2030-12-31'

# Print and plot the forecast per planet, with a divider after each.
for planet in PLANETS:
    predict_future_eclipses(planetary_model, planet, start_date, end_date)
    print("\n" + "-" * 50)

## 8. Analyze Planetary Eclipse Patterns

In [None]:
def analyze_eclipse_intervals(data, planet):
    """Plot and summarize the gaps, in days, between consecutive records.

    The ``date`` column is sorted, consecutive differences are taken in whole
    days, and the resulting intervals are shown as a histogram followed by
    their mean and median.

    Args:
        data: Table with a ``date`` column (datetime values or strings that
            ``pd.to_datetime`` can parse).
        planet: Planet name; used in the chart title and printed summary.
    """
    # to_datetime leaves datetime columns unchanged and lets string dates
    # work with the ``.dt`` accessor below.
    dates = pd.to_datetime(data['date']).sort_values()

    # diff() leaves a NaN in the first slot (no predecessor); drop it so only
    # real intervals reach the histogram.
    intervals = dates.diff().dt.days.dropna()

    if intervals.empty:
        # Fewer than two dated records: no interval exists to plot or average.
        print(f"Not enough records to compute eclipse intervals for {planet.capitalize()}.")
        return

    plt.figure(figsize=(12, 6))
    plt.hist(intervals, bins=30, edgecolor='black')
    plt.title(f'Distribution of Days Between Eclipses for {planet.capitalize()}')
    plt.xlabel('Days')
    plt.ylabel('Frequency')
    plt.show()

    print(f"Average days between eclipses for {planet.capitalize()}: {intervals.mean():.2f}")
    print(f"Median days between eclipses for {planet.capitalize()}: {intervals.median():.2f}")

# Interval analysis over each planet's complete data set (training and
# testing rows together), with a divider after each planet.
for planet in PLANETS:
    print(f"Historical Eclipse Intervals for {planet.capitalize()}:")
    full_history = planetary_data[planet]
    analyze_eclipse_intervals(full_history, planet)
    print("\n" + "-" * 50)

## 9. Comparative Analysis Across Planets

In [None]:
def compare_eclipse_frequencies():
    """Plot each eclipse type's relative frequency side by side for all planets.

    Reads the module-level ``PLANETS`` list and ``planetary_data`` mapping,
    builds a table with one row per eclipse type and one column per planet,
    and shows it as a grouped bar chart.
    """
    frequencies = {
        planet: planetary_data[planet]['eclipse_type'].value_counts(normalize=True)
        for planet in PLANETS
    }
    df_frequencies = pd.DataFrame(frequencies)

    # DataFrame.plot opens a figure of its own unless it is given an Axes,
    # which would leave a figure created via plt.figure() blank and discard
    # its figsize. Create the Axes explicitly and draw onto it.
    _, ax = plt.subplots(figsize=(12, 8))
    df_frequencies.plot(kind='bar', ax=ax)
    ax.set_title('Eclipse Type Frequencies Across Planets')
    ax.set_xlabel('Eclipse Type')
    ax.set_ylabel('Frequency')
    ax.legend(title='Planet')
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.show()
# Draw the cross-planet eclipse-type frequency chart.
compare_eclipse_frequencies()
def compare_model_performance():
    """Show a heatmap of the evaluation metrics with one row per planet.

    Reads the module-level ``PLANETS``, ``test_data`` and ``planetary_model``.
    For each planet the model's predictions on the test split are scored with
    ``evaluate_planetary_eclipse_predictions``; the per-planet results are
    assembled into a table and transposed so that planets form the rows.
    """
    def _score(planet):
        # Predict on the planet's test dates and score against the true types.
        held_out = test_data[planet]
        predicted = planetary_model.predict(held_out['date'], planet)
        return evaluate_planetary_eclipse_predictions(held_out['eclipse_type'], predicted)

    performance = {planet: _score(planet) for planet in PLANETS}
    df_performance = pd.DataFrame(performance).T

    plt.figure(figsize=(12, 8))
    sns.heatmap(df_performance, annot=True, cmap='YlGnBu')
    plt.title('Model Performance Comparison Across Planets')
    plt.tight_layout()
    plt.show()
# Draw the per-planet metric heatmap, then print the observation template
# that the analyst is expected to fill in.
compare_model_performance()

observation_lines = (
    "Key observations from the comparative analysis:",
    "1. [Your observation about eclipse frequencies across planets]",
    "2. [Your observation about model performance differences]",
    "3. [Any other interesting patterns or anomalies you notice]",
)
for line in observation_lines:
    print(line)


## 10. Conclusion and Future Work

In this notebook, we've conducted a comprehensive analysis of planetary eclipse predictions across multiple planets in our solar system. Key findings include:

1. [Summarize overall model performance across planets]
2. [Discuss any common patterns or interesting differences in eclipse occurrences between planets]
3. [Highlight any particularly challenging aspects of planetary eclipse prediction]
4. [Mention any unexpected results or insights gained from the comparative analysis]

Future work could include:
1. Incorporating additional astronomical features to improve prediction accuracy, especially for planets where the model performed less well.
2. Extending the model to predict more detailed characteristics of planetary eclipses, such as duration or magnitude.
3. Investigating the potential for transfer learning between planets — can a model trained on one planet's data improve predictions for another?
4. Developing a unified model that can predict eclipses for all planets simultaneously, potentially uncovering system-wide patterns.
5. Comparing our machine learning approach with traditional astronomical methods for planetary eclipse prediction.

This analysis demonstrates the potential of applying machine learning techniques to complex astronomical phenomena across multiple celestial bodies. It provides a foundation for further research into planetary dynamics and eclipse prediction methodologies.