# Earth Eclipse Prediction Analysis

This notebook analyzes the performance of our Earth Eclipse prediction model, visualizes historical and predicted eclipses, and explores patterns in eclipse occurrences.

In [None]:
import sys
sys.path.append('../')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta
from sklearn.metrics import classification_report, confusion_matrix

from src.data_processing.data_loader import DataLoader
from src.models.eclipse_predictor import EclipsePredictor
from src.evaluation.prediction_evaluator import evaluate_eclipse_predictions

%matplotlib inline
# Matplotlib 3.6 deprecated the bare 'seaborn' style name and later releases
# removed it, so plt.style.use('seaborn') fails on a current install. Use
# whichever spelling this matplotlib ships; if neither exists, fall back to
# seaborn's own default theme.
_seaborn_style = next(
    (name for name in ('seaborn-v0_8', 'seaborn') if name in plt.style.available),
    None,
)
if _seaborn_style is not None:
    plt.style.use(_seaborn_style)
else:
    sns.set_theme()

## 1. Load and Prepare Data

In [None]:
# Pull the full eclipse table through the project's loader.
loader = DataLoader()
eclipse_data = loader.load_eclipse_data()

# Chronological hold-out: rows dated before 2020-01-01 are used for training,
# rows on or after that date for testing. Two separate comparisons are kept
# so that a row whose date compares False both ways lands in neither split.
dated_before_split = eclipse_data['date'] < '2020-01-01'
dated_from_split = eclipse_data['date'] >= '2020-01-01'
train_data = eclipse_data.loc[dated_before_split]
test_data = eclipse_data.loc[dated_from_split]

for shape_line in (
    f"Training data shape: {train_data.shape}",
    f"Testing data shape: {test_data.shape}",
):
    print(shape_line)

# Show the first few rows as a quick sanity check of the loaded table.
print("\nSample of eclipse data:")
print(eclipse_data.head())

## 2. Explore Historical Eclipse Data

In [None]:
def plot_eclipse_distribution(data):
    """Draw a bar chart of how many rows of ``data`` fall under each eclipse type.

    Args:
        data: Table with an ``eclipse_type`` column; handed to
            ``sns.countplot`` as its ``data`` argument.

    Returns:
        None. The figure is rendered via ``plt.show()``.
    """
    _, ax = plt.subplots(figsize=(12, 6))
    sns.countplot(x='eclipse_type', data=data, ax=ax)
    ax.set_title('Distribution of Eclipse Types')
    ax.set_xlabel('Eclipse Type')
    ax.set_ylabel('Count')
    # Slant the category labels so longer type names do not collide.
    plt.xticks(rotation=45)
    plt.show()

plot_eclipse_distribution(eclipse_data)

# Relative frequency of each eclipse type: normalize=True turns the raw
# counts into fractions of all rows.
eclipse_types = eclipse_data['eclipse_type']
eclipse_frequency = eclipse_types.value_counts(normalize=True)
print("\nEclipse frequency:", eclipse_frequency, sep="\n")

## 3. Train Eclipse Prediction Model

In [None]:
# Build the project's predictor and fit it on the training split only, so the
# test split stays unseen until evaluation.
eclipse_model = EclipsePredictor()
eclipse_model.train(train_data)

# NOTE(review): this message is printed unconditionally after train() returns;
# it presumes train() raises on failure rather than returning a status — confirm.
print("Model trained successfully.")

## 4. Evaluate Model Performance

In [None]:
# Predict an eclipse type for every test-period date, then score the
# predictions against the recorded types with the project's evaluator.
predictions = eclipse_model.predict(test_data['date'])
evaluation_results = evaluate_eclipse_predictions(test_data['eclipse_type'], predictions)

print("Evaluation Results:")
for metric, value in evaluation_results.items():
    print(f"{metric}: {value}")

# Display classification report
print("\nClassification Report:")
print(classification_report(test_data['eclipse_type'], predictions))

# Plot confusion matrix.
# Fix the class order explicitly (sorted union of actual and predicted labels,
# the same order scikit-learn derives when `labels` is omitted) so the heatmap
# rows/columns can be annotated with eclipse-type names instead of 0..n-1.
class_labels = np.union1d(test_data['eclipse_type'], predictions)
cm = confusion_matrix(test_data['eclipse_type'], predictions, labels=class_labels)
plt.figure(figsize=(10, 8))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_labels,
    yticklabels=class_labels,
)
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.title('Confusion Matrix for Eclipse Types')
plt.show()

## 5. Visualize Predictions vs Actual

In [None]:
# Overlay recorded and predicted eclipse types on a shared timeline. Both
# series are drawn half-transparent so coinciding markers remain visible.
fig, ax = plt.subplots(figsize=(15, 8))
test_dates = test_data['date']
ax.scatter(test_dates, test_data['eclipse_type'], alpha=0.5, label='Actual')
ax.scatter(test_dates, predictions, alpha=0.5, label='Predicted')
ax.set_xlabel('Date')
ax.set_ylabel('Eclipse Type')
ax.set_title('Actual vs Predicted Eclipse Types')
ax.legend()
plt.xticks(rotation=45)
fig.tight_layout()
plt.show()

## 6. Analyze Prediction Errors

In [None]:
# Compare actual and predicted labels position-by-position on raw values.
# Comparing the Series directly would align on index labels and raise if
# `predictions` is a Series indexed differently from the test_data slice.
is_misclassified = test_data['eclipse_type'].to_numpy() != np.asarray(predictions)
errors = test_data[is_misclassified]
print(f"Number of errors: {len(errors)}")
print("\nSample of prediction errors:")
print(errors.head())

# Analyze errors by eclipse type.
# normalize=True yields each type's share of all misclassified rows (the bars
# sum to 1); it is not a per-type error rate, so the axis is labelled as a share.
error_by_type = errors['eclipse_type'].value_counts(normalize=True)
if error_by_type.empty:
    # A bar plot of an empty Series fails; with no errors there is nothing to draw.
    print("\nNo prediction errors to plot.")
else:
    plt.figure(figsize=(10, 6))
    error_by_type.plot(kind='bar')
    plt.title('Distribution of Errors by Eclipse Type')
    plt.xlabel('Eclipse Type')
    plt.ylabel('Share of Errors')
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.show()

## 7. Predict Future Eclipses

In [None]:
# Ask the model for a label on every calendar day from 2025 through 2030.
future_dates = pd.date_range(start='2025-01-01', end='2030-12-31', freq='D')
future_predictions = eclipse_model.predict(future_dates)

# Pair each day with its label, then keep only days not labelled 'No Eclipse'.
all_future_days = pd.DataFrame({'date': future_dates, 'predicted_eclipse': future_predictions})
is_eclipse_day = all_future_days['predicted_eclipse'] != 'No Eclipse'
future_eclipses = all_future_days[is_eclipse_day]

print("Predicted Eclipses for 2025-2030:")
print(future_eclipses)

# Visualize future eclipse predictions: one scatter series per predicted
# type, in order of first appearance, so each type gets its own legend entry.
fig, ax = plt.subplots(figsize=(15, 8))
for eclipse_type in future_eclipses['predicted_eclipse'].unique():
    matches_type = future_eclipses['predicted_eclipse'] == eclipse_type
    subset = future_eclipses.loc[matches_type]
    ax.scatter(subset['date'], [eclipse_type] * len(subset), label=eclipse_type)
ax.set_xlabel('Date')
ax.set_ylabel('Eclipse Type')
ax.set_title('Predicted Eclipses (2025-2030)')
ax.legend()
plt.xticks(rotation=45)
fig.tight_layout()
plt.show()

## 8. Analyze Eclipse Patterns

In [None]:
def analyze_eclipse_intervals(data):
    """Plot and summarize the gaps, in whole days, between consecutive eclipses.

    Args:
        data: DataFrame with a ``date`` column. The values are passed through
            ``pd.to_datetime``, so datetime values and parseable date strings
            are both accepted. The frame itself is not modified.

    Returns:
        None. Shows a histogram of the gaps and prints their mean and median.
        If fewer than two dated rows are available, prints a notice and
        returns without plotting.
    """
    # Coerce first so the .dt accessor below works even when the column
    # arrives as strings rather than datetimes.
    event_dates = pd.to_datetime(data['date']).sort_values()

    # diff() has no predecessor for the earliest date and yields a missing
    # value there; drop it so only real gaps reach the histogram.
    intervals = event_dates.diff().dt.days.dropna()

    if intervals.empty:
        print("Not enough eclipses to compute intervals.")
        return

    plt.figure(figsize=(12, 6))
    plt.hist(intervals, bins=30, edgecolor='black')
    plt.title('Distribution of Days Between Eclipses')
    plt.xlabel('Days')
    plt.ylabel('Frequency')
    plt.show()

    print(f"Average days between eclipses: {intervals.mean():.2f}")
    print(f"Median days between eclipses: {intervals.median():.2f}")

# Run the same interval analysis on the recorded eclipses and then on the
# model's predicted future eclipses, each under its own heading.
interval_reports = (
    ("Historical Eclipse Intervals:", eclipse_data),
    ("\nPredicted Future Eclipse Intervals:", future_eclipses),
)
for heading, eclipse_events in interval_reports:
    print(heading)
    analyze_eclipse_intervals(eclipse_events)

## 9. Conclusion and Future Work

In this notebook, we've analyzed the performance of our Earth Eclipse prediction model. Key findings include:

1. **TODO:** Summarize model performance (e.g., accuracy, precision, recall).
2. **TODO:** Discuss any patterns or interesting observations in historical and predicted eclipses.
3. **TODO:** Highlight any challenges or areas for improvement.

Future work could include:
1. Incorporating additional features such as lunar node positions to improve prediction accuracy.
2. Extending the model to predict eclipse magnitudes and durations.
3. Developing a user-friendly interface for eclipse predictions and visualizations.
4. Comparing our predictions with those from established astronomical models for validation.

This analysis provides valuable insights into eclipse patterns and demonstrates the potential of machine learning in astronomical predictions.