# Space Weather Monitoring & Solar Storm Risk Prediction System
## Complete Implementation for Google Colab

This notebook implements a comprehensive system for:
- Real-time space weather data collection
- Solar flare prediction
- Geomagnetic storm forecasting
- Satellite and communication system risk assessment
- Interactive visualization dashboard

## 1. Install Required Dependencies

In [None]:
!pip install requests pandas numpy matplotlib seaborn scikit-learn tensorflow plotly
!pip install xgboost lightgbm astropy sunpy beautifulsoup4 lxml
!pip install ipywidgets folium

## 2. Import Libraries

In [None]:
import requests
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
from datetime import datetime, timedelta
import json
import warnings
warnings.filterwarnings('ignore')

# Machine Learning
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier, GradientBoostingRegressor
from sklearn.metrics import classification_report, confusion_matrix, mean_squared_error, r2_score
import xgboost as xgb
import lightgbm as lgb

# Deep Learning
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import LSTM, Dense, Dropout, Input, Bidirectional, Attention
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

# Set style
# Global plotting defaults for every figure drawn later in the notebook.
# NOTE(review): keep this order — the matplotlib style sheet is applied first
# and the seaborn palette second; presumably reversing them would let the
# style sheet reset the colour cycle (confirm before reordering).
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette('husl')

# Reaching this line means every import in this cell succeeded.
print("✓ All libraries imported successfully")

## 3. Data Collection Module

In [None]:
class SpaceWeatherDataCollector:
    """
    Collects real-time and historical space weather data from multiple sources.

    Every public ``get_*`` method is best-effort: any failure (network error,
    non-2xx HTTP status, malformed JSON, missing ``time_tag`` field, bad date
    argument) is printed and reported to the caller as ``None`` instead of
    raising.
    """

    # Seconds to wait for each HTTP request before giving up.
    REQUEST_TIMEOUT = 10

    def __init__(self):
        self.noaa_base_url = "https://services.swpc.noaa.gov/json"
        self.nasa_donki_url = "https://api.nasa.gov/DONKI"
        self.nasa_api_key = "DEMO_KEY"  # Replace with your NASA API key

    def _fetch_json(self, url, params=None):
        """Return the decoded JSON body of a GET request to ``url``.

        Raises on transport errors and on non-2xx responses. Without the
        status check an error payload (for example a rate-limit message
        returned as a JSON object) would be decoded successfully and later be
        mistaken for real data.
        """
        response = requests.get(url, params=params, timeout=self.REQUEST_TIMEOUT)
        response.raise_for_status()
        return response.json()

    def _fetch_noaa_timeseries(self, endpoint, label):
        """Fetch a NOAA JSON endpoint and parse its ``time_tag`` column.

        Args:
            endpoint: Path below ``self.noaa_base_url``.
            label: Human-readable name used in the error message.

        Returns:
            A DataFrame whose ``time_tag`` column holds datetimes, or ``None``
            on any failure.
        """
        try:
            data = self._fetch_json(f"{self.noaa_base_url}/{endpoint}")
            df = pd.DataFrame(data)
            df['time_tag'] = pd.to_datetime(df['time_tag'])
            return df
        except Exception as e:  # deliberate best-effort boundary
            print(f"Error fetching {label}: {e}")
            return None

    def _fetch_donki_events(self, event_type, start_date, end_date, label):
        """Fetch NASA DONKI events of ``event_type`` between two dates.

        Args:
            event_type: DONKI resource name appended to ``self.nasa_donki_url``.
            start_date: Object with ``strftime`` marking the window start.
            end_date: Object with ``strftime`` marking the window end.
            label: Human-readable name used in the error message.

        Returns:
            A DataFrame of events, or ``None`` when the response is empty or
            any failure occurs.
        """
        try:
            params = {
                'startDate': start_date.strftime('%Y-%m-%d'),
                'endDate': end_date.strftime('%Y-%m-%d'),
                'api_key': self.nasa_api_key
            }
            data = self._fetch_json(f"{self.nasa_donki_url}/{event_type}", params=params)
            return pd.DataFrame(data) if data else None
        except Exception as e:  # deliberate best-effort boundary
            print(f"Error fetching {label}: {e}")
            return None

    def get_solar_wind_data(self):
        """Fetch real-time solar wind data (DataFrame or ``None``)."""
        return self._fetch_noaa_timeseries("plasma-7-day.json", "solar wind data")

    def get_geomagnetic_data(self):
        """Fetch geomagnetic K-index data (DataFrame or ``None``)."""
        return self._fetch_noaa_timeseries("planetary_k_index_1m.json", "geomagnetic data")

    def get_solar_flares(self, start_date, end_date):
        """Fetch solar flare events from NASA DONKI (DataFrame or ``None``)."""
        return self._fetch_donki_events("FLR", start_date, end_date, "solar flares")

    def get_cme_data(self, start_date, end_date):
        """Fetch Coronal Mass Ejection data (DataFrame or ``None``)."""
        return self._fetch_donki_events("CME", start_date, end_date, "CME data")

    def get_xray_flux(self):
        """Fetch X-ray flux data (GOES satellite) (DataFrame or ``None``)."""
        return self._fetch_noaa_timeseries("goes/primary/xrays-7-day.json", "X-ray flux")

    def get_proton_flux(self):
        """Fetch proton flux data (DataFrame or ``None``)."""
        return self._fetch_noaa_timeseries(
            "goes/primary/integral-protons-plot-6-hour.json", "proton flux"
        )

# Initialize collector
collector = SpaceWeatherDataCollector()
print("✓ Data collector initialized")

## 4. Fetch Real-Time Data

In [None]:
print("Fetching real-time space weather data...\n")

# Fetch current data (each call returns a DataFrame, or None if the fetch failed)
solar_wind_df = collector.get_solar_wind_data()
geomag_df = collector.get_geomagnetic_data()
xray_df = collector.get_xray_flux()
proton_df = collector.get_proton_flux()

# Fetch historical events (last 30 days)
end_date = datetime.now()
start_date = end_date - timedelta(days=30)
flares_df = collector.get_solar_flares(start_date, end_date)
cme_df = collector.get_cme_data(start_date, end_date)

# Display data summaries; feeds that failed to load are skipped here because
# the collector has already printed the reason.
if solar_wind_df is not None:
    print(f"✓ Solar Wind Data: {len(solar_wind_df)} records")
    print(f"  Latest: {solar_wind_df['time_tag'].max()}")

if geomag_df is not None:
    print(f"✓ Geomagnetic Data: {len(geomag_df)} records")
    # Guard both an empty feed and a feed without the expected column so the
    # summary never aborts the cell.
    if len(geomag_df) > 0 and 'kp_index' in geomag_df.columns:
        current_kp = geomag_df['kp_index'].iloc[-1]
    else:
        current_kp = 'N/A'
    print(f"  Current Kp Index: {current_kp}")

if xray_df is not None:
    print(f"✓ X-ray Flux Data: {len(xray_df)} records")

# The proton feed is fetched above, so report it like every other feed.
if proton_df is not None:
    print(f"✓ Proton Flux Data: {len(proton_df)} records")

if flares_df is not None:
    print(f"✓ Solar Flares (30 days): {len(flares_df)} events")

if cme_df is not None:
    print(f"✓ CME Events (30 days): {len(cme_df)} events")

print("\n✓ Data collection complete")

## 5. Feature Engineering Module

In [None]:
class FeatureEngineering:
    """
    Creates features for machine learning models.

    All methods are static. The DataFrame helpers work on a copy and return
    it, so the caller's frame is never modified.
    """

    # (exclusive upper bound, label) pairs in ascending order. Bounds follow
    # the standard GOES soft X-ray flare scale in W/m^2: A < 1e-7 <= B < 1e-6
    # <= C < 1e-5 <= M < 1e-4 <= X.
    _FLARE_CLASS_BOUNDS = ((1e-7, 'A'), (1e-6, 'B'), (1e-5, 'C'), (1e-4, 'M'))

    # (exclusive upper bound on Kp, label) pairs in ascending order.
    _STORM_CLASS_BOUNDS = (
        (5, 'None'),
        (6, 'G1-Minor'),
        (7, 'G2-Moderate'),
        (8, 'G3-Strong'),
        (9, 'G4-Severe'),
    )

    @staticmethod
    def create_time_features(df, time_column='time_tag'):
        """Extract temporal features.

        Adds ``hour``, ``day``, ``month``, ``day_of_week`` and ``day_of_year``
        columns derived from ``time_column``. The column may already hold
        datetimes or anything ``pd.to_datetime`` can parse.
        """
        df = df.copy()
        stamps = pd.to_datetime(df[time_column])
        df['hour'] = stamps.dt.hour
        df['day'] = stamps.dt.day
        df['month'] = stamps.dt.month
        df['day_of_week'] = stamps.dt.dayofweek
        df['day_of_year'] = stamps.dt.dayofyear
        return df

    @staticmethod
    def create_rolling_features(df, columns, windows=(3, 6, 12, 24)):
        """Create rolling statistics.

        For every requested column present in ``df`` and every window size,
        adds ``{col}_rolling_{mean,std,max,min}_{window}``. Windows are
        row-count based with ``min_periods=1``, so early rows use the shorter
        history available (the std of a single row is NaN).
        Columns missing from ``df`` are skipped silently.
        """
        df = df.copy()
        for col in columns:
            if col not in df.columns:
                continue
            for window in windows:
                rolling = df[col].rolling(window=window, min_periods=1)
                df[f'{col}_rolling_mean_{window}'] = rolling.mean()
                df[f'{col}_rolling_std_{window}'] = rolling.std()
                df[f'{col}_rolling_max_{window}'] = rolling.max()
                df[f'{col}_rolling_min_{window}'] = rolling.min()
        return df

    @staticmethod
    def create_lag_features(df, columns, lags=(1, 3, 6, 12)):
        """Create lagged features.

        Adds ``{col}_lag_{lag}`` (the column shifted down by ``lag`` rows) for
        every requested column present in ``df``. The first ``lag`` rows of
        each new column are NaN.
        """
        df = df.copy()
        for col in columns:
            if col not in df.columns:
                continue
            for lag in lags:
                df[f'{col}_lag_{lag}'] = df[col].shift(lag)
        return df

    @staticmethod
    def create_rate_of_change(df, columns):
        """Calculate rate of change.

        Adds ``{col}_roc`` (fractional change from the previous row) and
        ``{col}_diff`` (absolute change from the previous row) for every
        requested column present in ``df``.
        """
        df = df.copy()
        infinities = [float('inf'), float('-inf')]
        for col in columns:
            if col not in df.columns:
                continue
            # A previous value of 0 makes the fractional change infinite;
            # store it as missing so it is handled by the same NaN treatment
            # as the first row instead of poisoning scalers and models.
            df[f'{col}_roc'] = df[col].pct_change().replace(infinities, float('nan'))
            df[f'{col}_diff'] = df[col].diff()
        return df

    @staticmethod
    def classify_flare_intensity(flux_value):
        """Classify X-ray flux into flare classes.

        Returns one of ``'A'``, ``'B'``, ``'C'``, ``'M'``, ``'X'``, or
        ``'Unknown'`` when the flux is missing (None/NaN). Missing values
        would otherwise fail every ``<`` comparison and be reported as the
        most severe class.
        """
        if pd.isna(flux_value):
            return 'Unknown'
        for upper_bound, label in FeatureEngineering._FLARE_CLASS_BOUNDS:
            if flux_value < upper_bound:
                return label
        return 'X'

    @staticmethod
    def classify_geomag_storm(kp_value):
        """Classify geomagnetic storm intensity.

        Returns ``'None'`` for Kp below 5, then ``'G1-Minor'`` through
        ``'G5-Extreme'`` (Kp of 9 or more), or ``'Unknown'`` when the Kp value
        is missing (None/NaN) so that gaps are not reported as extreme storms.
        """
        if pd.isna(kp_value):
            return 'Unknown'
        for upper_bound, label in FeatureEngineering._STORM_CLASS_BOUNDS:
            if kp_value < upper_bound:
                return label
        return 'G5-Extreme'

print("✓ Feature engineering module loaded")

## 6. Generate Synthetic Training Data

In [None]:
def generate_synthetic_training_data(n_samples=10000, seed=42):
    """
    Generate synthetic space weather data for model training.

    Values are drawn independently per row from fixed distributions, so the
    series carries no temporal structure beyond the hourly timestamps.

    Args:
        n_samples: Number of hourly rows to generate, starting 2020-01-01.
        seed: Seed passed to ``np.random.seed``. The default reproduces the
            historical dataset exactly. Note that this reseeds NumPy's global
            random state as a side effect.

    Returns:
        DataFrame with columns ``timestamp``, ``solar_wind_speed``,
        ``proton_density``, ``bt``, ``bz``, ``temperature``, ``xray_flux``,
        ``kp_index``, ``flare_occurred``, ``flare_class`` and ``storm_class``.
    """
    np.random.seed(seed)

    # Generate timestamps (one per hour)
    start_date = datetime(2020, 1, 1)
    timestamps = [start_date + timedelta(hours=i) for i in range(n_samples)]

    # NOTE: the order of the random draws below determines the generated
    # values for a given seed; do not reorder them.

    # Solar wind parameters
    solar_wind_speed = np.clip(np.random.normal(450, 100, n_samples), 250, 900)  # km/s
    proton_density = np.clip(np.random.lognormal(1.5, 0.5, n_samples), 0.5, 50)  # particles/cm³

    # IMF (Interplanetary Magnetic Field)
    bt = np.random.gamma(2, 2, n_samples)  # nT
    bz = np.random.normal(0, 3, n_samples)  # nT

    # Temperature
    temperature = np.random.lognormal(11, 0.5, n_samples)  # Kelvin

    # X-ray flux (log-normally distributed, clipped to a bounded range)
    xray_flux = np.clip(np.random.lognormal(-17, 2, n_samples), 1e-9, 1e-3)

    # Calculate Kp index (simplified model):
    # higher solar wind speed + southward (negative) Bz = higher Kp
    southward_bz = np.abs(np.minimum(bz, 0))
    kp_base = (solar_wind_speed - 300) / 100 + southward_bz / 2
    kp_noise = np.random.normal(0, 0.5, n_samples)
    kp_index = np.clip(kp_base + kp_noise, 0, 9)

    # Flare probability: a steep logistic in log10(flux) centred on 1e-6
    flare_prob = 1 / (1 + np.exp(-10 * (np.log10(xray_flux) + 6)))
    flare_occurred = (np.random.random(n_samples) < flare_prob).astype(int)

    # Labels are derived with the shared classifiers so they always agree
    # with the thresholds used elsewhere in the notebook.
    flare_class = [FeatureEngineering.classify_flare_intensity(x) for x in xray_flux]
    storm_class = [FeatureEngineering.classify_geomag_storm(k) for k in kp_index]

    return pd.DataFrame({
        'timestamp': timestamps,
        'solar_wind_speed': solar_wind_speed,
        'proton_density': proton_density,
        'bt': bt,
        'bz': bz,
        'temperature': temperature,
        'xray_flux': xray_flux,
        'kp_index': kp_index,
        'flare_occurred': flare_occurred,
        'flare_class': flare_class,
        'storm_class': storm_class,
    })

# Build the synthetic training set and report its size and label balance
print("Generating synthetic training data...")
training_data = generate_synthetic_training_data(n_samples=10000)
print(f"✓ Generated {len(training_data)} training samples")
print(f"\nData shape: {training_data.shape}")
# Heading and per-class counts are emitted on separate lines
print("\nFlare distribution:", training_data['flare_class'].value_counts(), sep="\n")
print("\nStorm distribution:", training_data['storm_class'].value_counts(), sep="\n")

## 7. Prepare Features for ML Models

In [None]:
# Apply feature engineering
fe = FeatureEngineering()

print("Creating features...")
training_data = fe.create_time_features(training_data, 'timestamp')

# Raw measurement columns that receive rolling, lag and rate-of-change features
numeric_cols = ['solar_wind_speed', 'proton_density', 'bt', 'bz', 'temperature', 'xray_flux']
training_data = fe.create_rolling_features(training_data, numeric_cols, windows=[6, 12, 24])
training_data = fe.create_lag_features(training_data, numeric_cols, lags=[1, 3, 6])
training_data = fe.create_rate_of_change(training_data, numeric_cols)

# Fill NaN values: back-fill first (the gaps from lag/diff features sit at the
# start of the series), then forward-fill anything still missing.
# DataFrame.bfill()/ffill() replace fillna(method=...), which is deprecated in
# pandas 2.1+ and no longer accepted by pandas 3.
training_data = training_data.bfill().ffill()

print(f"✓ Feature engineering complete")
print(f"Total features: {training_data.shape[1]}")