In [4]:
import pandas as pd
import numpy as np
import pickle
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_squared_error

# Read the input CSV into the module-level DataFrame that the pipeline below consumes
file_path = 'new_dataset.csv'  # Replace with the path to your CSV file
data = pd.read_csv(filepath_or_buffer=file_path)

# Preprocessing function
def preprocess_data(data):
    """Return a cleaned copy of ``data`` with numeric columns and filled gaps.

    Every column whose dtype is not numeric is passed through
    ``pd.to_numeric(..., errors='coerce')``, so values that cannot be parsed
    as numbers become NaN. Missing values are then replaced by the mean of
    their column. A column that ends up entirely NaN (for example free text
    or timestamps) has no mean and therefore stays NaN.

    The input DataFrame is not modified; callers must use the return value.

    Args:
        data: pandas DataFrame to clean.

    Returns:
        A new DataFrame with the same columns as ``data``.
    """
    # Work on a copy: coercing text columns to NaN is destructive, and the
    # caller may still need the original values.
    cleaned = data.copy()

    # Identify non-numeric columns
    non_numeric_columns = cleaned.select_dtypes(exclude=[np.number]).columns
    print("Non-numeric columns:", non_numeric_columns.tolist())

    # Convert non-numeric columns to numeric; unparseable entries become NaN
    for column in non_numeric_columns:
        cleaned[column] = pd.to_numeric(cleaned[column], errors='coerce')

    # Fill missing values with the per-column mean (numeric_only avoids
    # errors should any non-numeric column survive the conversion above)
    cleaned = cleaned.fillna(cleaned.mean(numeric_only=True))
    return cleaned

# Function for storm event prediction
def storm_event_prediction(data, random_state=None):
    """Fit a linear regression on simulated storm labels and return its output.

    The target is NOT observed data: it is a random 0/1 vector drawn with
    probabilities 0.9 / 0.1, acting as a placeholder until a real storm label
    is defined. The model is evaluated on the same rows it was fitted on, so
    the printed MSE is an in-sample figure.

    Args:
        data: DataFrame containing the columns 'u10', 'v10', 'sp' and 'tp'.
        random_state: Optional integer seed for the simulated labels. When
            None (the default) the global NumPy random state is used, exactly
            as before this parameter existed.

    Returns:
        NumPy array of unrounded regression outputs, one per row of ``data``.

    Raises:
        KeyError: if any of the feature columns is missing from ``data``.
    """
    features = ['u10', 'v10', 'sp', 'tp']  # Wind speed, pressure, precipitation
    X = data[features]

    # A dedicated RandomState makes the simulation reproducible on request;
    # the np.random module itself exposes the same API for the default path.
    rng = np.random if random_state is None else np.random.RandomState(random_state)

    # Simulating storm events (this will need a defined target)
    y = rng.choice([0, 1], size=len(X), p=[0.9, 0.1])  # Random simulation (adjust probabilities)

    model = LinearRegression()
    model.fit(X, y)
    predictions = model.predict(X)

    # Calculate MSE on rounded predictions so they are comparable to 0/1 labels
    mse = mean_squared_error(y, predictions.round())
    print("Storm Event Prediction MSE:", mse)

    return predictions

# Function for heat index prediction
def heat_index_prediction(data):
    """Regress a formula-derived heat index on 't2m' and 'd2m'.

    The target is computed directly from the two input columns as
    ``t2m + 0.33 * d2m - 42.379`` (an example formula), then a linear
    regression is fitted to reproduce it from those same columns. The
    in-sample MSE is printed.

    Args:
        data: DataFrame containing the columns 't2m' and 'd2m'.

    Returns:
        NumPy array with the fitted heat-index value for each row.
    """
    inputs = data[['t2m', 'd2m']]  # 2m temperature and dewpoint temperature
    temperature = inputs['t2m']
    dewpoint = inputs['d2m']

    # Simulated heat index; example formula, adjust as needed
    target = temperature + (0.33 * dewpoint) - 42.379

    regressor = LinearRegression().fit(inputs, target)
    fitted = regressor.predict(inputs)

    print("Heat Index Prediction MSE:", mean_squared_error(target, fitted))
    return fitted

# Function for climate pattern detection
def climate_pattern_detection(data, random_state=None):
    """Fit a random forest on simulated climate-pattern labels.

    The target is NOT observed data: each row is randomly labelled 'normal',
    'heat_wave' or 'cold_front' with probabilities 0.8 / 0.1 / 0.1. The
    classifier is then asked to predict the same rows it was trained on.

    Args:
        data: DataFrame containing the columns 'z', 'u10' and 't2m'.
        random_state: Optional integer seed used for both the simulated labels
            and the RandomForestClassifier. When None (the default) the
            function behaves as it did before this parameter existed.

    Returns:
        NumPy array of predicted label strings, one per row of ``data``.

    Raises:
        KeyError: if any of the feature columns is missing from ``data``.
    """
    features = ['z', 'u10', 't2m']  # Geopotential height, wind speed, and temperature
    X = data[features]

    # A dedicated RandomState makes the simulation reproducible on request;
    # the np.random module itself exposes the same API for the default path.
    rng = np.random if random_state is None else np.random.RandomState(random_state)

    # Simulating climate patterns
    y = rng.choice(['normal', 'heat_wave', 'cold_front'], size=len(X), p=[0.8, 0.1, 0.1])  # Random simulation

    # Encode the string labels as integers for the classifier
    label_encoder = LabelEncoder()
    y_encoded = label_encoder.fit_transform(y)

    # Using RandomForestClassifier instead of LinearRegression
    model = RandomForestClassifier(random_state=random_state)
    model.fit(X, y_encoded)
    predictions = model.predict(X)

    # Decode the predictions back to original labels
    predictions = label_encoder.inverse_transform(predictions)

    return predictions

# Function for drought index prediction
def drought_index_prediction(data, random_state=None):
    """Fit a linear regression on a simulated drought index and return its output.

    The target is NOT observed data: it is drawn uniformly at random from
    [0, 100) as a placeholder. The model is evaluated on the same rows it was
    fitted on, so the printed MSE is an in-sample figure.

    Args:
        data: DataFrame containing the columns 'tp' and 'e'.
        random_state: Optional integer seed for the simulated target. When
            None (the default) the global NumPy random state is used, exactly
            as before this parameter existed.

    Returns:
        NumPy array of regression outputs, one per row of ``data``.

    Raises:
        KeyError: if any of the feature columns is missing from ``data``.
    """
    features = ['tp', 'e']  # Precipitation and evaporation
    X = data[features]

    # A dedicated RandomState makes the simulation reproducible on request;
    # the np.random module itself exposes the same API for the default path.
    rng = np.random if random_state is None else np.random.RandomState(random_state)

    # Simulating drought index
    y = rng.uniform(0, 100, size=len(X))  # Example: random drought index between 0 and 100

    model = LinearRegression()
    model.fit(X, y)
    predictions = model.predict(X)

    # Calculate MSE
    mse = mean_squared_error(y, predictions)
    print("Drought Index Prediction MSE:", mse)

    return predictions

# Main Monte Carlo Simulation function
def monte_carlo_simulation(data):
    """Preprocess ``data`` and run each of the four predictors once.

    The predictors are invoked in a fixed order (storm, heat index, climate
    pattern, drought index), each on the same preprocessed DataFrame.

    Args:
        data: Raw DataFrame; it is passed through ``preprocess_data`` first.

    Returns:
        dict mapping 'storm_predictions', 'heat_index_predictions',
        'climate_pattern_predictions' and 'drought_index_predictions' to the
        value returned by the corresponding predictor.
    """
    prepared = preprocess_data(data)

    # (result key, predictor) pairs; the tuple order is the execution order
    stages = (
        ('storm_predictions', storm_event_prediction),
        ('heat_index_predictions', heat_index_prediction),
        ('climate_pattern_predictions', climate_pattern_detection),
        ('drought_index_predictions', drought_index_prediction),
    )

    return {key: predictor(prepared) for key, predictor in stages}

# Execute the whole pipeline on the loaded dataset
results = monte_carlo_simulation(data)

# Persist the outcome to disk. What gets pickled is the dictionary of
# prediction arrays returned above, not the fitted estimator objects.
with open('monte_carlo_model.pkl', mode='wb') as file:
    pickle.dump(results, file)

print("Monte Carlo simulation and model saved successfully!")


Non-numeric columns: ['valid_time']
Storm Event Prediction MSE: 0.10051098064796694
Heat Index Prediction MSE: 1.972885171266452e-27
Drought Index Prediction MSE: 830.7433884660745
Monte Carlo simulation and model saved successfully!
