# Weather Prediction ML Project - Demo

This notebook demonstrates the weather prediction machine learning model.

## Overview
We'll build a model to predict weather conditions (Sunny, Cloudy, Rainy, Stormy) from meteorological features: temperature, humidity, pressure, wind speed, and cloud cover.

In [None]:
# Import required libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# NOTE(review): confusion_matrix is not referenced by any cell visible in this
# notebook (the matrix plotted later is read from the result of
# predictor.evaluate) — confirm before removing.
from sklearn.metrics import confusion_matrix
import warnings
# Installs a blanket 'ignore' filter: warnings raised after this point are
# suppressed unless the filter is changed later. This keeps the demo output
# tidy, but it also hides deprecation and data-related warnings, so narrow or
# remove it when debugging.
warnings.filterwarnings('ignore')

# Import our custom modules
from data_preprocessing import generate_sample_data, clean_data, prepare_features, split_data, scale_features
from weather_predictor import WeatherPredictor
import config

# Set visualization style
sns.set_style('whitegrid')
# Default size for figures that do not pass their own figsize; the plotting
# cells below all pass an explicit figsize.
plt.rcParams['figure.figsize'] = (12, 6)

## 1. Generate Sample Data

We'll generate synthetic weather data for demonstration purposes.

In [None]:
# Generate sample data
# NOTE(review): no random seed is set in this notebook before this call; unless
# generate_sample_data seeds its own generator, a re-run may produce a
# different dataset — confirm in data_preprocessing.
data = generate_sample_data(n_samples=1000)
print(f"Generated {len(data)} weather samples")
print("\nFirst few rows:")
# Last expression of the cell, so the notebook renders it as the cell output.
data.head()

## 2. Data Exploration

In [None]:
# Summary statistics for the generated dataset
print("Data Statistics:")
summary_stats = data.describe()
print(summary_stats)

# Occurrences of each weather condition, as a count and as a percentage of
# all rows in `data`
print("\nWeather Condition Distribution:")
weather_dist = data['weather_condition'].value_counts()
for condition_id, count in weather_dist.items():
    # Translate the raw condition id into its display name from config
    condition_name = config.WEATHER_CONDITIONS[condition_id]
    share = count/len(data)*100
    print(f"{condition_name}: {count} ({share:.1f}%)")

In [None]:
# Bar chart of how many samples fall into each weather condition.
# weather_labels is also used by the confusion-matrix cell further down, so it
# keeps its name and is built from the condition ids present in the data.
weather_labels = [config.WEATHER_CONDITIONS[i] for i in sorted(data['weather_condition'].unique())]
condition_counts = data['weather_condition'].value_counts().sort_index()

fig, ax = plt.subplots(figsize=(10, 6))
condition_counts.plot(kind='bar', ax=ax, color=['#FFD700', '#A9A9A9', '#4682B4', '#8B0000'])
ax.set_title('Distribution of Weather Conditions', fontsize=16, fontweight='bold')
ax.set_xlabel('Weather Condition', fontsize=12)
ax.set_ylabel('Count', fontsize=12)
# Replace the raw condition ids on the x-axis with readable condition names
ax.set_xticks(range(len(weather_labels)))
ax.set_xticklabels(weather_labels, rotation=45)
fig.tight_layout()
plt.show()

In [None]:
# Visualize feature distributions: one histogram per entry of
# config.FEATURE_COLUMNS, laid out on a grid three panels wide.
n_features = len(config.FEATURE_COLUMNS)
n_cols = 3
# Ceiling division so every feature gets a panel; always keep at least one row
# so plt.subplots receives a valid grid.
n_rows = max(1, (n_features + n_cols - 1) // n_cols)

# squeeze=False keeps `axes` two-dimensional even for a single row. With five
# features this gives the same 2x3 grid and (15, 10) figure size as before.
fig, axes = plt.subplots(n_rows, n_cols, figsize=(15, 5 * n_rows), squeeze=False)
fig.suptitle('Distribution of Weather Features', fontsize=16, fontweight='bold')

flat_axes = axes.flatten()
for ax, feature in zip(flat_axes, config.FEATURE_COLUMNS):
    ax.hist(data[feature], bins=30, edgecolor='black', alpha=0.7)
    ax.set_title(feature.replace('_', ' ').title())
    ax.set_xlabel('Value')
    ax.set_ylabel('Frequency')

# Remove only the panels that did not receive a feature, instead of assuming
# that the last cell of a 2x3 grid is the single empty one.
for unused_ax in flat_axes[n_features:]:
    fig.delaxes(unused_ax)
plt.tight_layout()
plt.show()

## 3. Data Preprocessing

In [None]:
# Clean data
# The cleaned result is bound to a new name; `data` keeps referring to the
# object returned by generate_sample_data.
cleaned_data = clean_data(data)

# Prepare features
# Presumably X holds the feature columns and y the 'weather_condition' labels —
# verify in data_preprocessing.prepare_features.
X, y = prepare_features(cleaned_data)

# Split data
# The split ratio and any random seed are chosen inside split_data (not visible
# in this notebook).
X_train, X_test, y_train, y_test = split_data(X, y)

# Scale features
# The returned `scaler` is reused in the prediction cells to transform
# hand-written samples before they are passed to the model.
# NOTE(review): presumably the scaler is fitted on X_train only — confirm.
X_train_scaled, X_test_scaled, scaler = scale_features(X_train, X_test)

## 4. Train the Model

In [None]:
# Initialize and train the model
# NOTE(review): no random seed is passed here; whether training is
# reproducible depends on WeatherPredictor's defaults — confirm in
# weather_predictor.
predictor = WeatherPredictor(n_estimators=100)
# Train on the scaled training split only; the test split is used in the
# evaluation cell.
predictor.train(X_train_scaled, y_train)

## 5. Model Evaluation

In [None]:
# Evaluate the model
# The result is indexed with 'confusion_matrix' by the heatmap cell that
# follows; any other entries it holds are not used in this notebook.
# NOTE(review): nothing is displayed by this cell itself, so evaluate
# presumably prints its own report — confirm in weather_predictor.
metrics = predictor.evaluate(X_test_scaled, y_test)

In [None]:
# Heatmap of the confusion matrix returned by the evaluation step.
# weather_labels is defined in the weather-distribution plotting cell, which
# must have been run before this one.
cm = metrics['confusion_matrix']

fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=weather_labels,
    yticklabels=weather_labels,
    ax=ax,
)
ax.set_title('Confusion Matrix - Weather Prediction', fontsize=16, fontweight='bold')
ax.set_ylabel('True Label', fontsize=12)
ax.set_xlabel('Predicted Label', fontsize=12)
fig.tight_layout()
plt.show()

## 6. Feature Importance Analysis

In [None]:
# Fetch the per-feature importance mapping from the trained predictor
feature_importance = predictor.get_feature_importance()

# Split the mapping into parallel lists (names, scores) and pick one viridis
# colour per bar, spread evenly across the colormap.
features, importance = list(feature_importance.keys()), list(feature_importance.values())
colors = plt.cm.viridis(np.linspace(0, 1, len(features)))

# Horizontal bar chart of the scores, in the order the mapping yields them
fig, ax = plt.subplots(figsize=(10, 6))
ax.barh(features, importance, color=colors)
ax.set_xlabel('Importance Score', fontsize=12)
ax.set_ylabel('Features', fontsize=12)
ax.set_title('Feature Importance in Weather Prediction', fontsize=16, fontweight='bold')
fig.tight_layout()
plt.show()

## 7. Making Predictions

In [None]:
# Example prediction for sunny weather
# One sample, one row. The values are listed in the order given in the trailing
# comment.
# NOTE(review): this order is assumed to match the column order the scaler and
# model were fitted on — verify against config.FEATURE_COLUMNS.
sunny_conditions = np.array([[30, 40, 1015, 10, 20]])  # temp, humidity, pressure, wind_speed, cloud_cover
# Apply the same scaling that was applied to the training data
sunny_scaled = scaler.transform(sunny_conditions)
# The predictor returns one result per input row; take the only one
sunny_result = predictor.predict_weather_description(sunny_scaled)[0]

print("Prediction for Sunny Conditions:")
# Echo the inputs straight from the array so the printout cannot drift from
# the values actually fed to the model.
print("Temperature: {}°C, Humidity: {}%, Pressure: {} hPa, Wind Speed: {} km/h, Cloud Cover: {}%".format(*sunny_conditions[0]))
print(f"\nPredicted Weather: {sunny_result['weather']}")
print(f"Confidence: {sunny_result['confidence']:.2f}%")
print("\nProbabilities:")
for condition, prob in sunny_result['probabilities'].items():
    print(f"  {condition}: {prob:.2f}%")

In [None]:
# Example prediction for rainy weather
# One sample, one row. The values are listed in the order given in the trailing
# comment.
# NOTE(review): this order is assumed to match the column order the scaler and
# model were fitted on — verify against config.FEATURE_COLUMNS.
rainy_conditions = np.array([[18, 85, 1005, 15, 90]])  # temp, humidity, pressure, wind_speed, cloud_cover
# Apply the same scaling that was applied to the training data
rainy_scaled = scaler.transform(rainy_conditions)
# The predictor returns one result per input row; take the only one
rainy_result = predictor.predict_weather_description(rainy_scaled)[0]

print("Prediction for Rainy Conditions:")
# Echo the inputs straight from the array so the printout cannot drift from
# the values actually fed to the model.
print("Temperature: {}°C, Humidity: {}%, Pressure: {} hPa, Wind Speed: {} km/h, Cloud Cover: {}%".format(*rainy_conditions[0]))
print(f"\nPredicted Weather: {rainy_result['weather']}")
print(f"Confidence: {rainy_result['confidence']:.2f}%")
print("\nProbabilities:")
for condition, prob in rainy_result['probabilities'].items():
    print(f"  {condition}: {prob:.2f}%")

In [None]:
# Visualize prediction probabilities: one bar chart per example prediction,
# side by side.
fig, axes = plt.subplots(1, 2, figsize=(14, 5))
bar_colors = ['#FFD700', '#A9A9A9', '#4682B4', '#8B0000']

# (title word, prediction result) for each panel, left to right
panels = [('Sunny', sunny_result), ('Rainy', rainy_result)]

for ax, (scenario, result) in zip(axes, panels):
    # Read labels and values from the same dict so that a bar can never be
    # paired with a label taken from the other prediction's key order.
    conditions = list(result['probabilities'].keys())
    probs = list(result['probabilities'].values())
    ax.bar(conditions, probs, color=bar_colors, alpha=0.7)
    ax.set_title(f'Prediction: {scenario} Weather', fontsize=14, fontweight='bold')
    ax.set_ylabel('Probability (%)', fontsize=12)
    # Fixed 0-100 range keeps the two panels directly comparable
    ax.set_ylim(0, 100)
    plt.setp(ax.xaxis.get_majorticklabels(), rotation=45)

plt.tight_layout()
plt.show()

## 8. Summary

In this notebook, we:
1. Generated synthetic weather data
2. Explored and visualized the data
3. Preprocessed the features
4. Trained a Random Forest classifier
5. Evaluated model performance
6. Analyzed feature importance
7. Made predictions on new data

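The model predicts weather conditions from meteorological features; its accuracy on the held-out test set is reported in Section 5 (Model Evaluation). Keep in mind that these results were obtained on synthetic data, so they do not by themselves show how the model would perform on real weather observations.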