# 🚀 **Complete Neural Network Project - Master Notebook**

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JishnuPG-tech/neural-network-appliance-energy-prediction/blob/main/notebooks/MASTER_COMPLETE_PROJECT.ipynb)

**This notebook runs the complete neural network pipeline from data exploration to model evaluation in one go!**

## 📋 What This Notebook Does:
1. **🔧 Environment Setup** - Clones the repo and installs packages
2. **📚 Library Imports** - Loads all required libraries and sets random seeds
3. **📊 Data Exploration** - Loads the data and analyzes patterns and relationships
4. **🧹 Data Preprocessing** - Cleans and prepares the data
5. **🧠 Neural Network** - Builds and trains a TensorFlow model
6. **📊 Model Evaluation** - Evaluates performance with metrics

**⚡ Just run all cells for the complete analysis!**

In [None]:
# 🔧 STEP 1: Environment Setup and Repository Clone
import sys
import os
import subprocess

# Detect Google Colab by attempting to import the `google.colab` module:
# success takes the Colab branch (clone + install), ImportError takes the
# local branch, which performs no clone, chdir or install.
# NOTE: the lines starting with `!` are IPython shell escapes, so this cell
# only runs inside IPython/Jupyter, not as a plain Python script.
try:
    import google.colab
    IN_COLAB = True
    print("🌟 Running in Google Colab!")

    # Clone the repository only if the checkout is not already present.
    # NOTE(review): `git clone` is given no destination, so it clones into the
    # current working directory, while the existence check and the chdir below
    # use the absolute path under /content — this presumably relies on the
    # working directory being /content; confirm.
    if not os.path.exists('/content/neural-network-appliance-energy-prediction'):
        print("📁 Cloning repository...")
        !git clone https://github.com/JishnuPG-tech/neural-network-appliance-energy-prediction.git
        print("✅ Repository cloned!")

    # Change to the project directory so the relative paths used by later
    # cells (e.g. 'data/appliances_sample_data.csv') resolve inside the repo.
    os.chdir('/content/neural-network-appliance-energy-prediction')

    # Install required packages (TensorFlow is pinned to 2.13.0; the other
    # packages are installed unpinned).
    print("📦 Installing packages...")
    !pip install tensorflow==2.13.0 pandas numpy matplotlib seaborn plotly scikit-learn scipy joblib
    print("✅ All packages installed!")

except ImportError:
    # `google.colab` is not importable: treat this as a local environment and
    # leave the working directory and installed packages untouched.
    IN_COLAB = False
    print("💻 Running in local environment!")

# Report where subsequent relative paths will be resolved from.
print(f"📍 Current directory: {os.getcwd()}")
print("🚀 Environment setup complete!")

In [None]:
# 📚 STEP 2: Import All Required Libraries
import warnings
warnings.filterwarnings('ignore')

# Core Data Science
import pandas as pd
import numpy as np

# Machine Learning
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Deep Learning
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models, optimizers, callbacks

# Visualization
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go

# Plot defaults: seaborn-flavoured matplotlib style, a 12x8 default figure
# size and the "husl" colour palette.
plt.style.use('seaborn-v0_8')
plt.rcParams.update({'figure.figsize': (12, 8)})
sns.set_palette(palette="husl")

# Reproducibility: seed TensorFlow's and NumPy's global generators with 42.
tf.random.set_seed(42)
np.random.seed(42)

print("🔥 TensorFlow version:", tf.__version__)
print("✅ All libraries imported successfully!")

## 📊 STEP 3: Data Loading and Exploration

In [None]:
# Load data (create synthetic if not available)
def create_synthetic_data(n_samples: int = 1000, seed: int = 42) -> pd.DataFrame:
    """Create a synthetic appliance-energy dataset.

    Every numeric column is drawn independently from a normal distribution,
    so the ``Appliances`` target has no real relationship to the features.
    The data is only a stand-in that lets the rest of the pipeline run when
    the CSV file is unavailable.

    Args:
        n_samples: Number of hourly rows to generate.
        seed: Seed for the private random generator. The default (42) yields
            the same values as seeding NumPy's global generator with 42 and
            drawing the columns in order.

    Returns:
        A DataFrame with a ``date`` column (hourly timestamps starting at
        2023-01-01), the non-negative ``Appliances`` target and ten
        sensor/weather columns.
    """
    # A private RandomState produces the same stream as np.random.seed(seed)
    # followed by np.random.normal(...), but does not reset NumPy's global
    # generator as a hidden side effect of calling this function.
    rng = np.random.RandomState(seed)

    # (column name, mean, standard deviation). The draw order is part of the
    # contract: changing it changes the generated values.
    column_specs = [
        ('Appliances', 100, 30),
        ('T1', 20, 5),
        ('RH_1', 40, 10),
        ('T2', 22, 4),
        ('RH_2', 45, 8),
        ('T_out', 15, 8),
        ('Press_mm_hg', 760, 20),
        ('RH_out', 50, 15),
        ('Windspeed', 5, 2),
        ('Visibility', 25, 5),
        ('Tdewpoint', 10, 6),
    ]

    data = {
        # A Timedelta step is accepted by every pandas version, unlike the
        # 'H' string alias, which newer releases deprecate.
        'date': pd.date_range(
            '2023-01-01', periods=n_samples, freq=pd.Timedelta(hours=1)
        ),
    }
    for name, mean, std in column_specs:
        data[name] = rng.normal(mean, std, n_samples)

    df = pd.DataFrame(data)

    # Energy use cannot be negative: fold negative draws back to positive.
    df['Appliances'] = np.abs(df['Appliances'])
    return df

# Prefer the CSV shipped with the repository; if the file is missing, fall
# back to the synthetic generator so the rest of the notebook still runs.
try:
    df = pd.read_csv('data/appliances_sample_data.csv')
except FileNotFoundError:
    print("🔧 Creating synthetic dataset...")
    df = create_synthetic_data()
else:
    print("✅ Data loaded from file!")

# Quick overview: dimensions and column names
print(f"📊 Dataset shape: {df.shape}")
print(f"🏷️ Columns: {list(df.columns)}")

# Preview of the first rows
print("\n📋 First 5 rows:")
display(df.head())

# Descriptive statistics of the numeric columns
print("\n📈 Statistical Summary:")
display(df.describe())

In [None]:
# 📊 Exploration dashboard: a 2x3 grid of overview plots
fig, axes = plt.subplots(2, 3, figsize=(18, 12))
(ax_target, ax_temp, ax_humidity), (ax_series, ax_corr, ax_box) = axes

# Top-left: histogram of the target variable
ax_target.hist(df['Appliances'], bins=50, alpha=0.7, color='skyblue')
ax_target.set_title('🎯 Appliances Energy Distribution')
ax_target.set_xlabel('Energy (Wh)')
ax_target.set_ylabel('Frequency')

# Top-middle: overlaid histograms of the two temperature columns
for column, shade in (('T1', 'red'), ('T2', 'blue')):
    ax_temp.hist(df[column], bins=30, alpha=0.7, color=shade, label=column)
ax_temp.set_title('🌡️ Temperature Distributions')
ax_temp.set_xlabel('Temperature (°C)')
ax_temp.legend()

# Top-right: overlaid histograms of the two humidity columns
for column, shade in (('RH_1', 'green'), ('RH_2', 'orange')):
    ax_humidity.hist(df[column], bins=30, alpha=0.7, color=shade, label=column)
ax_humidity.set_title('💧 Humidity Distributions')
ax_humidity.set_xlabel('Relative Humidity (%)')
ax_humidity.legend()

# Bottom-left: target over time for the first 100 rows (only drawn when a
# 'date' column exists; otherwise the panel stays empty)
if 'date' in df.columns:
    ax_series.plot(df['date'][:100], df['Appliances'][:100])
    ax_series.set_title('⏰ Energy Over Time (First 100h)')
    ax_series.tick_params(axis='x', rotation=45)

# Bottom-middle: correlation of every numeric feature with the target,
# sorted ascending and drawn as horizontal bars
numeric_cols = df.select_dtypes(include=[np.number]).columns
corr_matrix = df[numeric_cols].corr()
correlations = corr_matrix['Appliances'].drop('Appliances').sort_values()
bar_positions = range(len(correlations))
ax_corr.barh(bar_positions, correlations.values)
ax_corr.set_yticks(bar_positions)
ax_corr.set_yticklabels(correlations.index)
ax_corr.set_title('🔗 Feature Correlations with Target')
ax_corr.set_xlabel('Correlation Coefficient')

# Bottom-right: box plot of the target to expose outliers
ax_box.boxplot(df['Appliances'])
ax_box.set_title('📦 Appliances Energy Box Plot')
ax_box.set_ylabel('Energy (Wh)')

plt.tight_layout()
plt.show()

print("✅ Data exploration visualizations complete!")

## 🧹 STEP 4: Data Preprocessing and Feature Engineering

In [None]:
# Cleaning, feature engineering, train/test split and scaling
print("🧹 Starting data preprocessing...")

# Work on a copy of the raw frame and discard rows with missing values
initial_shape = len(df)
df_clean = df.copy().dropna()
print(f"📊 Removed {initial_shape - len(df_clean)} rows with missing values")

# Calendar features derived from the timestamp; the raw 'date' column is
# dropped afterwards so that only numeric features remain.
if 'date' in df_clean.columns:
    stamps = pd.to_datetime(df_clean['date'])
    hour_of_day = stamps.dt.hour
    weekday = stamps.dt.dayofweek
    # Hour mapped onto the unit circle so that 23:00 and 00:00 are neighbours
    hour_angle = 2 * np.pi * hour_of_day / 24

    df_clean = df_clean.assign(
        hour=hour_of_day,
        day_of_week=weekday,
        month=stamps.dt.month,
        is_weekend=(weekday >= 5).astype(int),
        hour_sin=np.sin(hour_angle),
        hour_cos=np.cos(hour_angle),
    ).drop(columns=['date'])
    print("🕐 Added time-based features")

# Averages and differences of the two temperature / humidity sensor pairs
if {'T1', 'T2', 'RH_1', 'RH_2'}.issubset(df_clean.columns):
    t1, t2 = df_clean['T1'], df_clean['T2']
    rh1, rh2 = df_clean['RH_1'], df_clean['RH_2']
    df_clean['temp_avg'] = (t1 + t2) / 2
    df_clean['humidity_avg'] = (rh1 + rh2) / 2
    df_clean['temp_diff'] = t1 - t2
    df_clean['humidity_diff'] = rh1 - rh2
    print("🌡️ Added temperature and humidity features")

# Everything except the target column is used as a feature
target_col = 'Appliances'
feature_cols = [name for name in df_clean.columns if name != target_col]

X = df_clean[feature_cols]
y = df_clean[target_col]

print(f"🎯 Features: {len(feature_cols)}")
print(f"📊 Samples: {len(X)}")
print(f"🏷️ Feature names: {feature_cols}")

# 80/20 shuffled split with a fixed random state
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, shuffle=True
)

# Standardise features; the scaler statistics come from the training split
# only and are then applied unchanged to the test split.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

print(f"🚂 Training set: {X_train_scaled.shape}")
print(f"🧪 Test set: {X_test_scaled.shape}")
print("✅ Data preprocessing complete!")

## 🧠 STEP 5: Neural Network Model Building and Training

In [None]:
# Define, compile and summarise the regression network
print("🏗️ Building neural network architecture...")

n_features = X_train_scaled.shape[1]

# Fully connected stack 128 -> 64 -> 32 -> 1. The first two blocks use batch
# normalisation; every hidden block is followed by dropout.
model = keras.Sequential([
    # Block 1 (receives the scaled feature vector)
    layers.Dense(128, activation='relu', input_shape=(n_features,)),
    layers.BatchNormalization(),
    layers.Dropout(0.3),

    # Block 2
    layers.Dense(64, activation='relu'),
    layers.BatchNormalization(),
    layers.Dropout(0.3),

    # Block 3
    layers.Dense(32, activation='relu'),
    layers.Dropout(0.2),

    # Single linear unit: unbounded regression output
    layers.Dense(1, activation='linear')
])

# Adam optimiser, MSE loss; MAE and MSE are also tracked as metrics
model.compile(
    optimizer=optimizers.Adam(learning_rate=0.001),
    loss='mse',
    metrics=['mae', 'mse']
)

# Layer-by-layer overview
print("\n📋 Neural Network Architecture:")
model.summary()

# Stop after 20 epochs without val_loss improvement and roll back to the
# best weights seen so far.
early_stopping = callbacks.EarlyStopping(
    monitor='val_loss', patience=20, restore_best_weights=True
)

# Halve the learning rate after 10 stagnant epochs, down to 1e-7 at most.
reduce_lr = callbacks.ReduceLROnPlateau(
    monitor='val_loss', factor=0.5, patience=10, min_lr=1e-7
)

print("✅ Neural network architecture ready!")

In [None]:
# Train the neural network
print("🚂 Training neural network...")

# Validation data is carved out of the TRAINING set (last 20% of the already
# shuffled training rows) instead of reusing the test set. Both callbacks
# monitor `val_loss`, and EarlyStopping restores the best weights by that
# metric; validating on the test set would let it drive model selection and
# make the test metrics reported later optimistically biased.
# The target is passed as a NumPy array so that `validation_split` can slice
# it regardless of how the installed Keras version treats pandas objects.
history = model.fit(
    X_train_scaled, np.asarray(y_train),
    validation_split=0.2,
    epochs=100,
    batch_size=32,
    callbacks=[early_stopping, reduce_lr],
    verbose=1
)

print("\n🎉 Neural network training complete!")

# Plot training history: loss on the left, MAE on the right. The 'val_*'
# curves come from the held-out slice of the training data.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5))

# Loss plot
ax1.plot(history.history['loss'], label='Training Loss', color='blue')
ax1.plot(history.history['val_loss'], label='Validation Loss', color='red')
ax1.set_title('📉 Model Loss During Training')
ax1.set_xlabel('Epoch')
ax1.set_ylabel('Loss (MSE)')
ax1.legend()
ax1.grid(True)

# MAE plot
ax2.plot(history.history['mae'], label='Training MAE', color='green')
ax2.plot(history.history['val_mae'], label='Validation MAE', color='orange')
ax2.set_title('📊 Mean Absolute Error During Training')
ax2.set_xlabel('Epoch')
ax2.set_ylabel('MAE')
ax2.legend()
ax2.grid(True)

plt.tight_layout()
plt.show()

## 📊 STEP 6: Model Evaluation and Performance Analysis

In [None]:
# Predict on both splits and report regression metrics
print("🔮 Making predictions...")

# model.predict returns a column vector; flatten to 1-D to match the targets
y_pred_train = model.predict(X_train_scaled).flatten()
y_pred_test = model.predict(X_test_scaled).flatten()

# Calculate metrics
train_mse = mean_squared_error(y_train, y_pred_train)
train_mae = mean_absolute_error(y_train, y_pred_train)
train_r2 = r2_score(y_train, y_pred_train)

test_mse = mean_squared_error(y_test, y_pred_test)
test_mae = mean_absolute_error(y_test, y_pred_test)
test_r2 = r2_score(y_test, y_pred_test)

# Performance report
print("\n📊 NEURAL NETWORK PERFORMANCE REPORT")
print("=" * 50)

print("\n🚂 TRAINING SET PERFORMANCE:")
print(f"   Mean Squared Error (MSE):     {train_mse:.2f}")
print(f"   Mean Absolute Error (MAE):    {train_mae:.2f}")
print(f"   R-squared (R²):               {train_r2:.4f}")
print(f"   Root Mean Squared Error:      {np.sqrt(train_mse):.2f}")

print("\n🧪 TEST SET PERFORMANCE:")
print(f"   Mean Squared Error (MSE):     {test_mse:.2f}")
print(f"   Mean Absolute Error (MAE):    {test_mae:.2f}")
print(f"   R-squared (R²):               {test_r2:.4f}")
print(f"   Root Mean Squared Error:      {np.sqrt(test_mse):.2f}")

# Model interpretation. R² can be zero or negative (the model is then no
# better than always predicting the mean), so that case gets its own
# verdict instead of being lumped in with "explains <40%".
print("\n🧠 MODEL INTERPRETATION:")
if test_r2 > 0.8:
    print("   ✅ EXCELLENT: Model explains >80% of variance")
elif test_r2 > 0.6:
    print("   ✅ GOOD: Model explains >60% of variance")
elif test_r2 > 0.4:
    print("   ⚠️ MODERATE: Model explains >40% of variance")
elif test_r2 > 0:
    print("   ❌ POOR: Model explains <40% of variance")
else:
    print("   ❌ NO SKILL: Model is no better than predicting the mean (R² ≤ 0)")

print(f"   📈 Average prediction error: ±{test_mae:.2f} energy units")
# Clamp at 0 so that a negative R² is not reported as a negative percentage;
# the raw R² value is already printed in the report above.
print(f"   🎯 Model captures {max(test_r2, 0.0)*100:.1f}% of energy patterns")

In [None]:
# 📊 Evaluation dashboard: a 2x2 grid of diagnostic plots
fig, axes = plt.subplots(2, 2, figsize=(16, 12))
(ax_scatter, ax_resid), (ax_resid_hist, ax_loss) = axes

# 1. Predicted vs. actual on the test set, with the ideal y = x line
actual_range = [y_test.min(), y_test.max()]
ax_scatter.scatter(y_test, y_pred_test, alpha=0.6, color='blue')
ax_scatter.plot(actual_range, actual_range, 'r--', lw=2)
ax_scatter.set_xlabel('Actual Energy')
ax_scatter.set_ylabel('Predicted Energy')
ax_scatter.set_title(f'🎯 Predictions vs Actual (Test Set)\nR² = {test_r2:.3f}')
ax_scatter.grid(True)

# 2. Residuals against predictions, with a zero reference line
residuals = y_test - y_pred_test
ax_resid.scatter(y_pred_test, residuals, alpha=0.6, color='green')
ax_resid.axhline(y=0, color='red', linestyle='--')
ax_resid.set_xlabel('Predicted Energy')
ax_resid.set_ylabel('Residuals (Actual - Predicted)')
ax_resid.set_title('📊 Residuals Plot')
ax_resid.grid(True)

# 3. Histogram of the residuals, with their mean marked
mean_residual = residuals.mean()
ax_resid_hist.hist(residuals, bins=30, alpha=0.7, color='purple')
ax_resid_hist.axvline(mean_residual, color='red', linestyle='--', label=f'Mean: {mean_residual:.2f}')
ax_resid_hist.set_xlabel('Residuals')
ax_resid_hist.set_ylabel('Frequency')
ax_resid_hist.set_title('📈 Residuals Distribution')
ax_resid_hist.legend()
ax_resid_hist.grid(True)

# 4. Training and validation loss per epoch
epoch_log = history.history
ax_loss.plot(epoch_log['loss'], label='Training Loss', color='blue')
ax_loss.plot(epoch_log['val_loss'], label='Validation Loss', color='red')
ax_loss.set_xlabel('Epoch')
ax_loss.set_ylabel('Loss (MSE)')
ax_loss.set_title('📉 Training Progress')
ax_loss.legend()
ax_loss.grid(True)

plt.tight_layout()
plt.show()

print("✅ Complete evaluation visualizations generated!")

## 🎉 Project Summary and Next Steps

In [None]:
# Closing report: dataset/model statistics, headline test metrics and
# follow-up suggestions. Each section is emitted as one joined print.
banner = "=" * 60

print("\n" + banner)
print("🎉 NEURAL NETWORK PROJECT COMPLETED SUCCESSFULLY!")
print(banner)

# Sizes of the dataset, the splits and the model
print("\n".join([
    "\n📊 PROJECT STATISTICS:",
    f"   📁 Dataset size: {len(df)} samples",
    f"   🎯 Features used: {len(feature_cols)}",
    f"   🚂 Training samples: {len(X_train)}",
    f"   🧪 Test samples: {len(X_test)}",
    f"   🧠 Model parameters: {model.count_params():,}",
]))

# Headline test-set metrics
print("\n".join([
    "\n🏆 FINAL PERFORMANCE:",
    f"   📈 Test R²: {test_r2:.3f}",
    f"   📊 Test MAE: {test_mae:.2f}",
    f"   🎯 Test RMSE: {np.sqrt(test_mse):.2f}",
]))

print("\n".join([
    "\n🚀 WHAT WAS ACCOMPLISHED:",
    "   ✅ Data exploration and visualization",
    "   ✅ Feature engineering and preprocessing",
    "   ✅ Neural network architecture design",
    "   ✅ Model training with regularization",
    "   ✅ Comprehensive performance evaluation",
    "   ✅ Advanced visualizations and insights",
]))

print("\n".join([
    "\n🔮 NEXT STEPS:",
    "   🎯 Fine-tune hyperparameters for better performance",
    "   📊 Try different neural network architectures",
    "   🔍 Analyze feature importance in more detail",
    "   💾 Save and deploy the model for production use",
    "   📈 Collect more data for improved accuracy",
]))

# The verdict depends on whether test R² exceeds 0.7
print("\n💡 KEY INSIGHTS:")
if test_r2 > 0.7:
    insight_lines = [
        "   🌟 Your neural network shows excellent predictive performance!",
        "   🎯 The model successfully captures appliance energy patterns.",
    ]
else:
    insight_lines = [
        "   📊 Your neural network shows good learning capability.",
        "   🔧 Consider more data or feature engineering for improvement.",
    ]
print("\n".join(insight_lines))

print("\n📱 Model ready for appliance energy prediction!")
print(f"🎯 Input {len(feature_cols)} features → Get energy consumption prediction")

print("\n" + banner)
print("🚀 SUCCESS! Your complete neural network pipeline is ready!")
print(banner)