# Smart City System Analysis

This notebook implements a comprehensive Smart City system focusing on sustainability analysis and optimization. The system collects, processes, and analyzes urban data to optimize resource usage and enhance sustainability.

In [None]:
# Import required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from typing import Dict, List, Any, Union, Optional, Tuple
import logging
import requests
import os
import datetime
import json
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor

# Configure the root logger: emit INFO and above, each record prefixed with
# its timestamp, logger name and severity.
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

## Data Collection Layer

The data collection layer gathers data from several sources: the OpenStreetMap Nominatim search API (a live HTTP request), plus real estate and IoT sensor sources that currently return hard-coded sample data.

In [None]:
class OpenStreetMapSource:
    """Data source for the OpenStreetMap (Nominatim) search API.

    Args:
        api_endpoint: URL of the search endpoint to query.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the caller indefinitely.
        user_agent: Value sent in the ``User-Agent`` header. The public
            Nominatim service asks clients to identify their application
            instead of using an HTTP library's default agent string.
    """

    def __init__(self, api_endpoint="https://nominatim.openstreetmap.org/search",
                 timeout=10.0, user_agent="SmartCitySystemAnalysis/1.0"):
        self.api_endpoint = api_endpoint
        self.timeout = timeout
        self.user_agent = user_agent

    def fetch_data(self, query, limit=10):
        """Fetch geographical data from OpenStreetMap.

        Args:
            query: Free-text search string sent as the ``q`` parameter.
            limit: Maximum number of results requested from the API.

        Returns:
            A DataFrame built from the JSON response, or an empty DataFrame
            when the request fails or the response cannot be decoded.
        """
        params = {
            'q': query,
            'format': 'json',
            'limit': limit,
        }
        headers = {'User-Agent': self.user_agent}

        try:
            response = requests.get(
                self.api_endpoint,
                params=params,
                headers=headers,
                timeout=self.timeout,
            )
            response.raise_for_status()
            return pd.DataFrame(response.json())
        except (requests.RequestException, ValueError) as e:
            # RequestException covers connection, timeout and HTTP status
            # errors; ValueError covers an undecodable JSON body or a payload
            # that cannot be turned into a DataFrame. Fetching is best-effort,
            # so report the problem and hand back an empty frame.
            logging.getLogger(__name__).error("Error fetching data: %s", e)
            return pd.DataFrame()

class RealEstateDataSource:
    """Data source for real estate datasets."""

    def fetch_data(self):
        """Return a fixed ten-row sample of property listings.

        Prices rise in steps of 50,000 from 250,000, sizes in steps of
        200 sq ft from 1,000, and locations alternate between 'Downtown'
        and 'Suburb'.
        """
        row_count = 10
        listings = {
            'property_id': range(1, row_count + 1),
            'price': [250000 + 50000 * step for step in range(row_count)],
            'size_sqft': [1000 + 200 * step for step in range(row_count)],
            'bedrooms': [2, 2, 3, 3, 3, 4, 4, 4, 5, 5],
            'location': ['Downtown', 'Suburb'] * (row_count // 2),
        }
        return pd.DataFrame(listings)

class IoTEnvironmentalSource:
    """Data source for IoT environmental sensors."""

    def fetch_data(self):
        """Generate sample IoT sensor data.

        Returns:
            A five-row DataFrame with one reading per sensor: temperature,
            humidity, air quality index, water usage, and an hourly
            timestamp starting at 2023-01-01 00:00.
        """
        return pd.DataFrame({
            'sensor_id': ['S001', 'S002', 'S003', 'S004', 'S005'],
            'temperature': [22.5, 23.1, 21.8, 24.2, 22.9],
            'humidity': [45, 48, 52, 40, 47],
            'air_quality_index': [65, 70, 85, 60, 75],
            'water_usage': [100, 120, 90, 110, 105],
            # A Timedelta step is used instead of the 'H' alias: the
            # uppercase alias is deprecated in recent pandas releases, while
            # the lowercase 'h' is rejected by older ones.
            'timestamp': pd.date_range(
                start='2023-01-01', periods=5, freq=pd.Timedelta(hours=1)
            ),
        })

## Data Processing Layer

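The data processing layer handles data cleaning (duplicate removal and missing-value imputation) and feature engineering. Feature scaling (normalization) is applied later, inside the model trainer.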

In [None]:
class DataPreprocessor:
    """Cleans and prepares raw data for analysis."""

    def preprocess(self, df: pd.DataFrame) -> pd.DataFrame:
        """Remove duplicate rows and impute missing values.

        Numeric columns are filled with the column mean; all other columns
        are filled with their most frequent value. A column with no
        observed values at all is left untouched because there is nothing
        to impute from.

        Args:
            df: Raw input data. It is not modified.

        Returns:
            The cleaned DataFrame (the input itself when it is empty).
        """
        if df.empty:
            return df

        # Copy explicitly so the column assignments below operate on an
        # independent frame rather than on a filtered result derived from
        # the caller's data.
        cleaned = df.drop_duplicates().copy()

        numeric_cols = cleaned.select_dtypes(include=[np.number]).columns
        categorical_cols = cleaned.select_dtypes(exclude=[np.number]).columns

        for col in numeric_cols:
            if cleaned[col].isna().any():
                cleaned[col] = cleaned[col].fillna(cleaned[col].mean())

        for col in categorical_cols:
            if not cleaned[col].isna().any():
                continue
            modes = cleaned[col].mode()
            if modes.empty:
                # Every value is missing: mode() has no entries, so indexing
                # its first element would raise.
                continue
            cleaned[col] = cleaned[col].fillna(modes.iloc[0])

        return cleaned

class FeatureEngineer:
    """Creates meaningful features from raw data."""

    def create_features(self, df: pd.DataFrame) -> pd.DataFrame:
        """Generate time-based and pairwise interaction features.

        When a ``timestamp`` column exists, ``hour``, ``day_of_week`` and
        ``month`` columns are derived from it. For every pair of numeric
        columns in the input, a ``<col1>_times_<col2>`` product column is
        added.

        Args:
            df: Input data. It is not modified.

        Returns:
            A copy of ``df`` with the new columns appended (the input
            itself when it is empty).
        """
        if df.empty:
            return df

        result_df = df.copy()

        # Create time-based features if timestamp exists
        if 'timestamp' in df.columns:
            timestamps = df['timestamp']
            # Timestamps loaded from CSV/JSON are often plain strings, which
            # have no ``.dt`` accessor; parse them first. Numeric columns are
            # deliberately not converted because their unit (seconds,
            # milliseconds, ...) cannot be known here.
            if not hasattr(timestamps, 'dt') and not pd.api.types.is_numeric_dtype(timestamps):
                timestamps = pd.to_datetime(timestamps)
            result_df['hour'] = timestamps.dt.hour
            result_df['day_of_week'] = timestamps.dt.dayofweek
            result_df['month'] = timestamps.dt.month

        # Interaction features use only the input's numeric columns, so the
        # time features added above are not multiplied in.
        numeric_cols = df.select_dtypes(include=[np.number]).columns
        for i, col1 in enumerate(numeric_cols):
            for col2 in numeric_cols[i + 1:]:
                result_df[f"{col1}_times_{col2}"] = df[col1] * df[col2]

        return result_df

## AI Model Layer

The AI model layer handles training and evaluation of machine learning models.

In [None]:
class ModelTrainer:
    """Trains machine learning models on processed data."""

    # Model types that train() knows how to build.
    _SUPPORTED_MODEL_TYPES = ("random_forest",)

    def train(self, X: pd.DataFrame, y: pd.Series, model_type: str = "random_forest") -> Dict[str, Any]:
        """Train a machine learning model.

        The data is split 80/20 into train and test sets, features are
        standardized with a scaler fitted on the training split only, and a
        random forest regressor is fitted on the scaled training data.

        Args:
            X: Feature matrix.
            y: Target values aligned with ``X``.
            model_type: Which model to train. Only ``"random_forest"`` is
                supported; any other value yields an error result rather
                than silently training a random forest.

        Returns:
            On success, a dict with ``status="success"``, the fitted
            ``model`` and ``scaler``, and the ``train_score`` /
            ``test_score`` returned by ``model.score``. On failure, a dict
            with ``status="error"`` and a ``message``.
        """
        logger = logging.getLogger(__name__)

        if model_type not in self._SUPPORTED_MODEL_TYPES:
            message = (
                f"Unsupported model_type: {model_type!r} "
                f"(supported: {', '.join(self._SUPPORTED_MODEL_TYPES)})"
            )
            logger.error("Error training model: %s", message)
            return {"status": "error", "message": message}

        try:
            # Split data
            X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

            # Fit the scaler on the training split only so no information
            # from the test split leaks into training.
            scaler = StandardScaler()
            X_train_scaled = scaler.fit_transform(X_train)
            X_test_scaled = scaler.transform(X_test)

            # Train model
            model = RandomForestRegressor(n_estimators=100, random_state=42)
            model.fit(X_train_scaled, y_train)

            # Evaluate.
            # NOTE(review): with very small datasets the 20% hold-out may
            # contain a single row, for which the score is presumably not
            # meaningful. Confirm against the scikit-learn documentation.
            train_score = model.score(X_train_scaled, y_train)
            test_score = model.score(X_test_scaled, y_test)

            return {
                "status": "success",
                "model": model,
                "scaler": scaler,
                "train_score": train_score,
                "test_score": test_score
            }

        except Exception as e:
            # This method reports failures through its result dict, so any
            # training error is logged and converted instead of propagated.
            logger.exception("Error training model: %s", e)
            return {"status": "error", "message": str(e)}

## Sustainability Metrics Layer

The sustainability metrics layer summarizes water consumption and classifies air quality from the collected sensor data.

In [None]:
class WaterMetricsAnalyzer:
    """Analyzes water usage patterns."""

    def analyze_consumption(self, data: pd.DataFrame, consumption_column: str) -> Dict[str, Any]:
        """Summarize the values of a water-consumption column.

        Args:
            data: Dataset holding the readings.
            consumption_column: Name of the column to summarize.

        Returns:
            An error dict for an empty dataset; otherwise a success dict
            with the total, the average, and min/max/std statistics, all
            as plain floats.
        """
        if not data.empty:
            usage = data[consumption_column]
            total = float(usage.sum())
            average = float(usage.mean())
            spread = {
                stat: float(getattr(usage, stat)())
                for stat in ("min", "max", "std")
            }
            return {
                "status": "success",
                "total_consumption": total,
                "average_consumption": average,
                "stats": spread,
            }

        return {"status": "error", "message": "Empty dataset"}

class EnvironmentalFactorsAnalyzer:
    """Analyzes environmental parameters."""

    # Inclusive upper bound of each AQI band, in ascending order, following
    # the US EPA Air Quality Index categories. Averages above the last bound
    # fall into _AQI_TOP_CATEGORY.
    _AQI_CATEGORIES = (
        (50, "Good"),
        (100, "Moderate"),
        (150, "Unhealthy for Sensitive Groups"),
        (200, "Unhealthy"),
        (300, "Very Unhealthy"),
    )
    _AQI_TOP_CATEGORY = "Hazardous"

    def analyze_air_quality(self, data: pd.DataFrame, aqi_column: str) -> Dict[str, Any]:
        """Analyze air quality data.

        Args:
            data: Dataset holding the readings.
            aqi_column: Name of the column with air quality index values.

        Returns:
            An error dict when the dataset is empty or the column has no
            valid readings; otherwise a success dict with the average AQI,
            its category, and min/max/std statistics as plain floats.
        """
        if data.empty:
            return {"status": "error", "message": "Empty dataset"}

        readings = data[aqi_column]
        avg_aqi = readings.mean()

        # A column consisting only of missing values averages to NaN. Every
        # threshold comparison with NaN is False, which would otherwise
        # report the worst category for data that does not exist.
        if pd.isna(avg_aqi):
            return {"status": "error", "message": "No valid AQI readings"}

        # Determine air quality category: first band whose bound is not exceeded
        category = next(
            (label for upper_bound, label in self._AQI_CATEGORIES if avg_aqi <= upper_bound),
            self._AQI_TOP_CATEGORY,
        )

        return {
            "status": "success",
            "average_aqi": float(avg_aqi),
            "category": category,
            "stats": {
                "min": float(readings.min()),
                "max": float(readings.max()),
                "std": float(readings.std())
            }
        }

## Optimization Layer

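The optimization layer trains a machine learning model on the data and then searches randomly sampled feature values, within optional per-feature constraints, for the combination with the lowest predicted target value.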

In [None]:
class AIOptimizer:
    """Applies machine learning for optimization."""

    def optimize(self, data: pd.DataFrame, target_column: str,
                feature_columns: List[str], constraints: Dict[str, Any],
                num_samples: int = 1000, random_state: Optional[int] = None,
                minimize: bool = True) -> Dict[str, Any]:
        """Find feature values that optimize the predicted target.

        A model is trained on ``data`` and then evaluated on uniformly
        sampled candidate points; the candidate with the best prediction
        is returned (random search).

        Args:
            data: Training data containing the feature and target columns.
            target_column: Name of the column to optimize.
            feature_columns: Names of the columns used as model inputs.
            constraints: Optional per-feature bounds, e.g.
                ``{"temperature": {"min": 20, "max": 25}}``. A missing
                bound, or a feature without an entry, defaults to the
                range observed in ``data``. ``None`` means no constraints.
            num_samples: Number of random candidate points to evaluate.
            random_state: Seed for reproducible sampling. With ``None`` the
                global NumPy random state is used, as before.
            minimize: Search for the lowest prediction when True (the
                default), for the highest when False.

        Returns:
            On success, a dict with ``status="success"``, the
            ``optimal_values`` per feature and the ``predicted_target``.
            On failure, a dict with ``status="error"`` and a ``message``
            (the trainer's error dict is passed through unchanged).
        """
        constraints = constraints or {}

        try:
            # Train model
            X = data[feature_columns]
            y = data[target_column]

            trainer = ModelTrainer()
            model_result = trainer.train(X, y)

            if model_result["status"] != "success":
                return model_result

            model = model_result["model"]
            scaler = model_result["scaler"]

            # Keep drawing from the global NumPy state unless a seed is
            # given, so callers relying on np.random.seed() are unaffected.
            rng = np.random if random_state is None else np.random.RandomState(random_state)

            # Generate random samples within constraints
            samples = {}
            for feature in feature_columns:
                bounds = constraints.get(feature, {})
                min_val = bounds.get("min", data[feature].min())
                max_val = bounds.get("max", data[feature].max())
                if min_val > max_val:
                    raise ValueError(
                        f"Invalid constraint for {feature!r}: min {min_val} exceeds max {max_val}"
                    )
                samples[feature] = rng.uniform(min_val, max_val, num_samples)

            # Column order follows feature_columns, matching the order the
            # scaler was fitted on.
            samples_df = pd.DataFrame(samples)
            samples_scaled = scaler.transform(samples_df)

            # Make predictions
            predictions = model.predict(samples_scaled)

            # Find optimal solution
            best_idx = int(np.argmin(predictions) if minimize else np.argmax(predictions))

            return {
                "status": "success",
                # Plain floats keep the result independent of NumPy scalar types.
                "optimal_values": {
                    feature: float(value)
                    for feature, value in samples_df.iloc[best_idx].items()
                },
                "predicted_target": float(predictions[best_idx])
            }

        except Exception as e:
            # Failures are reported through the result dict, so any error is
            # logged and converted instead of propagated.
            logging.getLogger(__name__).exception("Error during optimization: %s", e)
            return {"status": "error", "message": str(e)}

## Example Usage

Let's demonstrate the system with some example data.

In [None]:
# End-to-end demonstration: collect the sample IoT readings, clean them, add
# engineered features, run both analyzers, then search for the conditions with
# the lowest predicted water usage. The names defined here are module-level
# notebook state; `featured_data` in particular is read by the plotting cell.

# Initialize components
iot_source = IoTEnvironmentalSource()
preprocessor = DataPreprocessor()
feature_engineer = FeatureEngineer()
water_analyzer = WaterMetricsAnalyzer()
env_analyzer = EnvironmentalFactorsAnalyzer()
optimizer = AIOptimizer()

# Collect data
iot_data = iot_source.fetch_data()
print("\nCollected IoT data:")
# NOTE(review): display() is not defined or imported in this file; presumably
# it is the builtin injected by IPython/Jupyter. Confirm before running this
# as a plain script.
display(iot_data.head())

# Preprocess data: de-duplicate and impute, then add time and interaction features
processed_data = preprocessor.preprocess(iot_data)
featured_data = feature_engineer.create_features(processed_data)
print("\nProcessed data with engineered features:")
display(featured_data.head())

# Analyze water consumption
water_analysis = water_analyzer.analyze_consumption(featured_data, 'water_usage')
print("\nWater consumption analysis:")
print(json.dumps(water_analysis, indent=2))

# Analyze air quality
air_analysis = env_analyzer.analyze_air_quality(featured_data, 'air_quality_index')
print("\nAir quality analysis:")
print(json.dumps(air_analysis, indent=2))

# Optimize water usage: temperature and humidity are restricted to the given
# ranges; 'hour' has no constraint entry, so the optimizer samples it from the
# range observed in the data.
optimization_result = optimizer.optimize(
    featured_data,
    target_column='water_usage',
    feature_columns=['temperature', 'humidity', 'hour'],
    constraints={
        'temperature': {'min': 20, 'max': 25},
        'humidity': {'min': 40, 'max': 60}
    }
)
print("\nOptimization results:")
print(json.dumps(optimization_result, indent=2))

## Visualization

Let's create some visualizations of our analysis.

In [None]:
# Set up the plotting style.
# Matplotlib renamed its bundled 'seaborn' style sheet to 'seaborn-v0_8' and
# later dropped the old name, so asking for 'seaborn' fails on current
# releases. Use whichever of the two names this installation provides.
_seaborn_style = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
plt.style.use(_seaborn_style)

# Create a figure with multiple subplots (2 x 2 grid)
fig = plt.figure(figsize=(15, 10))

# 1. Water usage over time
plt.subplot(2, 2, 1)
plt.plot(featured_data['timestamp'], featured_data['water_usage'], marker='o')
plt.title('Water Usage Over Time')
plt.xticks(rotation=45)  # tilt the timestamp labels
plt.ylabel('Water Usage')

# 2. Air Quality Index distribution
plt.subplot(2, 2, 2)
sns.histplot(featured_data['air_quality_index'], bins=10)
plt.title('Air Quality Index Distribution')
plt.xlabel('AQI')

# 3. Temperature vs Water Usage
plt.subplot(2, 2, 3)
plt.scatter(featured_data['temperature'], featured_data['water_usage'])
plt.title('Temperature vs Water Usage')
plt.xlabel('Temperature')
plt.ylabel('Water Usage')

# 4. Humidity vs Air Quality
plt.subplot(2, 2, 4)
plt.scatter(featured_data['humidity'], featured_data['air_quality_index'])
plt.title('Humidity vs Air Quality')
plt.xlabel('Humidity')
plt.ylabel('Air Quality Index')

# Adjust spacing so titles and axis labels fit, then render the figure
plt.tight_layout()
plt.show()