In [1]:
"""
Urban Traffic Optimization System for SDG 11
Machine Learning Model for Sustainable City Transportation
"""

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.cluster import KMeans
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.preprocessing import StandardScaler
import folium
from datetime import datetime, timedelta
import warnings
warnings.filterwarnings('ignore')

class UrbanTrafficOptimizer:
    """
    A machine learning system to optimize urban traffic flow and public transportation
    for sustainable city development (SDG 11).

    The methods are meant to be called in pipeline order:
    ``generate_synthetic_data`` first, then ``analyze_traffic_patterns`` (which adds
    the ``cluster`` column that the visualisation and map steps read), then the
    remaining analysis/reporting methods in any order.

    Attributes:
        traffic_data: DataFrame of per-timestamp traffic observations, or None
            until ``generate_synthetic_data`` has run.
        transport_data: DataFrame of per-timestamp bus observations, or None
            until ``generate_synthetic_data`` has run.
        kmeans_model: Fitted KMeans model, set by ``analyze_traffic_patterns``.
        prediction_model: Fitted RandomForestRegressor, set by ``predict_congestion``.
        scaler: StandardScaler fitted on the clustering features.
    """

    def __init__(self):
        self.traffic_data = None
        self.transport_data = None
        self.kmeans_model = None
        self.prediction_model = None
        self.scaler = StandardScaler()

    def _require_data(self, need_transport=False, need_clusters=False):
        """Raise a descriptive RuntimeError when a prerequisite step was skipped."""
        if self.traffic_data is None or (need_transport and self.transport_data is None):
            raise RuntimeError(
                "No data available: call generate_synthetic_data() first"
            )
        if need_clusters and 'cluster' not in self.traffic_data.columns:
            raise RuntimeError(
                "No cluster labels available: call analyze_traffic_patterns() first"
            )

    def generate_synthetic_data(self, n_records=10000):
        """
        Generate synthetic urban traffic and transportation data.

        In a real scenario, this would come from city sensors and GPS data.
        Records are spaced 10 minutes apart starting at 2024-01-01 00:00, and the
        traffic and transport frames share the same timestamps so they can be
        joined later. Results are stored on ``self.traffic_data`` and
        ``self.transport_data``.

        Note: this reseeds NumPy's *global* random state with 42 so that every
        call produces the same data.

        Args:
            n_records: Number of rows to generate in each frame.
        """
        print("Generating synthetic urban traffic data...")

        np.random.seed(42)

        # Generate traffic data
        start = datetime(2024, 1, 1)
        timestamps = [start + timedelta(minutes=10 * i) for i in range(n_records)]
        hours = [t.hour for t in timestamps]  # computed once, used twice below

        # The order of the np.random calls below determines the generated values;
        # keep it stable if reproducibility with earlier runs matters.
        self.traffic_data = pd.DataFrame({
            'timestamp': timestamps,
            'hour': hours,
            'day_of_week': [t.weekday() for t in timestamps],
            'location_lat': np.random.uniform(40.70, 40.80, n_records),
            'location_lng': np.random.uniform(-74.02, -73.92, n_records),
            # NOTE(review): sin() is applied to the raw hour value (radians), so
            # this term varies by hour but is not a smooth 24-hour cycle.
            'traffic_volume': np.random.poisson(50, n_records) +
                              np.abs(np.sin(hours)) * 30,
            'average_speed': np.random.uniform(5, 60, n_records),
            'congestion_level': np.random.choice(['Low', 'Medium', 'High'], n_records,
                                                 p=[0.3, 0.5, 0.2]),
            'vehicle_type': np.random.choice(['Car', 'Bus', 'Truck', 'Motorcycle'], n_records)
        })

        # Generate public transport data
        bus_routes = ['A1', 'B2', 'C3', 'D4', 'E5']
        self.transport_data = pd.DataFrame({
            'route_id': np.random.choice(bus_routes, n_records),
            'timestamp': timestamps,
            'passenger_count': np.random.poisson(20, n_records),
            'travel_time': np.random.uniform(10, 120, n_records),
            'delay_minutes': np.random.exponential(5, n_records)
        })

        print(f"Generated {len(self.traffic_data)} traffic records")
        print(f"Generated {len(self.transport_data)} transport records")

    def analyze_traffic_patterns(self, optimal_k=4, max_k=7):
        """
        Analyze traffic patterns using K-means clustering.

        Standardises hour, latitude, longitude and traffic volume, saves an elbow
        plot to ``elbow_plot.png``, then fits the final model and writes the
        cluster label of every row to ``self.traffic_data['cluster']``.

        Args:
            optimal_k: Number of clusters for the final model.
            max_k: Largest cluster count tried for the elbow plot (capped at the
                number of rows, since K-means cannot fit more clusters than samples).

        Returns:
            DataFrame indexed by cluster with mean traffic volume, mean speed,
            min/max hour and the most frequent congestion level.

        Raises:
            RuntimeError: If no data has been generated yet.
        """
        self._require_data()
        print("\n=== Traffic Pattern Analysis ===")

        # Prepare data for clustering
        cluster_features = self.traffic_data[['hour', 'location_lat', 'location_lng', 'traffic_volume']].copy()
        cluster_features_scaled = self.scaler.fit_transform(cluster_features)

        # Find optimal number of clusters using elbow method
        candidate_ks = range(1, min(max_k, len(cluster_features)) + 1)
        inertias = []
        for k in candidate_ks:
            kmeans = KMeans(n_clusters=k, random_state=42)
            kmeans.fit(cluster_features_scaled)
            inertias.append(kmeans.inertia_)

        # Plot elbow curve; close the figure even if saving fails
        fig = plt.figure(figsize=(10, 6))
        try:
            plt.plot(candidate_ks, inertias, 'bx-')
            plt.xlabel('Number of clusters')
            plt.ylabel('Inertia')
            plt.title('Elbow Method for Optimal Number of Clusters')
            plt.savefig('elbow_plot.png', dpi=300, bbox_inches='tight')
        finally:
            plt.close(fig)

        # Apply K-means clustering
        self.kmeans_model = KMeans(n_clusters=optimal_k, random_state=42)
        self.traffic_data['cluster'] = self.kmeans_model.fit_predict(cluster_features_scaled)

        print(f"Traffic patterns clustered into {optimal_k} groups")

        # Analyze cluster characteristics
        cluster_summary = self.traffic_data.groupby('cluster').agg({
            'traffic_volume': 'mean',
            'average_speed': 'mean',
            'hour': ['min', 'max'],
            'congestion_level': lambda x: x.mode()[0]  # most frequent level
        }).round(2)

        print("\nCluster Summary:")
        print(cluster_summary)

        return cluster_summary

    def predict_congestion(self):
        """
        Build a predictive model for traffic congestion.

        Encodes congestion level as 0/1/2 (Low/Medium/High) and fits a
        RandomForestRegressor on hour, day of week, location and a peak-hour
        flag (07-09 and 16-19 inclusive), using an 80/20 train/test split.
        The fitted model is stored on ``self.prediction_model``.

        Returns:
            Tuple ``(mae, mse, feature_importance)`` where the first two are
            test-set errors and the last is a DataFrame sorted by importance.

        Raises:
            RuntimeError: If no data has been generated yet.
        """
        self._require_data()
        print("\n=== Traffic Congestion Prediction ===")

        # Prepare features for prediction
        features = self.traffic_data[['hour', 'day_of_week', 'location_lat', 'location_lng']].copy()
        features['is_peak_hour'] = (
            features['hour'].between(7, 9) | features['hour'].between(16, 19)
        )

        # Convert congestion level to numerical values
        congestion_map = {'Low': 0, 'Medium': 1, 'High': 2}
        target = self.traffic_data['congestion_level'].map(congestion_map)

        # Split data
        X_train, X_test, y_train, y_test = train_test_split(
            features, target, test_size=0.2, random_state=42
        )

        # Train Random Forest model
        self.prediction_model = RandomForestRegressor(n_estimators=100, random_state=42)
        self.prediction_model.fit(X_train, y_train)

        # Make predictions
        y_pred = self.prediction_model.predict(X_test)

        # Evaluate model
        mae = mean_absolute_error(y_test, y_pred)
        mse = mean_squared_error(y_test, y_pred)

        print("Model Performance:")
        print(f"Mean Absolute Error: {mae:.3f}")
        print(f"Mean Squared Error: {mse:.3f}")

        # Feature importance
        feature_importance = pd.DataFrame({
            'feature': features.columns,
            'importance': self.prediction_model.feature_importances_
        }).sort_values('importance', ascending=False)

        print("\nFeature Importance:")
        print(feature_importance)

        return mae, mse, feature_importance

    def _compute_route_efficiency(self):
        """Return the per-route efficiency table without printing anything."""
        # Merge traffic and transport data
        merged_data = pd.merge(
            self.traffic_data,
            self.transport_data,
            on='timestamp',
            how='inner'
        )

        # Analyze route efficiency; congestion_level becomes the share of
        # observations on that route that were 'High'
        route_efficiency = merged_data.groupby('route_id').agg({
            'passenger_count': 'mean',
            'travel_time': 'mean',
            'delay_minutes': 'mean',
            'congestion_level': lambda x: (x == 'High').mean()
        }).round(2)

        # Passengers carried per minute of (travel + delay) time
        route_efficiency['efficiency_score'] = (
            route_efficiency['passenger_count'] /
            (route_efficiency['travel_time'] + route_efficiency['delay_minutes'])
        )
        return route_efficiency

    def optimize_transport_routes(self, min_efficiency=0.3, max_congestion_rate=0.3):
        """
        Optimize public transport routes based on traffic patterns.

        Joins traffic and transport records on timestamp, scores each route and
        prints the table plus one textual recommendation per route.

        Args:
            min_efficiency: Routes scoring below this get a schedule/dedicated-lane
                recommendation.
            max_congestion_rate: Routes at or above ``min_efficiency`` whose share
                of 'High' congestion exceeds this get a re-routing recommendation.

        Returns:
            Tuple ``(route_efficiency, recommendations)``: the per-route DataFrame
            (including ``efficiency_score``) and a list of recommendation strings
            in the DataFrame's index order.

        Raises:
            RuntimeError: If no data has been generated yet.
        """
        self._require_data(need_transport=True)
        print("\n=== Public Transport Route Optimization ===")

        route_efficiency = self._compute_route_efficiency()

        print("Route Efficiency Analysis:")
        print(route_efficiency.sort_values('efficiency_score', ascending=False))

        # Recommend optimizations
        recommendations = []
        for route in route_efficiency.index:
            eff_score = route_efficiency.loc[route, 'efficiency_score']
            congestion_rate = route_efficiency.loc[route, 'congestion_level']

            if eff_score < min_efficiency:
                rec = f"Route {route}: Consider schedule optimization and dedicated lanes"
            elif congestion_rate > max_congestion_rate:
                rec = f"Route {route}: High congestion - suggest alternative routing"
            else:
                rec = f"Route {route}: Performing well - maintain current schedule"

            recommendations.append(rec)

        print("\nOptimization Recommendations:")
        for rec in recommendations:
            print(f"• {rec}")

        return route_efficiency, recommendations

    def visualize_results(self):
        """
        Create visualizations for the analysis.

        Saves a 2x2 dashboard (hourly traffic, geographic clusters, congestion
        distribution, route efficiency) to ``traffic_analysis_dashboard.png``.

        Raises:
            RuntimeError: If data has not been generated or clustering has not
                been run yet.
        """
        self._require_data(need_transport=True, need_clusters=True)
        print("\n=== Generating Visualizations ===")

        # Use the silent helper so plotting does not re-print the route report
        route_efficiency = self._compute_route_efficiency()

        fig = plt.figure(figsize=(12, 8))
        try:
            # 1. Traffic patterns by hour
            plt.subplot(2, 2, 1)
            hourly_traffic = self.traffic_data.groupby('hour')['traffic_volume'].mean()
            plt.plot(hourly_traffic.index, hourly_traffic.values, marker='o')
            plt.xlabel('Hour of Day')
            plt.ylabel('Average Traffic Volume')
            plt.title('Traffic Patterns Throughout the Day')
            plt.grid(True, alpha=0.3)

            # 2. Cluster visualization
            plt.subplot(2, 2, 2)
            scatter = plt.scatter(self.traffic_data['location_lng'],
                                  self.traffic_data['location_lat'],
                                  c=self.traffic_data['cluster'],
                                  cmap='viridis', alpha=0.6)
            plt.colorbar(scatter, label='Cluster')
            plt.xlabel('Longitude')
            plt.ylabel('Latitude')
            plt.title('Traffic Clusters by Geographic Location')

            # 3. Congestion levels
            plt.subplot(2, 2, 3)
            congestion_counts = self.traffic_data['congestion_level'].value_counts()
            plt.pie(congestion_counts.values, labels=congestion_counts.index, autopct='%1.1f%%')
            plt.title('Distribution of Congestion Levels')

            # 4. Route efficiency
            plt.subplot(2, 2, 4)
            plt.bar(route_efficiency.index, route_efficiency['efficiency_score'])
            plt.xlabel('Route ID')
            plt.ylabel('Efficiency Score')
            plt.title('Public Transport Route Efficiency')
            plt.xticks(rotation=45)

            plt.tight_layout()
            plt.savefig('traffic_analysis_dashboard.png', dpi=300, bbox_inches='tight')
        finally:
            # Always release the figure, even if plotting or saving failed
            plt.close(fig)

        print("Visualizations saved as 'traffic_analysis_dashboard.png'")

    def _sample_for_map(self, max_markers):
        """Return at most ``max_markers`` randomly chosen traffic rows."""
        # DataFrame.sample raises if asked for more rows than exist
        n_markers = min(max_markers, len(self.traffic_data))
        return self.traffic_data.sample(n_markers)

    def create_interactive_map(self, max_markers=1000):
        """
        Create an interactive Folium map showing traffic clusters.

        Plots a random sample of traffic records as circle markers coloured by
        cluster and saves the map to ``traffic_clusters_map.html``.

        Args:
            max_markers: Upper bound on the number of markers drawn (sampling
                keeps the HTML file small); fewer are drawn when the dataset
                has fewer rows.

        Raises:
            RuntimeError: If data has not been generated or clustering has not
                been run yet.
        """
        self._require_data(need_clusters=True)
        print("\nCreating interactive traffic map...")

        # Create base map centred on the mean observation location
        city_center = [self.traffic_data['location_lat'].mean(),
                       self.traffic_data['location_lng'].mean()]

        traffic_map = folium.Map(location=city_center, zoom_start=12)

        # Add cluster markers; clusters outside this palette fall back to blue
        cluster_colors = {0: 'green', 1: 'orange', 2: 'red', 3: 'purple'}

        for _, row in self._sample_for_map(max_markers).iterrows():
            folium.CircleMarker(
                location=[row['location_lat'], row['location_lng']],
                radius=3,
                popup=f"Cluster: {row['cluster']}<br>Volume: {row['traffic_volume']}",
                color=cluster_colors.get(row['cluster'], 'blue'),
                fill=True
            ).add_to(traffic_map)

        # Save map
        traffic_map.save('traffic_clusters_map.html')
        print("Interactive map saved as 'traffic_clusters_map.html'")

    def generate_sustainability_report(self):
        """
        Generate a sustainability impact report.

        Prints and returns a set of headline estimates. Two entries are derived
        from the data (travel-time and CO2 figures); the rest are fixed text.

        Returns:
            Dict mapping benefit name to a human-readable estimate string.

        Raises:
            RuntimeError: If no data has been generated yet.
        """
        self._require_data(need_transport=True)
        print("\n=== Sustainability Impact Report ===")

        # Calculate potential benefits
        # NOTE(review): total_delay and the record count cover the whole dataset,
        # so the "daily" wording below is only exact for a single day of data.
        total_delay = self.transport_data['delay_minutes'].sum()
        delay_reduction_share = 0.2  # assumed share of delay removed by optimization
        co2_reduction_per_vehicle = 0.0001  # tons CO2 per vehicle per day

        estimated_benefits = {
            'Reduced Travel Time': f"~{total_delay * delay_reduction_share:.0f} minutes daily",
            'Fuel Savings': "15-20% potential reduction",
            'CO2 Emissions Reduction': f"~{len(self.traffic_data) * co2_reduction_per_vehicle:.1f} tons daily",
            'Public Transport Efficiency': "20-30% improvement possible",
            'Traffic Congestion': "10-25% reduction achievable"
        }

        print("Potential Sustainability Benefits:")
        for benefit, value in estimated_benefits.items():
            print(f"• {benefit}: {value}")

        return estimated_benefits

def main():
    """Run the complete demo pipeline and list the files it produces."""
    banner = (
        " AI for Sustainable Development - SDG 11: Sustainable Cities",
        "Urban Traffic Optimization System",
        "=" * 60,
    )
    for banner_line in banner:
        print(banner_line)

    # Build the optimizer and populate it with 5000 synthetic records
    optimizer = UrbanTrafficOptimizer()
    optimizer.generate_synthetic_data(5000)

    # Analysis, visualization and reporting steps, in execution order
    pipeline = (
        optimizer.analyze_traffic_patterns,
        optimizer.predict_congestion,
        optimizer.optimize_transport_routes,
        optimizer.visualize_results,
        optimizer.create_interactive_map,
        optimizer.generate_sustainability_report,
    )
    for step in pipeline:
        step()

    closing_lines = [
        "\n" + "=" * 60,
        " Project completed successfully!",
        " Check generated files:",
        "   - traffic_analysis_dashboard.png",
        "   - elbow_plot.png",
        "   - traffic_clusters_map.html",
        " Contributing to SDG 11: Sustainable Cities and Communities",
    ]
    print("\n".join(closing_lines))

# Run the demo pipeline only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()

 AI for Sustainable Development - SDG 11: Sustainable Cities
Urban Traffic Optimization System
Generating synthetic urban traffic data...
Generated 5000 traffic records
Generated 5000 transport records

=== Traffic Pattern Analysis ===
Traffic patterns clustered into 4 groups

Cluster Summary:
        traffic_volume average_speed hour     congestion_level
                  mean          mean  min max         <lambda>
cluster                                                       
0                65.84         32.85    0  19           Medium
1                68.62         32.85    6  23           Medium
2                69.76         32.36    0  17           Medium
3                70.73         32.67    5  23           Medium

=== Traffic Congestion Prediction ===
Model Performance:
Mean Absolute Error: 0.592
Mean Squared Error: 0.537

Feature Importance:
        feature  importance
3  location_lng    0.360342
2  location_lat    0.351351
0          hour    0.164109
1   day_of_week    0