In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
import logging
from typing import Tuple

# Root logger setup: emit INFO and above, rendered as "LEVEL: message".
logging.basicConfig(
    format='%(levelname)s: %(message)s',
    level=logging.INFO,
)


In [None]:
class TrafficAnalyzer:
    """
    Analyze traffic data from a rush-hour and a non-rush-hour dataset.

    The two CSV sources are loaded into separate frames, stacked into
    ``all_traffic`` and summarised as the mean ``volume`` per ``time`` value.
    Scatter plots and per-dataset linear regressions use the ``volume`` and
    ``speed`` columns.
    """

    def __init__(self, rush_hour_url: str, non_rush_hour_url: str):
        """
        Remember the data locations and start with empty containers.

        Args:
            rush_hour_url: Location of the rush-hour CSV (handed to ``pd.read_csv``).
            non_rush_hour_url: Location of the non-rush-hour CSV (handed to ``pd.read_csv``).
        """
        self.rush_hour_url = rush_hour_url
        self.non_rush_hour_url = non_rush_hour_url
        self.rush_hour_traffic = pd.DataFrame()
        self.non_rush_hour_traffic = pd.DataFrame()
        self.all_traffic = pd.DataFrame()
        self.hourly_traffic = pd.Series(dtype=float)

    @staticmethod
    def _missing_columns(frame: pd.DataFrame, columns) -> list:
        """Return the entries of *columns* that are not columns of *frame*."""
        return [name for name in columns if name not in frame]

    @staticmethod
    def _finish_plot(ax, title: str) -> None:
        """Apply the shared title/labels/grid to *ax*, show its figure, then close it."""
        fig = ax.figure
        ax.set_title(title)
        ax.set_xlabel('Volume')
        ax.set_ylabel('Speed')
        ax.grid(True)
        fig.tight_layout()
        plt.show()
        # Close rather than clf(): clearing keeps a figure registered with pyplot
        # (and can create a fresh blank one if show() already closed the original),
        # so repeated plotting would accumulate figures.
        plt.close(fig)

    def load_data(self) -> None:
        """
        Load both CSV datasets.

        Both files are read before either attribute is assigned, so a failure
        on the second read does not leave the analyzer holding only one of the
        two datasets.

        Raises:
            Exception: Whatever ``pd.read_csv`` raised; it is logged and then
                re-raised unchanged.
        """
        try:
            rush = pd.read_csv(self.rush_hour_url)
            non_rush = pd.read_csv(self.non_rush_hour_url)
        except Exception as e:
            logging.error(f"Error loading data: {e}")
            raise
        self.rush_hour_traffic = rush
        self.non_rush_hour_traffic = non_rush
        logging.info("Data loaded successfully.")

    def concatenate_data(self) -> None:
        """Stack the rush-hour and non-rush-hour frames into ``all_traffic`` with a fresh index."""
        self.all_traffic = pd.concat(
            [self.rush_hour_traffic, self.non_rush_hour_traffic],
            ignore_index=True,
        )

    def calculate_hourly_traffic(self) -> None:
        """
        Compute the mean ``volume`` for each distinct ``time`` value.

        The result is stored in ``hourly_traffic`` (indexed by ``time``).

        Raises:
            KeyError: If ``all_traffic`` lacks a ``time`` or ``volume`` column.
        """
        if self._missing_columns(self.all_traffic, ('time', 'volume')):
            raise KeyError("Expected 'time' and 'volume' columns in data.")
        self.hourly_traffic = self.all_traffic.groupby('time')['volume'].mean()

    def identify_extreme_hours(self) -> Tuple[str, str]:
        """
        Return the index labels of the highest and lowest mean volume.

        NOTE(review): the labels have whatever type the ``time`` column holds;
        the ``str`` annotation is inherited and unverified.

        Returns:
            ``(label_of_max, label_of_min)`` taken from ``hourly_traffic``.

        Raises:
            ValueError: If ``hourly_traffic`` is empty (nothing has been
                computed yet, or the data had no rows).
        """
        if self.hourly_traffic.empty:
            raise ValueError(
                "Hourly traffic is empty; load data and call calculate_hourly_traffic() first."
            )
        return self.hourly_traffic.idxmax(), self.hourly_traffic.idxmin()

    def plot_scatter(self, data: pd.DataFrame, title: str) -> None:
        """
        Show a scatter plot of ``speed`` against ``volume``.

        Args:
            data: Frame providing the ``volume`` and ``speed`` columns.
            title: Title placed above the plot.
        """
        ax = sns.scatterplot(data=data, x='volume', y='speed')
        self._finish_plot(ax, title)

    # The LinearRegression annotations below are quoted so they are not
    # evaluated when the class body is executed.
    def fit_linear_regression(self, traffic_data: pd.DataFrame) -> "LinearRegression":
        """
        Fit ``speed ~ volume`` with ordinary least squares.

        Rows where either ``volume`` or ``speed`` is missing are left out of
        the fit (a warning reports how many), since the estimator rejects NaN.

        Args:
            traffic_data: Frame providing the ``volume`` and ``speed`` columns.

        Returns:
            The fitted ``LinearRegression`` model.

        Raises:
            KeyError: If ``volume`` or ``speed`` is not a column of *traffic_data*.
            ValueError: If no complete rows remain to fit on.
        """
        missing = self._missing_columns(traffic_data, ('volume', 'speed'))
        if missing:
            raise KeyError(f"Traffic data is missing required column(s): {missing}")

        complete = traffic_data[['volume', 'speed']].dropna()
        dropped = len(traffic_data) - len(complete)
        if dropped:
            logging.warning(f"Ignoring {dropped} row(s) with missing volume/speed when fitting.")
        if complete.empty:
            raise ValueError("No rows with both 'volume' and 'speed' available to fit.")

        model = LinearRegression()
        model.fit(complete[['volume']], complete['speed'])
        return model

    def print_model_coefficients(self, model: "LinearRegression", model_name: str) -> None:
        """
        Log the intercept and the (single) slope of a fitted model.

        Args:
            model: Fitted model; only ``intercept_`` and ``coef_[0]`` are read.
            model_name: Label used as the prefix of the log message.
        """
        logging.info(f'{model_name} Model Coefficients: Intercept={model.intercept_:.4f}, Slope={model.coef_[0]:.4f}')

    def plot_regression_line(self, model: "LinearRegression", data: pd.DataFrame, title: str) -> None:
        """
        Show the ``volume``/``speed`` scatter with the model's prediction overlaid.

        Args:
            model: Fitted model used to predict ``speed`` from ``volume``.
            data: Frame providing the ``volume`` and ``speed`` columns.
            title: Title placed above the plot.
        """
        ax = sns.scatterplot(data=data, x='volume', y='speed')
        # Predict only where volume is known, and in ascending order so the
        # line is drawn once from left to right.
        line_x = data[['volume']].dropna().sort_values('volume')
        ax.plot(line_x['volume'], model.predict(line_x), color='red', label='Regression Line')
        ax.legend()
        self._finish_plot(ax, title)

    def analyze_traffic(self) -> None:
        """
        Run the full pipeline.

        Loads and combines the data, logs the labels with the highest and
        lowest mean volume, shows the combined scatter plot, then fits, logs
        and plots a regression for each of the two datasets.
        """
        self.load_data()
        self.concatenate_data()
        self.calculate_hourly_traffic()

        max_hour, min_hour = self.identify_extreme_hours()
        logging.info(f'Max traffic hour: {max_hour}')
        logging.info(f'Min traffic hour: {min_hour}')

        self.plot_scatter(self.all_traffic, 'All Traffic: Volume vs. Speed')

        # Same fit / report / plot sequence for each dataset.
        for label, frame in (
            ('Rush Hour', self.rush_hour_traffic),
            ('Non-Rush Hour', self.non_rush_hour_traffic),
        ):
            model = self.fit_linear_regression(frame)
            self.print_model_coefficients(model, label)
            self.plot_regression_line(model, frame, f'{label}: Volume vs. Speed')


In [None]:
# Raw CSV files served from the Traffic-Flow-Patterns GitHub repository.
rush_hour_url = (
    'https://raw.githubusercontent.com/Dwubsey/Traffic-Flow-Patterns/'
    'refs/heads/main/rush_hour.csv'
)
non_rush_hour_url = (
    'https://raw.githubusercontent.com/Dwubsey/Traffic-Flow-Patterns/'
    'refs/heads/main/non_rush_hour.csv'
)

# Build the analyzer for both datasets and run the complete pipeline.
analyzer = TrafficAnalyzer(
    rush_hour_url=rush_hour_url,
    non_rush_hour_url=non_rush_hour_url,
)
analyzer.analyze_traffic()
