In [1]:
# feature_engineering

In [2]:
import pandas as pd
import numpy as np
import os

In [3]:
# Correct paths

In [4]:


cleaned_path = "C:/Users/user/Desktop/main/AI_Smart_City/cleaned"
feature_path = "C:/Users/user/Desktop/main/AI_Smart_City/data/features"


os.makedirs(feature_path, exist_ok=True)


In [5]:
# TRAFFIC FEATURE ENGINEERING

In [6]:
print(" Loading traffic_cleaned.csv...")
traffic = pd.read_csv(os.path.join(cleaned_path, "traffic_cleaned.csv"))

print(" Shape before features:", traffic.shape)

 Loading traffic_cleaned.csv...
 Shape before features: (44651, 9)


In [7]:
#  Ensure datetime format

In [8]:

date_cols = [col for col in traffic.columns if 'date' in col.lower()]
if date_cols:
    traffic[date_cols[0]] = pd.to_datetime(traffic[date_cols[0]], errors='coerce')


In [9]:
#  Create time-based features 

In [10]:

traffic['hour'] = traffic[date_cols[0]].dt.hour
traffic['day'] = traffic[date_cols[0]].dt.day
traffic['month'] = traffic[date_cols[0]].dt.month
traffic['day_of_week'] = traffic[date_cols[0]].dt.dayofweek
traffic['is_weekend'] = traffic['day_of_week'].isin([5, 6]).astype(int)

In [11]:
# Example of aggregated feature 

In [12]:

if 'traffic_volume' in traffic.columns:
    traffic['traffic_peak'] = np.where(traffic['traffic_volume'] > traffic['traffic_volume'].median(), 1, 0)

In [13]:

traffic.to_csv(os.path.join(feature_path, "traffic_features.csv"), index=False)
print(" Saved  traffic_features.csv")
print(" Shape after features:", traffic.shape, "\n")

 Saved  traffic_features.csv
 Shape after features: (44651, 15) 



In [14]:
#   AIR QUALITY FEATURE ENGINEERING

In [15]:
print(" Loading air_quality_cleaned.csv...")
aqi = pd.read_csv(os.path.join(cleaned_path, "air_quality_cleaned.csv"))

 Loading air_quality_cleaned.csv...


In [16]:
#  Handle date/time if available 

In [17]:

date_cols = [col for col in aqi.columns if 'date' in col.lower()]
if date_cols:
    aqi[date_cols[0]] = pd.to_datetime(aqi[date_cols[0]], errors='coerce')
    aqi['month'] = aqi[date_cols[0]].dt.month
    aqi['day_of_week'] = aqi[date_cols[0]].dt.dayofweek

In [18]:
# Pollution intensity feature 

In [19]:

pollution_cols = [c for c in ['pm2.5', 'pm10', 'no2', 'so2', 'co', 'o3'] if c in aqi.columns]
if pollution_cols:
    aqi['pollution_mean'] = aqi[pollution_cols].mean(axis=1)
    aqi['pollution_std'] = aqi[pollution_cols].std(axis=1)

In [20]:
#  Categorical label based on AQI 

In [21]:

if 'aqi' in aqi.columns:
    aqi['aqi_category'] = pd.cut(
        aqi['aqi'],
        bins=[0, 50, 100, 150, 200, 300, 500],
        labels=['Good', 'Moderate', 'Unhealthy', 'Very Unhealthy', 'Severe', 'Hazardous']
    )


In [22]:
# --- Save engineered file ---
aqi.to_csv(os.path.join(feature_path, "air_quality_features.csv"), index=False)
print(" Saved  air_quality_features.csv")
print(" Shape after features:", aqi.shape, "\n")

 Saved  air_quality_features.csv
 Shape after features: (5463, 19) 



In [23]:
# ENERGY CONSUMPTION FEATURE ENGINEERING

In [25]:
print("Loading energy_cleaned.csv...")
energy = pd.read_csv(os.path.join(cleaned_path, "energy_cleaned.csv"))

Loading energy_cleaned.csv...


  energy = pd.read_csv(os.path.join(cleaned_path, "energy_cleaned.csv"))


In [28]:

print(" Shape before features:", energy.shape)

 Shape before features: (2075259, 9)


In [29]:
# --- Clean '?' and convert numerics ---

In [30]:

energy.replace('?', np.nan, inplace=True)
for col in energy.columns:
    if col.lower() not in ['date', 'time']:
        energy[col] = pd.to_numeric(energy[col], errors='coerce')


In [33]:
#  Ensure datetime format 

In [34]:

date_cols = [col for col in energy.columns if 'date' in col.lower()]
if date_cols:
    energy[date_cols[0]] = pd.to_datetime(energy[date_cols[0]], errors='coerce')
    energy['hour'] = energy[date_cols[0]].dt.hour if 'hour' not in energy.columns else energy['hour']
    energy['day'] = energy[date_cols[0]].dt.day
    energy['month'] = energy[date_cols[0]].dt.month
    energy['day_of_week'] = energy[date_cols[0]].dt.dayofweek
    energy['is_weekend'] = energy['day_of_week'].isin([5, 6]).astype(int)

In [None]:
# --- Lag features (previous readings) ---

In [35]:

numeric_cols = energy.select_dtypes(include='number').columns
for col in numeric_cols:
    energy[f"{col}_prev"] = energy[col].shift(1)


In [36]:
if 'Global_active_power' in energy.columns:
    energy['power_rolling_mean'] = energy['Global_active_power'].rolling(window=3).mean()

In [None]:
#  Drop rows with NaN created by lag/rolling 

In [37]:

energy = energy.dropna()


In [None]:
#  Save engineered file 

In [38]:

energy.to_csv(os.path.join(feature_path, "energy_features.csv"), index=False)
print("Saved  energy_features.csv")
print(" Shape after features:", energy.shape, "\n")

Saved  energy_features.csv
 Shape after features: (2049137, 27) 



In [None]:
#  Summary

In [39]:

print(" Feature Engineering Completed!")
print("Feature files saved in:", feature_path)
print(os.listdir(feature_path))

 Feature Engineering Completed!
Feature files saved in: C:/Users/user/Desktop/main/AI_Smart_City/data/features
['air_quality_features.csv', 'energy_features.csv', 'traffic_features.csv']
