<a href="https://colab.research.google.com/github/MOHAMMEDAWEZALI2303A51767/myportfolio/blob/master/ieee002.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Import libraries
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

# Metric used at the end of this cell (imported up front rather than mid-cell).
from sklearn.metrics import r2_score

# Load data
data = pd.read_csv("/content/AirQuality.csv")

# Clean and prepare: rows without a target value cannot be used.
data = data.dropna(subset=['Avg'])
# errors='coerce' turns unparseable timestamps into NaT instead of raising.
# NOTE(review): the saved output shows pandas warning on this line; if the CSV
# stores day-first dates an explicit `format=`/`dayfirst=` may be needed — confirm.
data['lastupdate'] = pd.to_datetime(data['lastupdate'], errors='coerce')
data = data.sort_values('lastupdate')

# Select relevant features: a single-column 2-D array of the 'Avg' readings.
df = data[['Avg']].values

# Window length: each sample is `time_steps` consecutive readings.
time_steps = 5

# Work out how many windows the chronological split below assigns to training,
# using the very same splitter so the two can never disagree.
n_windows = len(df) - time_steps
train_idx, _ = train_test_split(np.arange(n_windows), test_size=0.2, shuffle=False)
# Training window k reads rows k .. k+time_steps (inputs plus target), so the
# training windows together touch exactly the first len(train_idx)+time_steps rows.
n_train_rows = len(train_idx) + time_steps

# Normalize data. The scaler is fitted on the training rows only so that the
# min/max of the held-out period does not leak into training; it is then applied
# to the whole series (test values may therefore fall slightly outside [0, 1]).
scaler = MinMaxScaler(feature_range=(0,1))
scaler.fit(df[:n_train_rows])
scaled = scaler.transform(df)

# Create time-series data: X[k] holds `time_steps` readings, y[k] the next one.
X, y = [], []
for i in range(time_steps, len(scaled)):
    X.append(scaled[i-time_steps:i, 0])
    y.append(scaled[i, 0])

X, y = np.array(X), np.array(y)
# Reshape to (samples, time_steps, 1) as passed to the first LSTM layer below.
X = np.reshape(X, (X.shape[0], X.shape[1], 1))

# Split data chronologically (shuffle=False keeps the last 20% as the test set).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

# Build LSTM model: two stacked LSTM layers with dropout, then a small dense head.
model = Sequential([
    LSTM(64, return_sequences=True, input_shape=(X_train.shape[1], 1)),
    Dropout(0.2),
    LSTM(64, return_sequences=False),
    Dropout(0.2),
    Dense(32, activation='relu'),
    Dense(1)
])

model.compile(optimizer='adam', loss='mse')
model.fit(X_train, y_train, epochs=50, batch_size=16, verbose=1)

# Evaluate: map predictions and targets back to the original 'Avg' units.
pred = model.predict(X_test)
predicted = scaler.inverse_transform(pred)
actual = scaler.inverse_transform(y_test.reshape(-1,1))

# Calculate accuracy (this is the R² score, expressed as a percentage).
accuracy = r2_score(actual, predicted)
print(f"LSTM Accuracy (R² Score): {accuracy*100:.2f}%")


  data['lastupdate'] = pd.to_datetime(data['lastupdate'], errors='coerce')
  super().__init__(**kwargs)


Epoch 1/50
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 8ms/step - loss: 0.0714
Epoch 2/50
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.0566
Epoch 3/50
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.0697
Epoch 4/50
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.0623
Epoch 5/50
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.0578
Epoch 6/50
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.0625
Epoch 7/50
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - loss: 0.0608
Epoch 8/50
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.0647
Epoch 9/50
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.0614
Epoch 10/50
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.0598
Epoch 11/

In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
import numpy as np

# Load dataset
data = pd.read_csv("/content/AirQuality.csv")

# Clean data: drop rows missing the target ('Avg') or the numeric features.
data = data.dropna(subset=['Avg', 'Max', 'Min'])
# Parsed for completeness; 'lastupdate' is not used as a feature in this cell.
data['lastupdate'] = pd.to_datetime(data['lastupdate'], errors='coerce')

# Encode categorical columns as integer codes (one throw-away encoder per column).
for col in ['Country', 'State', 'city', 'Pollutants']:
    data[col] = LabelEncoder().fit_transform(data[col])

# Features and target
# NOTE(review): 'Max' and 'Min' are used to predict 'Avg'; if they come from the
# same reading the high R² may mostly reflect that relationship — confirm intent.
X = data[['Country', 'State', 'city', 'Pollutants', 'Max', 'Min']]
y = data['Avg']

# Split first, so that nothing learned from the test rows influences preprocessing.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Scale features: statistics come from the training rows only and are then
# applied unchanged to the test rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)
# Full scaled matrix, kept under its original name for any later use.
X_scaled = scaler.transform(X)

# Train model
model = GradientBoostingRegressor(n_estimators=200, learning_rate=0.1, random_state=42)
model.fit(X_train, y_train)

# Predict
y_pred = model.predict(X_test)

# Evaluate on the held-out rows.
r2 = r2_score(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))

print(f"🎯 Gradient Boosting Accuracy (R² Score): {r2*100:.2f}%")
print(f"📉 MAE: {mae:.2f}")
print(f"📉 RMSE: {rmse:.2f}")


  data['lastupdate'] = pd.to_datetime(data['lastupdate'], errors='coerce')


🎯 Gradient Boosting Accuracy (R² Score): 96.06%
📉 MAE: 11.11
📉 RMSE: 21.78


In [17]:
# -----------------------------
# AIR QUALITY FORECASTING USING LSTM
# -----------------------------

import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

# 1️⃣ Load dataset
data = pd.read_csv("/content/AirQuality.csv")

# Clean column names: remove spaces & standardize case
data.columns = data.columns.str.strip().str.title()  # e.g., 'city ' -> 'City'

# Validate the schema before any of these columns is accessed, so a bad CSV
# produces this message rather than a bare KeyError from the prints below.
if 'City' not in data.columns or 'Pollutants' not in data.columns:
    raise ValueError(f"CSV must have columns 'City' and 'Pollutants'")

# Check columns
print("Columns in CSV:", data.columns)
print("Unique cities:", data['City'].unique())
print("Unique pollutants:", data['Pollutants'].unique())


# Define city and pollutant
city = 'Delhi'
pollutant = 'CO'  # Match the exact value in the Pollutants column

# Window length, defined before the data-size check that depends on it so the
# cell does not rely on a value left in the kernel by another cell.
time_steps = 10


def create_sequences(dataset, time_steps=10):
    """Slice a 2-D series into supervised windows.

    Args:
        dataset: 2-D array; only column 0 is read.
        time_steps: number of consecutive rows in each input window.

    Returns:
        (X, y) NumPy arrays: X[k] is rows k .. k+time_steps-1 of column 0 and
        y[k] is the value in row k+time_steps.
    """
    X, y = [], []
    for i in range(len(dataset) - time_steps):
        X.append(dataset[i:i+time_steps, 0])
        y.append(dataset[i+time_steps, 0])
    return np.array(X), np.array(y)


# Filter dataset down to the chosen city/pollutant; .copy() so later column
# assignments do not write into a view of `data`.
selection = (data['City'] == city) & (data['Pollutants'] == pollutant)
df = data.loc[selection, ['Lastupdate', 'Avg']].copy()

if df.empty:
    print(f"No data found for City: {city} and Pollutant: {pollutant}. Please check the available unique values printed above.")
else:
    print(f"Data points for {city} and {pollutant}: {len(df)}")
    if len(df) < 2 * time_steps: # Check if enough data for train/test split and sequences
         print(f"Not enough data for City: {city} and Pollutant: {pollutant} to train the LSTM model effectively. Need at least {2 * time_steps} data points. Please try a different combination.")
    else:

        # Convert Date column to datetime and order the readings chronologically.
        # NOTE(review): the saved output shows pandas warning on this conversion;
        # an explicit `format=`/`dayfirst=` may be needed — confirm against the CSV.
        df['Lastupdate'] = pd.to_datetime(df['Lastupdate'])
        df = df.sort_values('Lastupdate')

        # Handle missing values
        df['Avg'] = df['Avg'].interpolate()

        # 4️⃣ Decide the chronological train/test boundary (80/20 over windows).
        values = df[['Avg']].to_numpy(dtype=float)
        train_size = int((len(values) - time_steps) * 0.8)

        # 2️⃣ Feature Scaling
        # Fit on the rows touched by the training windows only (inputs + targets
        # of the first `train_size` windows) so the test period does not leak
        # into the scaling; then transform the whole series.
        scaler = MinMaxScaler(feature_range=(0, 1))
        scaler.fit(values[:train_size + time_steps])
        scaled = scaler.transform(values)

        # 3️⃣ Create Sequences for LSTM
        X, y = create_sequences(scaled, time_steps)

        # Reshape for LSTM [samples, time_steps, features]
        X = X.reshape(X.shape[0], X.shape[1], 1)

        # Split into Train & Test at the boundary computed above.
        X_train, X_test = X[:train_size], X[train_size:]
        y_train, y_test = y[:train_size], y[train_size:]

        # 5️⃣ Build LSTM Model
        model = Sequential([
            LSTM(64, return_sequences=True, input_shape=(time_steps, 1)),
            Dropout(0.2),
            LSTM(32, return_sequences=False),
            Dense(16, activation='relu'),
            Dense(1)
        ])

        model.compile(optimizer='adam', loss='mse')

        # 6️⃣ Train the Model
        history = model.fit(X_train, y_train, epochs=50, batch_size=32, validation_split=0.1, verbose=1)

        # 7️⃣ Evaluate Model
        y_pred = model.predict(X_test)

        # Inverse scale to original values
        y_test_inv = scaler.inverse_transform(y_test.reshape(-1, 1))
        y_pred_inv = scaler.inverse_transform(y_pred)

        # 8️⃣ Compute Metrics
        r2 = r2_score(y_test_inv, y_pred_inv)
        mae = mean_absolute_error(y_test_inv, y_pred_inv)
        rmse = np.sqrt(mean_squared_error(y_test_inv, y_pred_inv))

        # Report the metrics actually computed above, not fixed placeholder text.
        print(f"🏙️ City: {city}, Pollutant: {pollutant}")
        print(f"🎯 LSTM Accuracy (R² Score): {r2*100:.2f}%")
        print(f"📉 MAE: {mae:.2f}")
        print(f"📉 RMSE: {rmse:.2f}")

Columns in CSV: Index(['Country', 'State', 'City', 'Place', 'Lastupdate', 'Avg', 'Max', 'Min',
       'Pollutants'],
      dtype='object')
Unique cities: ['Amaravati' 'Rajamahendravaram' 'Tirupati' 'Vijayawada' 'Visakhapatnam'
 'Gaya' 'Muzaffarpur' 'Patna' 'Delhi' 'Ahmedabad' 'Faridabad' 'Gurugram'
 'Manesar' 'Panchkula' 'Rohtak' 'Jorapokhar' 'Bengaluru' 'Chikkaballapur'
 'Hubballi' 'Thiruvananthapuram' 'Dewas' 'Mandideep' 'Pithampur' 'Satna'
 'Singrauli' 'Ujjain' 'Aurangabad' 'Chandrapur' 'Mumbai' 'Nagpur' 'Nashik'
 'Pune' 'Solapur' 'Thane' 'Brajrajnagar' 'Talcher' 'Amritsar' 'Bathinda'
 'Jalandhar' 'Khanna' 'Ludhiana' 'Mandi Gobindgarh' 'Patiala' 'Rupnagar'
 'Ajmer' 'Alwar' 'Bhiwadi' 'Jaipur' 'Jodhpur' 'Kota' 'Pali' 'Udaipur'
 'Chennai' 'Hyderabad' 'Agra' 'Baghpat' 'Bulandshahr' 'Ghaziabad'
 'Greater_Noida' 'Kanpur' 'Lucknow' 'Moradabad' 'Muzaffarnagar' 'Noida'
 'Varanasi' 'Asanol' 'Durgapur' 'Haldia' 'Howrah' 'Kolkata' 'Siliguri']
Unique pollutants: ['PM2.5' 'PM10' 'NO2' 'NH3' 'SO2'

  df['Lastupdate'] = pd.to_datetime(df['Lastupdate'])
  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 7s/step - loss: 0.4157 - val_loss: 0.1525
Epoch 2/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 107ms/step - loss: 0.3622 - val_loss: 0.1261
Epoch 3/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 105ms/step - loss: 0.3168 - val_loss: 0.1031
Epoch 4/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 104ms/step - loss: 0.2709 - val_loss: 0.0838
Epoch 5/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 105ms/step - loss: 0.2255 - val_loss: 0.0724
Epoch 6/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 122ms/step - loss: 0.1750 - val_loss: 0.0704
Epoch 7/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 113ms/step - loss: 0.1375 - val_loss: 0.0820
Epoch 8/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 136ms/step - loss: 0.0979 - val_loss: 0.1119
Epoch 9/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[