<a href="https://colab.research.google.com/github/Sameera326/GenerativeAIB40/blob/main/GenAI_Project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [18]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, r2_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
import tensorflow as tf

# Step 1: Load the Dataset
df = pd.read_excel("Data_Air Pollutant.xlsx")  # Adjust path if needed

# Step 2: Drop missing values
df = df.dropna()

# Step 3: Keep numeric columns only
numeric_df = df.select_dtypes(include=[np.number])

# Step 4: Check for target column
if 'PM2.5' not in numeric_df.columns:
    raise ValueError("Target column 'PM2.5' not found in dataset.")

# Step 5: Split features and target
X = numeric_df.drop(columns=['PM2.5'])
y = numeric_df['PM2.5']

# Step 6: Normalize features
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

# Step 7: Split into train and test sets
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.2, random_state=42)

# Step 8: Build ANN model
model = Sequential([
    Dense(16, activation='relu', input_shape=(X_train.shape[1],)),
    Dense(8, activation='relu'),
    Dense(1)
])

model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])

# Step 9: Train the model
model.fit(X_train, y_train, epochs=50, batch_size=16, validation_split=0.1)

# Step 10: Evaluate model
loss, mae = model.evaluate(X_test, y_test, verbose=0)
y_pred = model.predict(X_test).flatten()

# Step 11: Calculate additional metrics
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
r2 = r2_score(y_test, y_pred)

# Step 12: Custom accuracy (within ±10 units)
tolerance = 10
custom_accuracy = np.mean(np.abs(y_test - y_pred) <= tolerance) * 100

# Print results
print(f"\n📊 Model Evaluation:")
print(f"MAE: {mae:.2f}")
print(f"RMSE: {rmse:.2f}")
print(f"R² Score (accuracy-like): {r2:.4f}")
print(f"Custom Accuracy (±{tolerance}): {custom_accuracy:.2f}%")

# Step 13: Convert to TFLite for deployment
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()

# Save the model
with open("air_quality_model.tflite", "wb") as f:
    f.write(tflite_model)

print("\n✅ TFLite model saved as 'air_quality_model.tflite'")


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 15ms/step - loss: 1299.2836 - mae: 32.2240 - val_loss: 1377.6285 - val_mae: 34.5473
Epoch 2/50
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 1374.6130 - mae: 33.1281 - val_loss: 1360.8951 - val_mae: 34.3100
Epoch 3/50
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 1424.9351 - mae: 33.7062 - val_loss: 1329.0078 - val_mae: 33.8536
Epoch 4/50
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 1408.5243 - mae: 33.5291 - val_loss: 1285.0693 - val_mae: 33.2189
Epoch 5/50
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 1342.0667 - mae: 33.0345 - val_loss: 1229.5132 - val_mae: 32.3985
Epoch 6/50
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 1253.5635 - mae: 31.0115 - val_loss: 1156.6190 - val_mae: 31.2961
Epoch 7/50
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━



[1m1/3[0m [32m━━━━━━[0m[37m━━━━━━━━━━━━━━[0m [1m0s[0m 56ms/step



[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step

📊 Model Evaluation:
MAE: 10.51
RMSE: 13.67
R² Score (accuracy-like): 0.2519
Custom Accuracy (±10): 57.95%
Saved artifact at '/tmp/tmpka5ioveu'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 18), dtype=tf.float32, name='keras_tensor_22')
Output Type:
  TensorSpec(shape=(None, 1), dtype=tf.float32, name=None)
Captures:
  137740067700560: TensorSpec(shape=(), dtype=tf.resource, name=None)
  137740067700368: TensorSpec(shape=(), dtype=tf.resource, name=None)
  137740064441168: TensorSpec(shape=(), dtype=tf.resource, name=None)
  137740064442512: TensorSpec(shape=(), dtype=tf.resource, name=None)
  137740064440400: TensorSpec(shape=(), dtype=tf.resource, name=None)
  137740064443280: TensorSpec(shape=(), dtype=tf.resource, name=None)

✅ TFLite model saved as 'air_quality_model.tflite'


In [20]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, r2_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping
import tensorflow as tf

# Load dataset
df = pd.read_excel("Data_Air Pollutant.xlsx")
df = df.dropna()

# Select only numeric columns
numeric_df = df.select_dtypes(include=[np.number])

# Select top correlated features with PM2.5
correlations = numeric_df.corr()['PM2.5'].sort_values(ascending=False)
top_features = correlations.index[1:7]  # Top 6 features (excluding PM2.5)

# Features and target
X = numeric_df[top_features]
y = numeric_df['PM2.5']

# Normalize features
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# ANN model
model = Sequential([
    Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
    Dense(32, activation='relu'),
    Dense(16, activation='relu'),
    Dense(1)
])
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])

# Early stopping
early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
model.fit(X_train, y_train, epochs=200, batch_size=16, validation_split=0.1, callbacks=[early_stop])

# Evaluation
loss, mae = model.evaluate(X_test, y_test, verbose=0)
y_pred = model.predict(X_test).flatten()
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
r2 = r2_score(y_test, y_pred)
custom_accuracy = np.mean(np.abs(y_test - y_pred) <= 10) * 100

print(f"\n📊 Model Performance:")
print(f"MAE: {mae:.2f}")
print(f"RMSE: {rmse:.2f}")
print(f"R² Score: {r2:.4f}")
print(f"Custom Accuracy (±10): {custom_accuracy:.2f}%")

# Convert to TFLite
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()
with open("optimized_air_quality_model.tflite", "wb") as f:
    f.write(tflite_model)

print("\n✅ Saved as 'optimized_air_quality_model.tflite'")


Epoch 1/200


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 17ms/step - loss: 1352.3311 - mae: 32.4992 - val_loss: 1357.9688 - val_mae: 34.2825
Epoch 2/200
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 1402.7472 - mae: 33.4175 - val_loss: 1295.7438 - val_mae: 33.4115
Epoch 3/200
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 1234.1405 - mae: 31.2630 - val_loss: 1164.3541 - val_mae: 31.4946
Epoch 4/200
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 1216.2557 - mae: 30.4155 - val_loss: 908.1902 - val_mae: 27.3753
Epoch 5/200
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 813.9254 - mae: 24.5274 - val_loss: 501.6269 - val_mae: 19.5856
Epoch 6/200
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 492.1249 - mae: 17.0389 - val_loss: 149.9972 - val_mae: 9.9589
Epoch 7/200
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0