In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn import metrics
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, RepeatedKFold

In [None]:
# Absolute location of the AQI CSV under a mounted Google Drive folder.
# NOTE(review): hardcoded path — adjust when running outside that environment.
path = "/content/drive/MyDrive/Data Science/Project-46 Air Quality Index Predictor Using TPOT With  Deployment (Auto ML)/AQI Data.csv"

In [None]:
# Load the CSV at `path` into the working DataFrame.
df = pd.read_csv(path)

In [None]:
# (rows, columns) of the loaded frame.
df.shape

In [None]:
# Preview the first rows.
df.head()

In [None]:
# Column dtypes and non-null counts.
df.info()

In [None]:
# Number of missing values in each column.
df.isnull().sum()

In [None]:
# Drop rows containing any missing value. Rebinding (instead of inplace=True)
# avoids mutating the object that earlier cells displayed.
df = df.dropna()

In [None]:
# Count of distinct values per column.
df.nunique()

In [None]:
# Heatmap of the summary statistics returned by df.describe().
# The former `label=df.columns` argument was dropped: heatmap forwards unknown
# keyword arguments to pcolormesh, where `label` only names the artist for a
# legend that is never drawn.
plt.figure(figsize=(10, 9))
sns.heatmap(df.describe(), annot=True, linewidths=0.5, fmt='.1f')
plt.show()

In [None]:
# Distribution of the 'PM 2.5' column, drawn as a polygon with a KDE overlay.
sns.histplot(data=df, x='PM 2.5', element="poly", kde=True)
plt.show()

In [None]:
# Switch seaborn to the white-grid theme; affects figures created from here on.
sns.set_style('whitegrid')

In [None]:
# Histogram (step outline + KDE) for every column, shown one at a time.
for column in df.columns:
  sns.histplot(data=df, x=column, element="step", kde=True, label=column)
  plt.show()

In [None]:
# Pairwise correlation of all columns, annotated with the coefficient values.
corr_matrix = df.corr()
plt.figure(figsize=(18, 10))
sns.heatmap(corr_matrix, linewidths=0.5, cmap='viridis', annot=True)
plt.show()

## Plotting a Q-Q Plot

In [None]:
import scipy.stats as stat
import pylab

In [None]:
def plot_curve(col):
  """Show a histogram and a normal Q-Q plot of one ``df`` column side by side.

  Args:
    col: Name of the column in the notebook-level ``df`` to plot.
  """
  _, (hist_ax, qq_ax) = plt.subplots(1, 2, figsize=(12, 6))

  hist_ax.hist(df[col])
  hist_ax.set_title(col)

  # probplot writes its own "Probability Plot" title onto the axes it draws
  # on, so the column name must be applied afterwards or it is overwritten.
  stat.probplot(df[col], dist='norm', plot=qq_ax)
  qq_ax.set_title(col)

  plt.show()

In [None]:
# Histogram + Q-Q plot for every column of the frame.
for column_name in df.columns:
  plot_curve(column_name)

## Splitting data 

In [None]:
# Features: every column except 'PM 2.5'. Target: 'PM 2.5' as a NumPy array.
y = df['PM 2.5'].values
x = df.drop(columns='PM 2.5')

In [None]:
# Feature standardisation is currently disabled.
# NOTE(review): if re-enabled as written, the scaler would be fit on all of x
# before the train/test split, so test-set statistics would leak into training.
# scaler = StandardScaler().fit(x)
# x = scaler.transform(x)

In [None]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=44,
)

## Feature Importance

In [None]:
from sklearn.ensemble import ExtraTreesRegressor

In [None]:
# Extra-trees ensemble used only to rank features. A fixed seed makes the
# importances reproducible across runs (same seed as the train/test split).
model_er = ExtraTreesRegressor(n_estimators=300, n_jobs=-1, random_state=44)

In [None]:
# Fit on the full dataset (x, y) rather than the training split.
# NOTE(review): the importances therefore also reflect rows that are in x_test.
model_er.fit(x, y)

In [None]:
# Importance score per feature, in the column order of x.
model_er.feature_importances_

In [None]:
# Label each bar with the columns the model was actually fitted on. The former
# df.columns[:8] slice assumed exactly eight features, all placed before the
# target column, and would mislabel or fail otherwise.
plt.bar(x=x.columns, height=model_er.feature_importances_)
plt.show()

## Import Regression Models

In [None]:
from sklearn.linear_model import LinearRegression, Lasso
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor

In [None]:
# Estimator classes (not instances) to compare with default hyper-parameters.
all_models = [
    LinearRegression,
    Lasso,
    DecisionTreeRegressor,
    RandomForestRegressor,
    XGBRegressor,
]

In [None]:
def model_score(model, train=None, test=None):
  """Fit a default-configured estimator and print its train and test scores.

  Args:
    model: Estimator class (not an instance); it is called with no arguments.
    train: Optional ``(features, target)`` pair to fit on. Defaults to the
      notebook-level ``x_train`` / ``y_train``.
    test: Optional ``(features, target)`` pair to evaluate on. Defaults to the
      notebook-level ``x_test`` / ``y_test``.

  Returns:
    The fitted estimator instance.
  """
  fit_x, fit_y = (x_train, y_train) if train is None else train
  eval_x, eval_y = (x_test, y_test) if test is None else test

  model = model()
  model.fit(fit_x, fit_y)
  print(type(model).__name__)
  print("Training Score", model.score(fit_x, fit_y))
  # Score the held-out split here; this line used to re-score the training
  # data, so the "Testing Score" always equalled the training score.
  print("Testing Score", model.score(eval_x, eval_y))
  return model

In [None]:
# Fit every candidate class and keep the fitted estimators keyed by class name.
models = {}

for estimator_cls in all_models:
  model = model_score(estimator_cls)
  models[model.__class__.__name__] = model
  print('-------'*20)

In [None]:
# Take the fitted random forest from `models` for a closer look.
model = models["RandomForestRegressor"]

In [None]:
# Predictions of the selected model on the held-out features.
pred = model.predict(x_test)

In [None]:
# Actual vs. predicted values on the held-out split. x/y are passed by keyword:
# current seaborn releases reject positional x/y, and the later TPOT scatter
# cell already uses this form.
sns.scatterplot(x=y_test, y=pred)
plt.show()

In [None]:
# Distribution of the residuals (actual minus predicted).
residuals = y_test - pred
sns.histplot(residuals)
plt.show()

In [None]:
# R² of the predictions against the held-out targets.
metrics.r2_score(y_test, pred)

In [None]:
# Error metrics of the current predictions on the held-out split.
mae = metrics.mean_absolute_error(y_test, pred)
mse = metrics.mean_squared_error(y_test, pred)
print("MAE", mae)
print("MSE", mse)
print("RMSE", np.sqrt(mse))

## AutoML with TPOT

In [None]:
# Uncomment to install TPOT if it is missing from the environment.
# ! pip install tpot

In [None]:
import tpot

In [None]:
from tpot import TPOTRegressor

In [None]:
# 10-fold cross-validation repeated 3 times, with a fixed seed.
cv = RepeatedKFold(n_splits=10, n_repeats=3, random_state=11)

In [None]:
# TPOT search configuration. Rebinds `model`, replacing the random forest
# selected earlier.
model = TPOTRegressor(
    generations=5,
    population_size=50,
    scoring='neg_mean_absolute_error',
    cv=cv,
    verbosity=2,
    random_state=44,
    n_jobs=-1,
)

In [None]:
# Run the TPOT search on the training split, using the `cv` splitter passed at
# construction.
model.fit(x_train, y_train)

In [None]:
# Score of the fitted TPOT model on the held-out split.
# NOTE(review): presumably reported in the configured `scoring` metric
# (negated MAE) rather than R² — confirm against the TPOT docs.
print(model.score(x_test, y_test))

In [None]:
# Show the pipeline stored on the fitted TPOT model.
model.fitted_pipeline_

In [None]:
# Predictions of the TPOT model on the held-out features (overwrites the
# earlier random-forest `pred`).
pred = model.predict(x_test)

In [None]:
# Distribution of the residuals (actual minus predicted).
residuals = y_test - pred
sns.histplot(residuals)
plt.show()

In [None]:
# Actual (x axis) vs. predicted (y axis) values on the held-out split.
sns.scatterplot(x=y_test, y=pred)
plt.show()

In [None]:
# Error metrics of the current predictions on the held-out split.
mae = metrics.mean_absolute_error(y_test, pred)
mse = metrics.mean_squared_error(y_test, pred)
print("MAE", mae)
print("MSE", mse)
print("RMSE", np.sqrt(mse))

In [None]:
# R² of the predictions against the held-out targets.
metrics.r2_score(y_test, pred)