In [None]:
# Importing Libraries
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore")

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn import metrics
from sklearn.metrics import mean_absolute_error,mean_squared_error,r2_score
from sklearn.metrics import accuracy_score,confusion_matrix

In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive.
# NOTE(review): no later cell in this notebook reads from /content/drive
# (the dataset is fetched by URL) — confirm the mount is still needed.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Reading the dataset
# A Drive ".../file/d/<id>/view" link serves the HTML preview page rather than
# the file itself, so pandas cannot parse it as CSV. Build the direct-download
# URL from the file id instead.
# NOTE(review): Drive may still answer with an interstitial page for very
# large files — confirm the download works for this file, or read it from the
# mounted drive path instead.
DATASET_FILE_ID = '1pgIVOwSe12cLAr7k60OQlGGiIli0LtNz'
DATASET_URL = f'https://drive.google.com/uc?export=download&id={DATASET_FILE_ID}'
df = pd.read_csv(DATASET_URL, encoding='unicode_escape')

In [None]:
def cal_Noi(no2):
    """Convert an NO2 reading into its sub-index by piecewise-linear scaling.

    Segments (reading -> index): 0-40 -> 0-50, 40-80 -> 50-100,
    80-180 -> 100-200, 180-280 -> 200-300, 280-400 -> 300-400; anything
    above 400 keeps climbing with the slope of the 280-400 segment.

    A NaN reading fails every comparison, reaches the final return and
    therefore comes back as NaN.
    """
    # Each guard only runs when all earlier (lower) bands were rejected.
    if no2 <= 40:
        return no2*50/40
    if no2 <= 80:
        return 50+(no2-40)*(50/40)
    if no2 <= 180:
        return 100+(no2-80)*(100/100)
    if no2 <= 280:
        return 200+(no2-180)*(100/100)
    if no2 <= 400:
        return 300+(no2-280)*(100/120)
    return 400+(no2-400)*(100/120)
# Store the NO2 sub-index next to the raw reading and preview both columns.
df['Noi'] = df['no2'].apply(cal_Noi)
data = df.loc[:, ['no2', 'Noi']]
data.head()

In [None]:
def cal_RSPMI(rspm):
    """Convert an RSPM reading into its sub-index by piecewise-linear scaling.

    Segments (reading -> index): 0-30 -> 0-50, 30-60 -> 50-100,
    60-90 -> 100-200, 90-120 -> 200-300, 120-250 -> 300-400; anything above
    250 keeps climbing with the slope of the 120-250 segment.

    The previous implementation compared and scaled its own zero-initialised
    local instead of the ``rspm`` argument, so it returned 0 for every input.

    Parameters
    ----------
    rspm : float
        RSPM reading. NaN fails every comparison, reaches the final branch
        and is returned as NaN, matching the other sub-index helpers.

    Returns
    -------
    float
        The RSPM sub-index.
    """
    if rspm <= 30:
        rpi = rspm*50/30
    elif rspm <= 60:
        rpi = 50+(rspm-30)*50/30
    elif rspm <= 90:
        rpi = 100+(rspm-60)*100/30
    elif rspm <= 120:
        rpi = 200+(rspm-90)*100/30
    elif rspm <= 250:
        rpi = 300+(rspm-120)*(100/130)
    else:
        # Open-ended top band (also the landing spot for NaN input).
        rpi = 400+(rspm-250)*(100/130)
    return rpi
# Store the RSPM sub-index next to the raw reading and preview both columns.
df['Rpi'] = df['rspm'].apply(cal_RSPMI)
data = df.loc[:, ['rspm', 'Rpi']]
data.head()

In [None]:
def cal_SPMi(spm):
    """Convert an SPM reading into its sub-index by piecewise-linear scaling.

    Segments (reading -> index): 0-50 -> 0-50, 50-100 -> 50-100,
    100-250 -> 100-200, 250-350 -> 200-300, 350-430 -> 300-400; readings
    above 430 use the final open-ended formula.

    A NaN reading fails every comparison, reaches the final return and
    therefore comes back as NaN.

    NOTE(review): the open-ended segment scales by 100/430 whereas the
    segment below it scales by 100/80 — confirm that slope is intended.
    """
    # Each guard only runs when all earlier (lower) bands were rejected.
    if spm <= 50:
        return spm*50/50
    if spm <= 100:
        return 50+(spm-50)*(50/50)
    if spm <= 250:
        return 100+(spm-100)*(100/150)
    if spm <= 350:
        return 200+(spm-250)*(100/100)
    if spm <= 430:
        return 300+(spm-350)*(100/80)
    return 400+(spm-430)*(100/430)

# Store the SPM sub-index next to the raw reading and preview both columns.
df['SPMi'] = df['spm'].apply(cal_SPMi)
data = df.loc[:, ['spm', 'SPMi']]
data.head()

In [None]:
def cal_aqi(si,ni,rspmi,spmi):
    """Return the overall AQI: the largest of the four pollutant sub-indices.

    The previous chain of strict ``>`` comparisons only picked a winner when
    one sub-index was strictly greater than all the others, so a tie for the
    maximum (for example ``si == ni``) or any NaN sub-index produced 0.

    Parameters
    ----------
    si, ni, rspmi, spmi : float
        SO2, NO2, RSPM and SPM sub-indices. NaN entries are skipped.

    Returns
    -------
    float or int
        The largest non-NaN sub-index, or 0 when every sub-index is NaN
        (0 was already the fallback value, so the result is never NaN).
    """
    # NaN is the only value that is not equal to itself; `v == v` filters it out.
    available = [v for v in (si, ni, rspmi, spmi) if v == v]
    return max(available, default=0)

def cal_SOi(so2):
    """Convert an SO2 reading into its sub-index by piecewise-linear scaling.

    Segments (reading -> index): 0-40 -> 0-50, 40-80 -> 50-100,
    80-380 -> 100-200, 380-800 -> 200-300, 800-1600 -> 300-400; readings
    above 1600 keep climbing with the slope of the 800-1600 segment.
    A NaN reading reaches the final return and comes back as NaN.

    NOTE(review): these bands are taken from the CPCB National AQI 24-hour
    SO2 breakpoints — confirm they are the standard this project intends.
    """
    if so2 <= 40:
        return so2*50/40
    if so2 <= 80:
        return 50+(so2-40)*(50/40)
    if so2 <= 380:
        return 100+(so2-80)*(100/300)
    if so2 <= 800:
        return 200+(so2-380)*(100/420)
    if so2 <= 1600:
        return 300+(so2-800)*(100/800)
    return 400+(so2-1600)*(100/800)


# 'SOi' is read below (and again when the feature matrix is built), but no
# cell in this notebook creates it, so a fresh "Restart & Run All" stops with
# a KeyError. Derive it here only when it is missing.
# NOTE(review): assumes the raw frame names the SO2 reading 'so2', mirroring
# the 'no2' column used for Noi — confirm against the dataset.
if 'SOi' not in df.columns:
    df['SOi'] = df['so2'].apply(cal_SOi)

# Overall AQI per row, computed from the four sub-indices.
df['AQI']=df.apply(lambda x:cal_aqi(x['SOi'],x['Noi'],x['Rpi'],x['SPMi']),axis=1)
data= df[['state','SOi','Noi','Rpi','SPMi','AQI']]
data.head()

In [None]:
def AQI_Range(x):
    """Translate a numeric AQI into its category label.

    Upper bounds are inclusive: <=50 "Good", <=100 "Moderate", <=200 "Poor",
    <=300 "Unhealthy", <=400 "Very unhealthy", and above 400 "Hazardous".
    A value that satisfies none of the comparisons (NaN) yields None.
    """
    # (inclusive upper bound, label), ordered from the lowest band upwards.
    bands = (
        (50, "Good"),
        (100, "Moderate"),
        (200, "Poor"),
        (300, "Unhealthy"),
        (400, "Very unhealthy"),
    )
    for upper, label in bands:
        if x <= upper:
            return label
    if x > 400:
        return "Hazardous"
    return None

# Label every row with the category of its AQI value, then show the frame.
df['AQI_Range'] = df['AQI'].apply(AQI_Range)
df

In [None]:
# Missing-value count per column, ordered from most to fewest.
nullvalues = df.isna().sum().sort_values(ascending=False)

In [None]:
# Display the per-column missing-value counts.
nullvalues

In [None]:
# Share of missing values per column, in percent, ordered from most to fewest.
null_values_percentage = (df.isna().sum() / len(df) * 100).sort_values(ascending=False)

In [None]:
# Side-by-side table: absolute missing count ('Total') and its percentage ('Percent').
missing_data_with_percentage = pd.concat(
    [nullvalues, null_values_percentage],
    axis=1,
    keys=['Total', 'Percent'],
)

In [None]:
# Display the combined missing-value table.
missing_data_with_percentage

In [None]:
# Remove the columns that are not used as model features later in the notebook.
# A single non-inplace drop with errors='ignore' keeps the cell idempotent:
# the previous five in-place drops raised KeyError when the cell was re-run
# after the columns were already gone.
df = df.drop(
    columns=[
        'agency',
        'stn_code',
        'date',
        'sampling_date',
        'location_monitoring_station',
    ],
    errors='ignore',
)

In [None]:
# Display the number of missing values in each column.
df.isnull().sum()

In [None]:
# Null value imputation for categorical data:
# each gap is filled with the most frequent value of its own column.
df['location'] = df['location'].fillna(value=df['location'].mode()[0])
df['type'] = df['type'].fillna(value=df['type'].mode()[0])

In [None]:
# Replace every remaining missing value in the frame with 0, modifying df in place.
df.fillna(0, inplace=True)

In [None]:
from sklearn.neighbors import KNeighborsClassifier
import joblib

In [None]:
# Splitting the data into independent and dependent columns for classification:
# the four sub-indices are the features, the AQI category is the target.
X = df.loc[:, ['SOi', 'Noi', 'Rpi', 'SPMi']]
Y = df.loc[:, 'AQI_Range']

In [None]:
# Splitting the data into training and testing data
# (33% of the rows are held out for testing; the seed is fixed).
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.33,
    random_state=70,
)

### K-Nearest Neighbours

In [None]:
# Fit one default KNN classifier on the training split.
# Previously two identical classifiers were fitted: one was saved to disk and
# the other was evaluated. Fitting once avoids the duplicate work and ensures
# the saved artefact is exactly the model whose scores are printed below.
model = KNeighborsClassifier()
model.fit(X_train, Y_train)
# Later cells call KNN.predict(...); keep that name as an alias of the fitted model.
KNN = model

# Persist the fitted classifier so the Streamlit app can load it.
filename = 'Trained_model.sav'
joblib.dump(model, filename)

#predict on train
train_preds = KNN.predict(X_train)
#accuracy on train
print("Model accuracy on train is: ", accuracy_score(Y_train, train_preds))

#predict on test
test_preds = KNN.predict(X_test)
#accuracy on test
print("Model accuracy on test is: ", accuracy_score(Y_test, test_preds))
print('-'*50)

# Kappa Score
print('KappaScore is: ', metrics.cohen_kappa_score(Y_test,test_preds))

In [None]:
# Spot-check prediction for one hand-written sample.
# The four values follow the training feature order: SOi, Noi, Rpi, SPMi.
KNN.predict([[7.4,47.7,78.182,100]])

In [None]:
# Spot-check prediction for a hand-written sample with very low sub-indices.
# The four values follow the training feature order: SOi, Noi, Rpi, SPMi.
KNN.predict([[1,1.2,3.12,0]])

In [None]:
# Spot-check prediction for a hand-written sample with very high sub-indices.
# The four values follow the training feature order: SOi, Noi, Rpi, SPMi.
KNN.predict([[325.7,345,798.182,203]])

In [None]:
! pip install -q streamlit

In [None]:
! pip install pyngrok

In [None]:
%%writefile app.py
import streamlit as st
import pandas as pd
import numpy as np
import joblib

st.title('Air Quality Index Predictor')

SO2I = st.number_input("Enter the SO2I value:")
NO2I = st.number_input("Enter the NO2I value:")
RPI = st.number_input("Enter the RPI value:")
SPMI = st.number_input("Enter the SPMI value:")
loaded_model = joblib.load('Trained_model.sav')
inputs = (SO2I, NO2I, RPI, SPMI)
if st.button("Predict"):
    result = loaded_model.predict([inputs])
    st.write(result)

In [None]:
!ngrok authtoken 2Kgk0JPvh53BRHvj2SCxcZI3YPN_2eDZG97kbtqGH7xy3yD88

In [None]:
!wget https://bin.equinox.io/c/4VmDzA7iaHb/ngrok-stable-linux-amd64.zip

In [None]:
!unzip ngrok-stable-linux-amd64.zip

In [None]:
# Launch the ngrok binary from the working directory as a background process
# that opens an HTTP tunnel to local port 8501.
# NOTE(review): presumably 8501 is the port the Streamlit app listens on — confirm.
get_ipython().system_raw('./ngrok http 8501 &')

In [None]:
# Ask the local tunnels endpoint on port 4040 for its tunnel list and print the
# public URL of the first entry.
# NOTE(review): this presumably fails if the background ngrok process has not
# finished starting or has no tunnel yet — confirm whether a short wait is needed.
! curl -s http://localhost:4040/api/tunnels | python3 -c \
"import sys, json; print(json.load(sys.stdin)['tunnels'][0]['public_url'])"

In [None]:
# Start the Streamlit server for /content/app.py.
# NOTE(review): assumes the %%writefile cell ran with /content as the working
# directory so that app.py exists at this path — confirm.
# NOTE(review): the command runs in the foreground, so this cell presumably
# keeps executing until the server is stopped.
!streamlit run /content/app.py