# SVM, Naive Bayes, KNN, Random Forest and Decision Tree

# Importing necessary libraries

In [116]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier, export_text
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Load the dataset and apply label encoding

In [51]:
# Read the pollution classification data from its relative path into a DataFrame.
dataset_path = 'DataSets/4.Classification-Pollution_dataset-9i & 4or2 o.csv'
df = pd.read_csv(dataset_path)

In [53]:
# Summarise the loaded frame: column names, non-null counts and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5000 entries, 0 to 4999
Data columns (total 11 columns):
 #   Column                         Non-Null Count  Dtype  
---  ------                         --------------  -----  
 0   Temperature                    5000 non-null   float64
 1   Humidity                       5000 non-null   float64
 2   PM2.5                          5000 non-null   float64
 3   PM10                           5000 non-null   float64
 4   NO2                            5000 non-null   float64
 5   SO2                            5000 non-null   float64
 6   CO                             5000 non-null   float64
 7   Proximity_to_Industrial_Areas  5000 non-null   float64
 8   Population_Density             5000 non-null   int64  
 9   Air Quality                    5000 non-null   object 
 10  Binaary_Class                  5000 non-null   object 
dtypes: float64(8), int64(1), object(2)
memory usage: 429.8+ KB


In [61]:
# Preview the first five rows of the freshly loaded data.
df.head(n=5)

Unnamed: 0,Temperature,Humidity,PM2.5,PM10,NO2,SO2,CO,Proximity_to_Industrial_Areas,Population_Density,Air Quality,Binaary_Class
0,29.8,59.1,5.2,17.9,18.9,9.2,1.72,6.3,319,Moderate,Good
1,28.3,75.6,2.3,12.2,30.8,9.7,1.64,6.0,611,Moderate,Good
2,23.1,74.7,26.7,33.8,24.4,12.6,1.63,5.2,619,Moderate,Good
3,27.1,39.1,6.1,6.3,13.5,5.3,1.15,11.1,551,Good,Good
4,26.5,70.7,6.9,16.0,21.9,5.6,1.01,12.7,303,Good,Good


In [69]:
# Remove the binary label column, leaving 'Air Quality' as the only label column.
# Rebinding `df` (rather than using inplace=True) together with errors='ignore'
# keeps this cell safe to re-run: a second execution is a no-op instead of a
# KeyError for the already-removed column.
df = df.drop(columns=['Binaary_Class'], errors='ignore')

In [71]:
# Preview the first five rows to confirm 'Binaary_Class' is no longer present.
df.head(n=5)

Unnamed: 0,Temperature,Humidity,PM2.5,PM10,NO2,SO2,CO,Proximity_to_Industrial_Areas,Population_Density,Air Quality
0,29.8,59.1,5.2,17.9,18.9,9.2,1.72,6.3,319,Moderate
1,28.3,75.6,2.3,12.2,30.8,9.7,1.64,6.0,611,Moderate
2,23.1,74.7,26.7,33.8,24.4,12.6,1.63,5.2,619,Moderate
3,27.1,39.1,6.1,6.3,13.5,5.3,1.15,11.1,551,Good
4,26.5,70.7,6.9,16.0,21.9,5.6,1.01,12.7,303,Good


In [73]:
# Create the encoder used below to turn the string class labels into integer codes.
label_encoder = LabelEncoder()

In [75]:
# Replace the 'Air Quality' string labels with the integer codes learned by the
# encoder (the previews around this cell show e.g. Moderate -> 2, Good -> 0).
df['Air Quality'] = label_encoder.fit_transform(df['Air Quality'])

In [77]:
# Preview the first five rows to confirm 'Air Quality' now holds integer codes.
df.head(n=5)

Unnamed: 0,Temperature,Humidity,PM2.5,PM10,NO2,SO2,CO,Proximity_to_Industrial_Areas,Population_Density,Air Quality
0,29.8,59.1,5.2,17.9,18.9,9.2,1.72,6.3,319,2
1,28.3,75.6,2.3,12.2,30.8,9.7,1.64,6.0,611,2
2,23.1,74.7,26.7,33.8,24.4,12.6,1.63,5.2,619,2
3,27.1,39.1,6.1,6.3,13.5,5.3,1.15,11.1,551,0
4,26.5,70.7,6.9,16.0,21.9,5.6,1.01,12.7,303,0


### Features and target variable

In [82]:
# Separate the encoded label column (y) from the remaining predictor columns (X).
target_column = 'Air Quality'
y = df[target_column]
X = df.drop(columns=[target_column])

### Train-test split (80% train, 20% test)

In [86]:
# Hold out 20% of the rows for testing; the fixed random_state keeps the split
# repeatable across runs.
# NOTE(review): the split is not stratified and the class supports in the reports
# below are unequal - consider whether stratify=y is wanted.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

### Feature Scaling (Standardization)

In [91]:
# Fit the scaler on the training split only, then apply that same fitted
# transformation to both splits so the test data never influences the scaling.
scalar = StandardScaler().fit(X_train)
X_train = scalar.transform(X_train)
X_test = scalar.transform(X_test)

# Initialize classifiers

In [119]:
# One estimator per algorithm compared below; every estimator that is given a
# random_state here shares the same seed.
random_seed = 42
svm = SVC(kernel='linear', random_state=random_seed)
nb = GaussianNB()
knn = KNeighborsClassifier(n_neighbors=5)
rf = RandomForestClassifier(n_estimators=100, random_state=random_seed)
dtree = DecisionTreeClassifier(random_state=random_seed)

### Train and evaluate SVM

In [103]:
# Fit the SVM on the training split and predict the held-out split.
svm_pred = svm.fit(X_train, y_train).predict(X_test)

# Print accuracy, confusion matrix and per-class report, each closed by a divider.
divider = '-' * 55
print(divider)
print("SVM Accuracy:", accuracy_score(y_test, svm_pred))
print(divider)
for section_title, section_metric in (
    ("SVM Confusion Matrix:", confusion_matrix),
    ("SVM Classification Report:", classification_report),
):
    print(section_title)
    print(section_metric(y_test, svm_pred))
    print(divider)


-------------------------------------------------------
SVM Accuracy: 0.945
-------------------------------------------------------
SVM Confusion Matrix:
[[409   0   0   0]
 [  0  91   0  20]
 [  0   0 282  12]
 [  0   9  14 163]]
-------------------------------------------------------
SVM Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       409
           1       0.91      0.82      0.86       111
           2       0.95      0.96      0.96       294
           3       0.84      0.88      0.86       186

    accuracy                           0.94      1000
   macro avg       0.92      0.91      0.92      1000
weighted avg       0.95      0.94      0.94      1000

-------------------------------------------------------


### Train and evaluate Naive Bayes

In [106]:
# Fit Gaussian Naive Bayes on the training split and predict the held-out split.
nb_pred = nb.fit(X_train, y_train).predict(X_test)

# Print accuracy, confusion matrix and per-class report, each closed by a divider.
divider = '-' * 55
print(divider)
print("Naive Bayes Accuracy:", accuracy_score(y_test, nb_pred))
print(divider)
for section_title, section_metric in (
    ("Naive Bayes Confusion Matrix:", confusion_matrix),
    ("Naive Bayes Classification Report:", classification_report),
):
    print(section_title)
    print(section_metric(y_test, nb_pred))
    print(divider)

-------------------------------------------------------
Naive Bayes Accuracy: 0.926
-------------------------------------------------------
Naive Bayes Confusion Matrix:
[[405   0   4   0]
 [  0  85   0  26]
 [  1   1 275  17]
 [  0  11  14 161]]
-------------------------------------------------------
Naive Bayes Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.99      0.99       409
           1       0.88      0.77      0.82       111
           2       0.94      0.94      0.94       294
           3       0.79      0.87      0.83       186

    accuracy                           0.93      1000
   macro avg       0.90      0.89      0.89      1000
weighted avg       0.93      0.93      0.93      1000

-------------------------------------------------------


### Train and evaluate KNN

In [111]:
# Fit the k-nearest-neighbours model on the training split and predict the
# held-out split.
knn_pred = knn.fit(X_train, y_train).predict(X_test)

# Print accuracy, confusion matrix and per-class report, each closed by a divider.
divider = '-' * 55
print(divider)
print("KNN Accuracy:", accuracy_score(y_test, knn_pred))
print(divider)
for section_title, section_metric in (
    ("KNN Confusion Matrix:", confusion_matrix),
    ("KNN Classification Report:", classification_report),
):
    print(section_title)
    print(section_metric(y_test, knn_pred))
    print(divider)

-------------------------------------------------------
KNN Accuracy: 0.935
-------------------------------------------------------
KNN Confusion Matrix:
[[409   0   0   0]
 [  0  80   0  31]
 [  3   0 282   9]
 [  0   4  18 164]]
-------------------------------------------------------
KNN Classification Report:
              precision    recall  f1-score   support

           0       0.99      1.00      1.00       409
           1       0.95      0.72      0.82       111
           2       0.94      0.96      0.95       294
           3       0.80      0.88      0.84       186

    accuracy                           0.94      1000
   macro avg       0.92      0.89      0.90      1000
weighted avg       0.94      0.94      0.93      1000

-------------------------------------------------------


### Train and evaluate Random Forest

In [114]:
# Fit the random forest on the training split and predict the held-out split.
rf_pred = rf.fit(X_train, y_train).predict(X_test)

# Print accuracy, confusion matrix and per-class report, each closed by a divider.
divider = '-' * 55
print(divider)
print("Random Forest Accuracy:", accuracy_score(y_test, rf_pred))
print(divider)
for section_title, section_metric in (
    ("Random Forest Confusion Matrix:", confusion_matrix),
    ("Random Forest Classification Report:", classification_report),
):
    print(section_title)
    print(section_metric(y_test, rf_pred))
    print(divider)

-------------------------------------------------------
Random Forest Accuracy: 0.96
-------------------------------------------------------
Random Forest Confusion Matrix:
[[409   0   0   0]
 [  0  98   0  13]
 [  0   0 285   9]
 [  0   9   9 168]]
-------------------------------------------------------
Random Forest Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       409
           1       0.92      0.88      0.90       111
           2       0.97      0.97      0.97       294
           3       0.88      0.90      0.89       186

    accuracy                           0.96      1000
   macro avg       0.94      0.94      0.94      1000
weighted avg       0.96      0.96      0.96      1000

-------------------------------------------------------


### Train and evaluate Decision Tree

In [125]:
# Fit the decision tree on the training split and predict the held-out split.
dtree_pred = dtree.fit(X_train, y_train).predict(X_test)

# Print accuracy, confusion matrix and per-class report, each closed by a divider.
divider = '-' * 55
print(divider)
print("Decision Tree Accuracy:", accuracy_score(y_test, dtree_pred))
print(divider)
for section_title, section_metric in (
    ("Decision Tree Confusion Matrix:", confusion_matrix),
    ("Decision Tree Classification Report:", classification_report),
):
    print(section_title)
    print(section_metric(y_test, dtree_pred))
    print(divider)

-------------------------------------------------------
Decision Tree Accuracy: 0.921
-------------------------------------------------------
Decision Tree Confusion Matrix:
[[408   0   1   0]
 [  0  96   0  15]
 [  3   2 268  21]
 [  0  28   9 149]]
-------------------------------------------------------
Decision Tree Classification Report:
              precision    recall  f1-score   support

           0       0.99      1.00      1.00       409
           1       0.76      0.86      0.81       111
           2       0.96      0.91      0.94       294
           3       0.81      0.80      0.80       186

    accuracy                           0.92      1000
   macro avg       0.88      0.89      0.89      1000
weighted avg       0.92      0.92      0.92      1000

-------------------------------------------------------
