In [None]:
import pandas as pd
import warnings

# Silence every warning for the rest of the session.
# NOTE(review): a blanket 'ignore' also hides any warnings emitted by the
# later modelling/evaluation steps — consider narrowing the filter.
warnings.filterwarnings('ignore')

# Load given dataset
data = pd.read_csv("city_day.csv")

# Keep only rows with no missing value in ANY column of the raw file, then
# reduce the frame to the two columns used downstream. Selecting the columns
# directly replaces the thirteen per-column `del` statements: their effect was
# fully superseded by this selection, so the resulting rows and columns are
# the same.
# NOTE(review): because dropna() runs before the column selection, rows are
# discarded for gaps in columns that are never used afterwards;
# data.dropna(subset=['City', 'AQI']) would keep them — confirm which is
# intended before changing it, since it alters the training data.
df = data.dropna()[['City', 'AQI']]

# Drop cities that occur in exactly one row. Note that 'City' is the input
# feature here, not the class label (the label is 'AQI').
counts = df['City'].value_counts()
singleton_cities = counts[counts == 1].index
df = df[~df['City'].isin(singleton_cities)]

# Split into X (input) and y (output)
X = df['City']
y = df['AQI']

# Split into training and test sets (70 % / 30 %, fixed seed)
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=42)

# Encode categorical variable 'City' as integer codes.
# The encoder is fitted on ALL cities rather than on the training split only:
# LabelEncoder.transform raises ValueError for a label it has not seen, so a
# city whose rows all fell into the test split would otherwise crash here.
# Fitting uses only the feature values (no target information), and the codes
# are identical to the train-only fit whenever every city appears in training.
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
le.fit(X)
X_train = le.transform(X_train)
X_test = le.transform(X_test)

# Train a Random Forest Classifier
# random_state is pinned (same seed as the train/test split) so that the
# fitted forest, the reported metrics and the saved model are reproducible
# across runs; without it every execution yields a different model.
# NOTE(review): the target passed in is the raw 'AQI' column, so each distinct
# AQI value is treated as its own class — confirm a classifier (rather than a
# regressor or a bucketed target) is what is wanted here.
from sklearn.ensemble import RandomForestClassifier
rf = RandomForestClassifier(random_state=42)

# The encoded city codes are 1-D; reshape(-1, 1) turns them into a single
# feature column, which is the 2-D layout fit/predict are given here.
rf.fit(X_train.reshape(-1, 1), y_train)

# Predict on test set
predicted = rf.predict(X_test.reshape(-1, 1))

# Evaluate model performance
# `plot_confusion_matrix` was removed from scikit-learn in release 1.2, and
# importing it made this whole step fail on current versions. The
# ConfusionMatrixDisplay class is used instead; it is available in old and
# new releases alike.
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
from sklearn.metrics import ConfusionMatrixDisplay
from sklearn.utils.multiclass import unique_labels

# The printed figure is a percentage (fraction * 100), e.g. 0.85 means 0.85 %.
accuracy = accuracy_score(y_test, predicted)
print('Accuracy of RandomForestClassifier:', accuracy*100)

cr = classification_report(y_test, predicted)
print('Classification report:\n\n', cr)

cm = confusion_matrix(y_test, predicted)
print('Confusion matrix:\n\n', cm)

import matplotlib.pyplot as plt
fig, ax = plt.subplots(figsize=(10,10))
# Draw the matrix computed above instead of predicting a second time.
# The tick labels are the sorted union of true and predicted labels, i.e. the
# same ordering confusion_matrix uses for its rows/columns by default.
cm_display = ConfusionMatrixDisplay(
    confusion_matrix=cm,
    display_labels=unique_labels(y_test, predicted),
)
cm_display.plot(ax=ax)
# Title the heat-map axes explicitly rather than relying on pyplot's notion
# of the "current" axes.
ax.set_title('Confusion matrix of RandomForestClassifier')
plt.show()

# Persist the fitted forest `rf` to 'RF.pkl' using joblib.
# NOTE(review): only the forest is written; the fitted LabelEncoder `le` that
# maps city names to the integer codes is not saved here — confirm whether
# consumers of RF.pkl need it persisted as well.
from joblib import dump
dump(value=rf, filename='RF.pkl')


Accuracy of RandomForestClassifier: 0.85515766969535
Classification report:

               precision    recall  f1-score   support

        26.0       0.00      0.00      0.00         1
        27.0       0.00      0.00      0.00         1
        28.0       0.00      0.00      0.00         1
        29.0       0.00      0.00      0.00         1
        30.0       0.00      0.00      0.00         2
        31.0       0.00      0.00      0.00         2
        32.0       0.00      0.00      0.00         3
        33.0       0.00      0.00      0.00         3
        34.0       0.00      0.00      0.00         2
        35.0       0.00      0.00      0.00         4
        36.0       0.00      0.00      0.00         4
        37.0       0.00      0.00      0.00         7
        38.0       0.00      0.00      0.00         4
        39.0       0.00      0.00      0.00         6
        40.0       0.00      0.00      0.00         2
        41.0       0.00      0.00      0.00         8
   