In [1]:
import pandas as pd
import numpy as np

In [4]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.pipeline import Pipeline

In [5]:
# Load the raw table from the local dataset folder.
df = pd.read_csv(filepath_or_buffer="dataset//City_Types.csv")

**First, split the data into training and test sets with `train_test_split`**

In [6]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
# Features are the columns at positions 1-7; the target is the last column.
X_train, X_test, y_train, y_test = train_test_split(
    df.iloc[:, 1:8],
    df.iloc[:, -1],
    test_size=0.2,
    random_state=0,
)

***Convert the target column (`y_train` and `y_test`) to numeric labels***

In [7]:
# Encoder used by the next cells to map the target labels to integer codes.
le = LabelEncoder()

In [8]:
# Fit the encoder on the training targets and overwrite y_train with the codes.
# NOTE: y_train is reassigned, so re-running this cell on its own refits the
# encoder on the already-encoded array; re-run from the split cell instead.
y_train = le.fit_transform(y_train)

In [9]:
# Encode the test targets with the mapping learned from the training targets
# (transform only, the encoder is not refitted here).
y_test = le.transform(y_test)

***Create the transformations that go into the pipeline***

In [10]:
# Column positions refer to the feature frame handed to the pipeline:
# positions 1-6 are standardised, position 0 is one-hot encoded
# (first category dropped, dense int32 output).
# NOTE(review): OneHotEncoder keeps its default handle_unknown='error', so a
# category not seen during fit raises at predict time — confirm this is intended.
trf1 = ColumnTransformer(
    transformers=[
        ('tf1', StandardScaler(), list(range(1, 7))),
        ('tf2', OneHotEncoder(drop='first', dtype=np.int32, sparse_output=False), [0]),
    ],
    remainder='passthrough',
)

In [11]:
# Seed the tree so repeated runs grow the same tree and report the same score:
# scikit-learn randomly permutes candidate features at each split, so an
# unseeded tree can differ from run to run. The seed matches the one used for
# the train/test split.
trf2 = DecisionTreeClassifier(random_state=0)

In [12]:
# Chain the column preprocessing and the classifier so that both are fitted
# together and applied in the same order at prediction time.
pipe = Pipeline(steps=[('tf1', trf1), ('tf2', trf2)])

In [13]:
# Fit the preprocessing step and then the classifier on the training split.
pipe.fit(X_train,y_train)

In [14]:
# Predict encoded class labels for the held-out rows.
y_pred = pipe.predict(X_test)

In [15]:
# Fraction of held-out rows whose predicted label equals the true label.
score = accuracy_score(y_true=y_test, y_pred=y_pred)

In [16]:
# Display the test accuracy.
# NOTE(review): the value is close to 1.0 and the city column is one of the
# features; if the target is determined by the city, a random row split does
# not measure generalisation to unseen cities — TODO confirm.
score

0.9994307940423109

***Our pipeline is ready to export***

In [17]:
import pickle as pkl

In [19]:
# Persist the fitted pipeline to disk. The context manager makes sure the file
# handle is flushed and closed even if pickling fails part-way through.
with open("models//Air_type_classifier_pipe.pkl", 'wb') as model_file:
    pkl.dump(pipe, model_file)

In [20]:
# Show the predicted labels; these are the integer codes produced by `le`
# (le.inverse_transform(y_pred) gives back the original class names).
y_pred

array([1, 1, 1, ..., 0, 1, 1])

In [21]:
# Show the held-out feature rows that the predictions were made for.
X_test

Unnamed: 0,City,CO,NO2,SO2,O3,PM2.5,PM10
42852,Vancouver,149.0,9.6,1.5,68.0,5.0,6.7
34612,Zurich,225.0,5.4,0.9,38.0,10.0,10.2
49862,Stockholm,205.0,12.5,1.0,87.0,13.4,17.5
48656,Stockholm,137.0,7.4,0.4,81.0,4.3,6.0
47859,Stockholm,133.0,14.9,0.5,53.0,4.8,8.4
...,...,...,...,...,...,...,...
49181,Stockholm,216.0,22.4,0.7,36.0,8.5,13.1
23977,Beijing,1330.0,46.6,71.4,9.0,88.2,126.2
33,Moscow,394.0,34.1,34.1,35.0,20.9,25.7
43039,Vancouver,1013.0,30.7,5.2,45.0,4.6,4.9
