In [1]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
%matplotlib inline
import matplotlib 

# Data load: load city_day into a dataframe 

In [2]:
df1 = pd.read_csv("city_day.csv")
df1.head()

Unnamed: 0,City,Date,PM2.5,PM10,NO,NO2,NOx,NH3,CO,SO2,O3,Benzene,Toluene,Xylene,AQI,AQI_Bucket
0,Ahmedabad,2015-01-01,,,0.92,18.22,17.15,,0.92,27.64,133.36,0.0,0.02,0.0,,
1,Ahmedabad,2015-01-02,,,0.97,15.69,16.46,,0.97,24.55,34.06,3.68,5.5,3.77,,
2,Ahmedabad,2015-01-03,,,17.4,19.3,29.7,,17.4,29.07,30.7,6.8,16.4,2.25,,
3,Ahmedabad,2015-01-04,,,1.7,18.48,17.97,,1.7,18.59,36.08,4.43,10.14,1.0,,
4,Ahmedabad,2015-01-05,,,22.1,21.42,37.76,,22.1,39.33,39.31,7.01,18.89,2.78,,


In [3]:
df1.shape

(29531, 16)

In [4]:
df1.columns

Index(['City', 'Date', 'PM2.5', 'PM10', 'NO', 'NO2', 'NOx', 'NH3', 'CO', 'SO2',
       'O3', 'Benzene', 'Toluene', 'Xylene', 'AQI', 'AQI_Bucket'],
      dtype='object')

# Data Cleaning: Handle NA values

In [5]:
df1.isnull().sum()

City              0
Date              0
PM2.5          4598
PM10          11140
NO             3582
NO2            3585
NOx            4185
NH3           10328
CO             2059
SO2            3854
O3             4022
Benzene        5623
Toluene        8041
Xylene        18109
AQI            4681
AQI_Bucket     4681
dtype: int64

**Drop NA values of AQI_Bucket, since it's our predictive values we won't impute it**

In [6]:
df2 = df1.dropna(subset=['AQI_Bucket'], inplace=True)

In [7]:
df1.isna().sum()

City              0
Date              0
PM2.5           678
PM10           7086
NO              387
NO2             391
NOx            1857
NH3            6536
CO              445
SO2             605
O3              807
Benzene        3535
Toluene        5826
Xylene        15372
AQI               0
AQI_Bucket        0
dtype: int64

In [8]:
df2 = df1

In [9]:
df2.shape

(24850, 16)

**Drop the 'Date' and 'AQI_Bucket' columns as they have no role in prediction, AQI values are enough**

In [10]:
df2 = df2.drop(columns=['Date', 'AQI_Bucket'])
df2.head()

Unnamed: 0,City,PM2.5,PM10,NO,NO2,NOx,NH3,CO,SO2,O3,Benzene,Toluene,Xylene,AQI
28,Ahmedabad,83.13,,6.93,28.71,33.72,,6.93,49.52,59.76,0.02,0.0,3.14,209.0
29,Ahmedabad,79.84,,13.85,28.68,41.08,,13.85,48.49,97.07,0.04,0.0,4.81,328.0
30,Ahmedabad,94.52,,24.39,32.66,52.61,,24.39,67.39,111.33,0.24,0.01,7.67,514.0
31,Ahmedabad,135.99,,43.48,42.08,84.57,,43.48,75.23,102.7,0.4,0.04,25.87,782.0
32,Ahmedabad,178.33,,54.56,35.31,72.8,,54.56,55.04,107.38,0.46,0.06,35.61,914.0


In [11]:
df2.isnull().sum()

City           0
PM2.5        678
PM10        7086
NO           387
NO2          391
NOx         1857
NH3         6536
CO           445
SO2          605
O3           807
Benzene     3535
Toluene     5826
Xylene     15372
AQI            0
dtype: int64

**Check if there are some rows with missing values**

In [12]:
missing_rows = df2[df2.isna().all(axis=1)]
if not missing_rows.empty:
    print("Rows with missing values:")
    missing_rows
else:
    print("No rows with missing values.")

No rows with missing values.


**Remove the duplicate rows**

In [13]:
df2.drop_duplicates()

Unnamed: 0,City,PM2.5,PM10,NO,NO2,NOx,NH3,CO,SO2,O3,Benzene,Toluene,Xylene,AQI
28,Ahmedabad,83.13,,6.93,28.71,33.72,,6.93,49.52,59.76,0.02,0.00,3.14,209.0
29,Ahmedabad,79.84,,13.85,28.68,41.08,,13.85,48.49,97.07,0.04,0.00,4.81,328.0
30,Ahmedabad,94.52,,24.39,32.66,52.61,,24.39,67.39,111.33,0.24,0.01,7.67,514.0
31,Ahmedabad,135.99,,43.48,42.08,84.57,,43.48,75.23,102.70,0.40,0.04,25.87,782.0
32,Ahmedabad,178.33,,54.56,35.31,72.80,,54.56,55.04,107.38,0.46,0.06,35.61,914.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
29526,Visakhapatnam,15.02,50.94,7.68,25.06,19.54,12.47,0.47,8.55,23.30,2.24,12.07,0.73,41.0
29527,Visakhapatnam,24.38,74.09,3.42,26.06,16.53,11.99,0.52,12.72,30.14,0.74,2.21,0.38,70.0
29528,Visakhapatnam,22.91,65.73,3.45,29.53,18.33,10.71,0.48,8.42,30.96,0.01,0.01,0.00,68.0
29529,Visakhapatnam,16.64,49.97,4.05,29.26,18.80,10.03,0.52,9.84,28.30,0.00,0.00,0.00,54.0


# Use One Hot Encoding for City

In [14]:
dummies = pd.get_dummies(df2.City)
dummies.head(3)

Unnamed: 0,Ahmedabad,Aizawl,Amaravati,Amritsar,Bengaluru,Bhopal,Brajrajnagar,Chandigarh,Chennai,Coimbatore,...,Jorapokhar,Kochi,Kolkata,Lucknow,Mumbai,Patna,Shillong,Talcher,Thiruvananthapuram,Visakhapatnam
28,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
29,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
30,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [15]:
df3 = pd.concat([df2,dummies],axis='columns')
df3.head()

Unnamed: 0,City,PM2.5,PM10,NO,NO2,NOx,NH3,CO,SO2,O3,...,Jorapokhar,Kochi,Kolkata,Lucknow,Mumbai,Patna,Shillong,Talcher,Thiruvananthapuram,Visakhapatnam
28,Ahmedabad,83.13,,6.93,28.71,33.72,,6.93,49.52,59.76,...,0,0,0,0,0,0,0,0,0,0
29,Ahmedabad,79.84,,13.85,28.68,41.08,,13.85,48.49,97.07,...,0,0,0,0,0,0,0,0,0,0
30,Ahmedabad,94.52,,24.39,32.66,52.61,,24.39,67.39,111.33,...,0,0,0,0,0,0,0,0,0,0
31,Ahmedabad,135.99,,43.48,42.08,84.57,,43.48,75.23,102.7,...,0,0,0,0,0,0,0,0,0,0
32,Ahmedabad,178.33,,54.56,35.31,72.8,,54.56,55.04,107.38,...,0,0,0,0,0,0,0,0,0,0


In [16]:
df4 = df3.drop('City',axis='columns')
df4.head(2)

Unnamed: 0,PM2.5,PM10,NO,NO2,NOx,NH3,CO,SO2,O3,Benzene,...,Jorapokhar,Kochi,Kolkata,Lucknow,Mumbai,Patna,Shillong,Talcher,Thiruvananthapuram,Visakhapatnam
28,83.13,,6.93,28.71,33.72,,6.93,49.52,59.76,0.02,...,0,0,0,0,0,0,0,0,0,0
29,79.84,,13.85,28.68,41.08,,13.85,48.49,97.07,0.04,...,0,0,0,0,0,0,0,0,0,0


In [17]:
df4.isnull().sum()

PM2.5                   678
PM10                   7086
NO                      387
NO2                     391
NOx                    1857
NH3                    6536
CO                      445
SO2                     605
O3                      807
Benzene                3535
Toluene                5826
Xylene                15372
AQI                       0
Ahmedabad                 0
Aizawl                    0
Amaravati                 0
Amritsar                  0
Bengaluru                 0
Bhopal                    0
Brajrajnagar              0
Chandigarh                0
Chennai                   0
Coimbatore                0
Delhi                     0
Ernakulam                 0
Gurugram                  0
Guwahati                  0
Hyderabad                 0
Jaipur                    0
Jorapokhar                0
Kochi                     0
Kolkata                   0
Lucknow                   0
Mumbai                    0
Patna                     0
Shillong            

# Dealing with the null values using iterative imputer

In [18]:
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer
imputedf = df4
imputer = IterativeImputer(max_iter=10, random_state=0)
imputed_data = imputer.fit_transform(imputedf)
imputed_df = pd.DataFrame(imputed_data, columns=imputedf.columns)
imputed_df.head()



Unnamed: 0,PM2.5,PM10,NO,NO2,NOx,NH3,CO,SO2,O3,Benzene,...,Jorapokhar,Kochi,Kolkata,Lucknow,Mumbai,Patna,Shillong,Talcher,Thiruvananthapuram,Visakhapatnam
0,83.13,73.268783,6.93,28.71,33.72,21.072129,6.93,49.52,59.76,0.02,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,79.84,92.705113,13.85,28.68,41.08,15.387426,13.85,48.49,97.07,0.04,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,94.52,135.514777,24.39,32.66,52.61,9.275769,24.39,67.39,111.33,0.24,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,135.99,206.395225,43.48,42.08,84.57,47.973367,43.48,75.23,102.7,0.4,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,178.33,246.388263,54.56,35.31,72.8,69.207813,54.56,55.04,107.38,0.46,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [19]:
imputed_df.isnull().sum()

PM2.5                 0
PM10                  0
NO                    0
NO2                   0
NOx                   0
NH3                   0
CO                    0
SO2                   0
O3                    0
Benzene               0
Toluene               0
Xylene                0
AQI                   0
Ahmedabad             0
Aizawl                0
Amaravati             0
Amritsar              0
Bengaluru             0
Bhopal                0
Brajrajnagar          0
Chandigarh            0
Chennai               0
Coimbatore            0
Delhi                 0
Ernakulam             0
Gurugram              0
Guwahati              0
Hyderabad             0
Jaipur                0
Jorapokhar            0
Kochi                 0
Kolkata               0
Lucknow               0
Mumbai                0
Patna                 0
Shillong              0
Talcher               0
Thiruvananthapuram    0
Visakhapatnam         0
dtype: int64

In [20]:
df5 = imputed_df

In [21]:
df5.shape

(24850, 39)

# Training and testing the model

**Let's bifurcate our dataset into train and test dataset**

In [22]:
X = df5.drop('AQI', axis=1)
y = df5['AQI']

In [23]:
from sklearn.model_selection import train_test_split
train_X, test_X, train_y, test_y = train_test_split(X, y, test_size=0.3, random_state=20)

**Let's train our Linear Regression Model on training dataset and check the accuracy on test set**

In [24]:
from sklearn.linear_model import LinearRegression
reg = LinearRegression().fit(train_X, train_y)

In [25]:
reg.score(test_X, test_y)

0.9313932921362368

In [26]:
reg.score(train_X, train_y)

0.9267333178058716

# Use K Fold cross validation to measure accuracy of our LinearRegression model

In [27]:
from sklearn.model_selection import ShuffleSplit
from sklearn.model_selection import cross_val_score

cv = ShuffleSplit(n_splits=10, test_size=0.3, random_state=0)

cross_val_score(LinearRegression(), X, y, cv=cv)

array([0.92756559, 0.92201243, 0.92934045, 0.92622779, 0.93289264,
       0.931709  , 0.93265881, 0.93411567, 0.93355319, 0.93363539])

**We can see that in 10 iterations we get a score around 93% all the time except for 1 iteration. This is pretty good but we want to test few other algorithms for regression to see if we can get even better score. We will use GridSearchCV for this purpose**

# Comparing different models with different hyperparameters using GridSearchCV

In [29]:
!pip install lightgbm
!pip install xgboost

Collecting lightgbm
  Downloading lightgbm-4.0.0-py3-none-win_amd64.whl (1.3 MB)
     ---------------------------------------- 1.3/1.3 MB 3.9 MB/s eta 0:00:00
Installing collected packages: lightgbm
Successfully installed lightgbm-4.0.0
Collecting xgboost
  Downloading xgboost-1.7.6-py3-none-win_amd64.whl (70.9 MB)
     ---------------------------------------- 70.9/70.9 MB 5.8 MB/s eta 0:00:00
Installing collected packages: xgboost
Successfully installed xgboost-1.7.6


In [30]:
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
from lightgbm import LGBMRegressor
from xgboost import XGBRegressor

In [31]:
from sklearn.model_selection import GridSearchCV

In [32]:
def find_best_model_using_gridsearchcv(X,y):
    algos = {
        'linear_reg' : {
            'model': LinearRegression(),
            'params': {
                'fit_intercept': [True, False]
            }
        },
        'svm_reg': {
            'model': SVR(),
            'params': {
                'kernel': ['rbf'],
                'C': [0.1,1.0,10.0],
                'epsilon': [0.01, 0.1, 0.5]
            }
        },
        'dectree_reg': {
            'model': DecisionTreeRegressor(),
            'params': {
                'max_depth': [None, 5, 10],
                'min_samples_split': [2, 5, 10],
                'min_samples_leaf': [1, 2, 4]
            }
        },
        'knn_reg': {
            'model': KNeighborsRegressor(),
            'params': {
                'n_neighbors': [3, 5, 7],
                'weights': ['uniform', 'distance'],
                'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute']
            }
        },
        'randomfor_reg': {
            'model': RandomForestRegressor(),
            'params': {
                'n_estimators': [50, 100, 150],
                'max_depth': [None, 10, 20],
                'min_samples_split': [2, 5, 10]
            }
        },
        'lgbm_reg': {
            'model': LGBMRegressor(),
            'params': {
                'n_estimators': [50, 100, 150],
                'learning_rate': [0.01, 0.1, 0.2],
                'max_depth': [5, 10, 15]
            }
        },
        'xgb_reg': {
            'model': XGBRegressor(),
            'params': {
                'n_estimators': [50, 100, 150],
                'learning_rate': [0.01, 0.1, 0.2],
                'max_depth': [5, 10, 15]
            }
        }
    }
    scores = []
    cv = ShuffleSplit(n_splits=10, test_size=0.3, random_state=0)
    for algo_name, config in algos.items():
        gs =  GridSearchCV(config['model'], config['params'], cv=cv, return_train_score=False)
        gs.fit(X,y)
        scores.append({
            'model': algo_name,
            'best_score': gs.best_score_,
            'best_params': gs.best_params_
        })

    return pd.DataFrame(scores,columns=['model','best_score','best_params'])

In [33]:
find_best_model_using_gridsearchcv(X,y)

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 167.188617
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 165.273412


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 166.641506
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 165.987180
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 166.921242


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 166.309572
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 166.032078


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 166.284277
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 167.018626


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 166.720609
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 167.188617


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 165.273412
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 166.641506


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 165.987180


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 166.921242


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 166.309572


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 166.032078
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 166.284277


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 167.018626


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 166.720609


You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 167.188617


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 165.273412


You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 166.641506


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 165.987180


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 166.921242


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 166.309572


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 166.032078


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 166.284277


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 167.018626


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 166.720609


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 167.188617
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 165.273412
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 166.641506
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 165.987180
You can set `force_col_wise=true` to rem

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 166.309572
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 166.032078
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 166.284277
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 167.018626
You can set `force_col_wise=true` to rem

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 166.284277
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 167.018626
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 166.720609
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 167.188617
You can set `force_col_wise=true` to rem

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 166.720609
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 167.188617
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 165.273412
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 166.641506
You can set `force_col_wise=true` to rem

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 165.273412
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 166.641506
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 165.987180
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 166.921242
You can set `force_row_wise=true` to rem

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 166.641506
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 165.987180
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 166.921242
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 166.309572
You can set `force_col_wise=true` to rem

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 165.273412
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 166.641506


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 165.987180
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 166.921242
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 166.309572


You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 166.032078
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 166.284277


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 167.018626
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 166.720609


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 167.188617


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 165.273412


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 166.641506
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 165.987180


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 166.921242


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 166.309572


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 166.032078


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 166.284277


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 167.018626


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 166.720609


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 167.188617


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 165.273412


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 166.641506


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 165.987180


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 166.921242


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 166.309572


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 166.032078


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 166.284277


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 167.018626


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 166.720609


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 167.188617


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 165.273412
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 166.641506
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 165.987180
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 166.921242
You can set `force_col_wise=true` to rem

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 165.987180
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 166.921242
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 166.309572
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 166.032078
You can set `force_col_wise=true` to rem

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 166.921242
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 166.309572
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 166.032078
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 166.284277
You can set `force_col_wise=true` to rem

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 166.921242
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 166.309572
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 166.032078
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 166.284277
You can set `force_col_wise=true` to rem

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 166.032078
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 166.284277
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 167.018626
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 166.720609
You can set `force_col_wise=true` to rem

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 167.018626
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 166.720609
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 167.188617
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 165.273412


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 166.641506
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 165.987180


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 166.921242
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 166.309572


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 166.032078
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 166.284277


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 167.018626
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 166.720609
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 167.188617


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 165.273412


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 166.641506


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 165.987180


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 166.921242


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 166.309572
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 166.032078


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 166.284277


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 167.018626


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 166.720609


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 167.188617


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 165.273412


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 166.641506


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 165.987180


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 166.921242


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 166.309572


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 166.032078


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 166.284277


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 167.018626


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 166.720609


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 167.188617
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 165.273412
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 166.641506


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 165.987180
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 166.921242
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 166.309572
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 166.032078
You can set `force_col_wise=true` to rem

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 165.987180
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 166.921242
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 166.309572
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 166.032078
You can set `force_col_wise=true` to rem

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 165.987180
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 166.921242
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 166.309572
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 166.032078
You can set `force_col_wise=true` to rem

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 166.641506
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 165.987180
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 166.921242
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 166.309572
You can set `force_col_wise=true` to rem

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 166.921242
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 166.309572
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 166.032078
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 166.284277
You can set `force_col_wise=true` to rem

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 166.032078
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 166.284277
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 167.018626
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 166.720609
You can set `force_col_wise=true` to rem

Unnamed: 0,model,best_score,best_params
0,linear_reg,0.930371,{'fit_intercept': True}
1,svm_reg,0.80452,"{'C': 10.0, 'epsilon': 0.5, 'kernel': 'rbf'}"
2,dectree_reg,0.918696,"{'max_depth': 10, 'min_samples_leaf': 4, 'min_..."
3,knn_reg,0.923195,"{'algorithm': 'auto', 'n_neighbors': 5, 'weigh..."
4,randomfor_reg,0.94742,"{'max_depth': None, 'min_samples_split': 2, 'n..."
5,lgbm_reg,0.954823,"{'learning_rate': 0.2, 'max_depth': 10, 'n_est..."
6,xgb_reg,0.95418,"{'learning_rate': 0.2, 'max_depth': 5, 'n_esti..."


**Based on above results we can say that LGBMRegressor gives the best score. Hence we will use that.**

In [58]:
lgbm = LGBMRegressor(n_estimators=150, learning_rate=0.2, max_depth=10, num_leaves=40)
lgbm.fit(train_X,train_y)
lgbm.score(test_X,test_y)

You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3112
[LightGBM] [Info] Number of data points in the train set: 17395, number of used features: 38
[LightGBM] [Info] Start training from score 166.883702


0.9514547948729138

# Test the model for few properties

In [59]:
def predict_AQI(city,pm2_5,pm10,no,no2,nox,nh3,co,so2,o3,benzene,toluene,xylene):    
    city_index = np.where(X.columns==city)[0][0]

    x = np.zeros(len(X.columns))
    x[0] = pm2_5
    x[1] = pm10
    x[2] = no
    x[3] = no2
    x[4] = nox
    x[5] = nh3
    x[6] = co
    x[7] = so2
    x[8] = o3
    x[9] = benzene
    x[10] = toluene
    x[11] = xylene

    if city_index >= 0:
        x[city_index] = 1

    return lgbm.predict([x])[0]

In [60]:
predict_AQI('Ahmedabad',83.13,73.268783,6.93,28.71,33.72,21.072129,6.93,49.52,59.76,0.02,0.00,3.14)

217.71279520430204

In [61]:
predict_AQI('Amaravati',97.09,143.69,4.09,86.95,49.58,1.52,1.11,13.62,33.03,0.1,0.12,0.12)

199.86239702794006

# Export the tested model to a pickle file

In [70]:
import pickle
with open('aqi_daily_model.pickle','wb') as f:
    pickle.dump(lgbm,f)

# Export location and column information to a file that will be useful later on in our prediction application

In [71]:
import json
columns = {
    'data_columns' : [col.lower() for col in X.columns]
}
with open("columns.json","w") as f:
    f.write(json.dumps(columns))