In [25]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import chi2_contingency
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from feature_engine import missing_data_imputers as mdi
from feature_engine import categorical_encoders as ce
from feature_engine import discretisers as dsc
import lightgbm as lgb
import catboost as cb
from sklearn.metrics import accuracy_score
import warnings
warnings.filterwarnings("ignore")

In [10]:
# Read the train and test CSV files into DataFrames.
train, test = (
    pd.read_csv(csv_path)
    for csv_path in ("train_Wc8LBpr.csv", "test_VsU9xXK.csv")
)

In [11]:
# Percentage of missing values per column of the training frame.
train.isnull().sum().mul(100).div(train.shape[0])

Trip_ID                         0.000000
Trip_Distance                   0.000000
Type_of_Cab                    15.349911
Customer_Since_Months           4.496362
Life_Style_Index               15.336999
Confidence_Life_Style_Index    15.336999
Destination_Type                0.000000
Customer_Rating                 0.000000
Cancellation_Last_1Month        0.000000
Var1                           53.948748
Var2                            0.000000
Var3                            0.000000
Gender                          0.000000
Surge_Pricing_Type              0.000000
dtype: float64

In [40]:
def frequency_encode_fun(train,test):
    """Append frequency-encoded copies of four categorical columns.

    A ``CountFrequencyCategoricalEncoder`` (``encoding_method="frequency"``)
    is fitted on ``train`` only and then applied to both frames. The encoded
    values are added as new ``<column>_freq_encode`` columns; the original
    columns are left in place.

    Parameters
    ----------
    train, test : pandas.DataFrame
        Frames containing ``Type_of_Cab``, ``Confidence_Life_Style_Index``,
        ``Destination_Type`` and ``Gender``.

    Returns
    -------
    tuple
        ``(train, test)`` with the extra encoded columns concatenated.
    """
    source_cols = ["Type_of_Cab", "Confidence_Life_Style_Index", "Destination_Type", "Gender"]
    encoded_names = [f"{name}_freq_encode" for name in source_cols]

    encoder = ce.CountFrequencyCategoricalEncoder(encoding_method="frequency")

    # Fit on the training rows only, then reuse the fitted encoder for test.
    train_encoded = encoder.fit_transform(train[source_cols])
    train_encoded.columns = encoded_names

    test_encoded = encoder.transform(test[source_cols])
    test_encoded.columns = encoded_names

    return (
        pd.concat([train, train_encoded], axis=1),
        pd.concat([test, test_encoded], axis=1),
    )

In [41]:
def mean_cat_encode_fun(train,test,target):
    """Append target-mean-encoded copies of four categorical columns.

    A ``MeanCategoricalEncoder`` is fitted on ``train`` together with
    ``target`` and then applied to both frames. The encoded values are added
    as new ``<column>_mean_cat_encode`` columns; the original columns are
    left in place.

    Parameters
    ----------
    train, test : pandas.DataFrame
        Frames containing ``Type_of_Cab``, ``Confidence_Life_Style_Index``,
        ``Destination_Type`` and ``Gender``.
    target : pandas.Series
        Target values passed to the encoder's ``fit_transform`` alongside
        ``train``.

    Returns
    -------
    tuple
        ``(train, test)`` with the extra encoded columns concatenated.
    """
    source_cols = ["Type_of_Cab", "Confidence_Life_Style_Index", "Destination_Type", "Gender"]
    encoded_names = [f"{name}_mean_cat_encode" for name in source_cols]

    encoder = ce.MeanCategoricalEncoder()

    # The encoder only ever sees the training rows and their targets.
    train_encoded = encoder.fit_transform(train[source_cols], target)
    train_encoded.columns = encoded_names

    test_encoded = encoder.transform(test[source_cols])
    test_encoded.columns = encoded_names

    return (
        pd.concat([train, train_encoded], axis=1),
        pd.concat([test, test_encoded], axis=1),
    )

In [47]:
def acc_fun():
    """Estimate hold-out accuracy of LightGBM and CatBoost on the training data.

    Reads the module-level ``train`` frame, holds out 20% of the rows
    (``random_state=42``), fits the imputation pipeline and both encoders on
    the remaining 80% only, one-hot encodes the object columns and trains one
    ``LGBMClassifier`` and one ``CatBoostClassifier`` with default settings.

    Returns
    -------
    tuple
        ``(lightgbm_accuracy, catboost_accuracy)`` measured on the hold-out
        split.
    """
    X=train.drop(columns=["Surge_Pricing_Type","Trip_ID"])
    y=train["Surge_Pricing_Type"]
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Add missing-value indicators first, then impute the listed numeric
    # columns with the median and handle the listed categorical columns.
    pipe=Pipeline([
        ("missing_indicator",mdi.AddMissingIndicator()),
        ("mean_median",mdi.MeanMedianImputer(imputation_method="median",variables=["Customer_Since_Months","Life_Style_Index"])),
        ("random_imputer",mdi.CategoricalVariableImputer(variables=["Type_of_Cab","Confidence_Life_Style_Index"]))
    ])

    # Fit on the training split only so nothing leaks from the hold-out rows.
    X_train=pipe.fit_transform(X_train)
    X_test=pipe.transform(X_test)

    X_train,X_test=frequency_encode_fun(X_train,X_test)
    X_train,X_test=mean_cat_encode_fun(X_train,X_test,y_train)

    cols=X_train.select_dtypes("object").columns

    train_dummy=pd.get_dummies(data=X_train,columns=cols,drop_first=True)
    test_dummy=pd.get_dummies(data=X_test,columns=cols,drop_first=True)
    # get_dummies is applied to each split independently, so a category that
    # is absent from one split (or a different "first" level being dropped)
    # would give the two frames different columns. Force the hold-out frame
    # onto the training layout: missing dummies become 0, extras are dropped.
    test_dummy=test_dummy.reindex(columns=train_dummy.columns,fill_value=0)

    model=lgb.LGBMClassifier()
    model.fit(train_dummy,y_train)
    lgb_pred=model.predict(test_dummy)

    # NOTE(review): CatBoost is evaluated without Var1 here, whereas
    # prediction_fun in this notebook fits CatBoost with Var1 included, so the
    # score below may not describe the submitted model exactly - confirm.
    model=cb.CatBoostClassifier()
    model.fit(train_dummy.drop(columns=["Var1"]),y_train)
    cb_pred=model.predict(test_dummy.drop(columns=["Var1"]))
    # CatBoost can return multiclass labels as an (n, 1) column vector;
    # flatten so the metric receives a plain 1-D label array.
    cb_pred=np.ravel(cb_pred)

    return accuracy_score(y_test,lgb_pred),accuracy_score(y_test,cb_pred)

In [48]:
def prediction_fun():
    """Fit CatBoost on all of ``train`` and predict for ``test``.

    Reads the module-level ``train`` and ``test`` frames, fits the imputation
    pipeline and both encoders on the full training data, applies them to the
    test data, one-hot encodes the object columns and trains a
    ``CatBoostClassifier`` with default settings.

    Returns
    -------
    The array returned by ``CatBoostClassifier.predict`` for the test rows,
    in the row order of ``test``.
    """
    X=train.drop(columns=["Surge_Pricing_Type","Trip_ID"])
    y=train["Surge_Pricing_Type"]
    test_df=test.drop(columns=["Trip_ID"])

    # Add missing-value indicators first, then impute the listed numeric
    # columns with the median and handle the listed categorical columns.
    pipe=Pipeline([
        ("missing_indicator",mdi.AddMissingIndicator()),
        ("mean_median",mdi.MeanMedianImputer(imputation_method="median",variables=["Customer_Since_Months","Life_Style_Index"])),
        ("random_imputer",mdi.CategoricalVariableImputer(variables=["Type_of_Cab","Confidence_Life_Style_Index"]))
    ])

    # All preprocessing is fitted on the training data and reused for test.
    X=pipe.fit_transform(X)
    test_df=pipe.transform(test_df)

    X,test_df=frequency_encode_fun(X,test_df)
    X,test_df=mean_cat_encode_fun(X,test_df,y)

    cols=X.select_dtypes("object").columns

    train_dummy=pd.get_dummies(data=X,columns=cols,drop_first=True)
    test_dummy=pd.get_dummies(data=test_df,columns=cols,drop_first=True)
    # get_dummies is applied to each frame independently, so a category that
    # is absent from the test file (or a different "first" level being
    # dropped) would give the model differently shaped/ordered inputs at
    # predict time. Force the test frame onto the training layout: missing
    # dummies become 0, dummies never seen in training are dropped.
    test_dummy=test_dummy.reindex(columns=train_dummy.columns,fill_value=0)

    # NOTE(review): Var1 is kept as a feature here, while acc_fun in this
    # notebook drops it before fitting CatBoost - confirm which variant the
    # reported validation accuracy is meant to represent.
    model=cb.CatBoostClassifier()
    model.fit(train_dummy,y)
    pred=model.predict(test_dummy)

    return pred

In [49]:
# Hold-out accuracy, displayed as (LightGBM, CatBoost).
acc_fun()

0:	learn: 1.0790947	total: 55.1ms	remaining: 55s
1:	learn: 1.0606980	total: 111ms	remaining: 55.5s
2:	learn: 1.0435885	total: 162ms	remaining: 53.8s
3:	learn: 1.0274834	total: 216ms	remaining: 53.9s
4:	learn: 1.0127875	total: 273ms	remaining: 54.4s
5:	learn: 0.9990634	total: 344ms	remaining: 57s
6:	learn: 0.9864248	total: 397ms	remaining: 56.4s
7:	learn: 0.9739192	total: 455ms	remaining: 56.4s
8:	learn: 0.9622647	total: 514ms	remaining: 56.6s
9:	learn: 0.9512426	total: 564ms	remaining: 55.8s
10:	learn: 0.9407594	total: 618ms	remaining: 55.5s
11:	learn: 0.9311215	total: 668ms	remaining: 55s
12:	learn: 0.9218937	total: 729ms	remaining: 55.3s
13:	learn: 0.9131757	total: 782ms	remaining: 55.1s
14:	learn: 0.9049490	total: 836ms	remaining: 54.9s
15:	learn: 0.8971702	total: 899ms	remaining: 55.3s
16:	learn: 0.8899509	total: 975ms	remaining: 56.4s
17:	learn: 0.8829415	total: 1.03s	remaining: 56.4s
18:	learn: 0.8764797	total: 1.09s	remaining: 56.1s
19:	learn: 0.8700632	total: 1.16s	remaining: 5

164:	learn: 0.7061291	total: 11s	remaining: 55.7s
165:	learn: 0.7059528	total: 11.1s	remaining: 55.6s
166:	learn: 0.7057866	total: 11.1s	remaining: 55.5s
167:	learn: 0.7056385	total: 11.2s	remaining: 55.4s
168:	learn: 0.7054488	total: 11.2s	remaining: 55.3s
169:	learn: 0.7052675	total: 11.3s	remaining: 55.2s
170:	learn: 0.7050656	total: 11.4s	remaining: 55.1s
171:	learn: 0.7048814	total: 11.4s	remaining: 55s
172:	learn: 0.7046939	total: 11.5s	remaining: 55s
173:	learn: 0.7044874	total: 11.6s	remaining: 55.1s
174:	learn: 0.7043132	total: 11.7s	remaining: 55s
175:	learn: 0.7041306	total: 11.7s	remaining: 54.9s
176:	learn: 0.7039479	total: 11.8s	remaining: 54.8s
177:	learn: 0.7037715	total: 11.9s	remaining: 54.8s
178:	learn: 0.7036281	total: 11.9s	remaining: 54.7s
179:	learn: 0.7034326	total: 12s	remaining: 54.7s
180:	learn: 0.7032551	total: 12.1s	remaining: 54.6s
181:	learn: 0.7031248	total: 12.1s	remaining: 54.5s
182:	learn: 0.7029767	total: 12.2s	remaining: 54.5s
183:	learn: 0.7028406	

325:	learn: 0.6907499	total: 22.8s	remaining: 47.2s
326:	learn: 0.6906916	total: 22.9s	remaining: 47.1s
327:	learn: 0.6906303	total: 23s	remaining: 47.1s
328:	learn: 0.6905913	total: 23s	remaining: 47s
329:	learn: 0.6905447	total: 23.1s	remaining: 46.9s
330:	learn: 0.6905026	total: 23.2s	remaining: 46.8s
331:	learn: 0.6904511	total: 23.2s	remaining: 46.8s
332:	learn: 0.6904060	total: 23.3s	remaining: 46.7s
333:	learn: 0.6903532	total: 23.4s	remaining: 46.7s
334:	learn: 0.6902904	total: 23.5s	remaining: 46.6s
335:	learn: 0.6902432	total: 23.5s	remaining: 46.5s
336:	learn: 0.6902088	total: 23.6s	remaining: 46.5s
337:	learn: 0.6901613	total: 23.7s	remaining: 46.5s
338:	learn: 0.6901137	total: 23.8s	remaining: 46.4s
339:	learn: 0.6900916	total: 23.9s	remaining: 46.4s
340:	learn: 0.6900366	total: 23.9s	remaining: 46.3s
341:	learn: 0.6899710	total: 24s	remaining: 46.2s
342:	learn: 0.6899328	total: 24.1s	remaining: 46.1s
343:	learn: 0.6898827	total: 24.2s	remaining: 46.1s
344:	learn: 0.689815

484:	learn: 0.6836900	total: 33.3s	remaining: 35.4s
485:	learn: 0.6836571	total: 33.4s	remaining: 35.3s
486:	learn: 0.6836166	total: 33.4s	remaining: 35.2s
487:	learn: 0.6835901	total: 33.5s	remaining: 35.1s
488:	learn: 0.6835603	total: 33.5s	remaining: 35s
489:	learn: 0.6835298	total: 33.6s	remaining: 35s
490:	learn: 0.6834983	total: 33.6s	remaining: 34.9s
491:	learn: 0.6834570	total: 33.7s	remaining: 34.8s
492:	learn: 0.6834224	total: 33.8s	remaining: 34.7s
493:	learn: 0.6834069	total: 33.8s	remaining: 34.6s
494:	learn: 0.6833750	total: 33.9s	remaining: 34.6s
495:	learn: 0.6833524	total: 33.9s	remaining: 34.5s
496:	learn: 0.6832957	total: 34s	remaining: 34.4s
497:	learn: 0.6832593	total: 34.1s	remaining: 34.4s
498:	learn: 0.6832334	total: 34.1s	remaining: 34.3s
499:	learn: 0.6831961	total: 34.2s	remaining: 34.2s
500:	learn: 0.6831566	total: 34.3s	remaining: 34.1s
501:	learn: 0.6831294	total: 34.3s	remaining: 34.1s
502:	learn: 0.6830786	total: 34.4s	remaining: 34s
503:	learn: 0.683045

644:	learn: 0.6786992	total: 44s	remaining: 24.2s
645:	learn: 0.6786780	total: 44s	remaining: 24.1s
646:	learn: 0.6786343	total: 44.1s	remaining: 24.1s
647:	learn: 0.6786125	total: 44.2s	remaining: 24s
648:	learn: 0.6785814	total: 44.2s	remaining: 23.9s
649:	learn: 0.6785580	total: 44.3s	remaining: 23.8s
650:	learn: 0.6785161	total: 44.4s	remaining: 23.8s
651:	learn: 0.6784955	total: 44.4s	remaining: 23.7s
652:	learn: 0.6784723	total: 44.5s	remaining: 23.6s
653:	learn: 0.6784537	total: 44.5s	remaining: 23.6s
654:	learn: 0.6784308	total: 44.6s	remaining: 23.5s
655:	learn: 0.6784003	total: 44.6s	remaining: 23.4s
656:	learn: 0.6783708	total: 44.7s	remaining: 23.3s
657:	learn: 0.6783422	total: 44.7s	remaining: 23.2s
658:	learn: 0.6783055	total: 44.8s	remaining: 23.2s
659:	learn: 0.6782836	total: 44.8s	remaining: 23.1s
660:	learn: 0.6782675	total: 44.8s	remaining: 23s
661:	learn: 0.6782455	total: 44.9s	remaining: 22.9s
662:	learn: 0.6782188	total: 44.9s	remaining: 22.8s
663:	learn: 0.678187

804:	learn: 0.6746533	total: 53.3s	remaining: 12.9s
805:	learn: 0.6746371	total: 53.3s	remaining: 12.8s
806:	learn: 0.6746093	total: 53.4s	remaining: 12.8s
807:	learn: 0.6745845	total: 53.4s	remaining: 12.7s
808:	learn: 0.6745562	total: 53.5s	remaining: 12.6s
809:	learn: 0.6745231	total: 53.5s	remaining: 12.6s
810:	learn: 0.6745007	total: 53.6s	remaining: 12.5s
811:	learn: 0.6744782	total: 53.7s	remaining: 12.4s
812:	learn: 0.6744561	total: 53.7s	remaining: 12.4s
813:	learn: 0.6744272	total: 53.8s	remaining: 12.3s
814:	learn: 0.6744058	total: 53.9s	remaining: 12.2s
815:	learn: 0.6743866	total: 53.9s	remaining: 12.2s
816:	learn: 0.6743669	total: 54s	remaining: 12.1s
817:	learn: 0.6743424	total: 54s	remaining: 12s
818:	learn: 0.6743178	total: 54.1s	remaining: 12s
819:	learn: 0.6742950	total: 54.2s	remaining: 11.9s
820:	learn: 0.6742630	total: 54.2s	remaining: 11.8s
821:	learn: 0.6742518	total: 54.3s	remaining: 11.8s
822:	learn: 0.6742388	total: 54.3s	remaining: 11.7s
823:	learn: 0.674223

964:	learn: 0.6710829	total: 1m 2s	remaining: 2.27s
965:	learn: 0.6710598	total: 1m 2s	remaining: 2.2s
966:	learn: 0.6710443	total: 1m 2s	remaining: 2.14s
967:	learn: 0.6710168	total: 1m 2s	remaining: 2.07s
968:	learn: 0.6709991	total: 1m 2s	remaining: 2.01s
969:	learn: 0.6709840	total: 1m 2s	remaining: 1.94s
970:	learn: 0.6709604	total: 1m 2s	remaining: 1.88s
971:	learn: 0.6709380	total: 1m 2s	remaining: 1.81s
972:	learn: 0.6709237	total: 1m 2s	remaining: 1.75s
973:	learn: 0.6708984	total: 1m 3s	remaining: 1.68s
974:	learn: 0.6708722	total: 1m 3s	remaining: 1.62s
975:	learn: 0.6708511	total: 1m 3s	remaining: 1.55s
976:	learn: 0.6708238	total: 1m 3s	remaining: 1.49s
977:	learn: 0.6708052	total: 1m 3s	remaining: 1.42s
978:	learn: 0.6707825	total: 1m 3s	remaining: 1.36s
979:	learn: 0.6707600	total: 1m 3s	remaining: 1.29s
980:	learn: 0.6707446	total: 1m 3s	remaining: 1.23s
981:	learn: 0.6707274	total: 1m 3s	remaining: 1.16s
982:	learn: 0.6707073	total: 1m 3s	remaining: 1.1s
983:	learn: 0.

(0.7063000797478449, 0.7084266889454297)

In [45]:
# Train on the full training data and keep the test-set predictions for the submission file.
pred=prediction_fun()

0:	learn: 1.0786578	total: 65.6ms	remaining: 1m 5s
1:	learn: 1.0602382	total: 134ms	remaining: 1m 6s
2:	learn: 1.0429471	total: 196ms	remaining: 1m 5s
3:	learn: 1.0270384	total: 258ms	remaining: 1m 4s
4:	learn: 1.0119481	total: 328ms	remaining: 1m 5s
5:	learn: 0.9978876	total: 391ms	remaining: 1m 4s
6:	learn: 0.9848014	total: 455ms	remaining: 1m 4s
7:	learn: 0.9724524	total: 524ms	remaining: 1m 5s
8:	learn: 0.9608539	total: 586ms	remaining: 1m 4s
9:	learn: 0.9498420	total: 658ms	remaining: 1m 5s
10:	learn: 0.9394778	total: 738ms	remaining: 1m 6s
11:	learn: 0.9296908	total: 812ms	remaining: 1m 6s
12:	learn: 0.9205821	total: 892ms	remaining: 1m 7s
13:	learn: 0.9119333	total: 953ms	remaining: 1m 7s
14:	learn: 0.9038296	total: 1.01s	remaining: 1m 6s
15:	learn: 0.8959642	total: 1.08s	remaining: 1m 6s
16:	learn: 0.8886349	total: 1.16s	remaining: 1m 6s
17:	learn: 0.8816377	total: 1.22s	remaining: 1m 6s
18:	learn: 0.8749312	total: 1.29s	remaining: 1m 6s
19:	learn: 0.8685848	total: 1.35s	remain

161:	learn: 0.7098763	total: 12.6s	remaining: 1m 4s
162:	learn: 0.7096580	total: 12.6s	remaining: 1m 4s
163:	learn: 0.7094633	total: 12.7s	remaining: 1m 4s
164:	learn: 0.7093099	total: 12.8s	remaining: 1m 4s
165:	learn: 0.7091758	total: 12.9s	remaining: 1m 4s
166:	learn: 0.7090064	total: 12.9s	remaining: 1m 4s
167:	learn: 0.7087618	total: 13s	remaining: 1m 4s
168:	learn: 0.7086101	total: 13.1s	remaining: 1m 4s
169:	learn: 0.7084595	total: 13.2s	remaining: 1m 4s
170:	learn: 0.7082349	total: 13.3s	remaining: 1m 4s
171:	learn: 0.7079980	total: 13.3s	remaining: 1m 4s
172:	learn: 0.7078142	total: 13.4s	remaining: 1m 4s
173:	learn: 0.7076687	total: 13.5s	remaining: 1m 3s
174:	learn: 0.7075237	total: 13.5s	remaining: 1m 3s
175:	learn: 0.7073204	total: 13.6s	remaining: 1m 3s
176:	learn: 0.7071493	total: 13.7s	remaining: 1m 3s
177:	learn: 0.7069361	total: 13.7s	remaining: 1m 3s
178:	learn: 0.7067612	total: 13.8s	remaining: 1m 3s
179:	learn: 0.7066463	total: 13.9s	remaining: 1m 3s
180:	learn: 0.

323:	learn: 0.6932353	total: 24.2s	remaining: 50.5s
324:	learn: 0.6931626	total: 24.2s	remaining: 50.3s
325:	learn: 0.6931152	total: 24.3s	remaining: 50.2s
326:	learn: 0.6930559	total: 24.4s	remaining: 50.2s
327:	learn: 0.6930009	total: 24.4s	remaining: 50s
328:	learn: 0.6929530	total: 24.5s	remaining: 49.9s
329:	learn: 0.6928778	total: 24.6s	remaining: 49.8s
330:	learn: 0.6928145	total: 24.6s	remaining: 49.7s
331:	learn: 0.6927482	total: 24.7s	remaining: 49.6s
332:	learn: 0.6926861	total: 24.7s	remaining: 49.6s
333:	learn: 0.6926154	total: 24.8s	remaining: 49.5s
334:	learn: 0.6925739	total: 24.9s	remaining: 49.4s
335:	learn: 0.6925095	total: 24.9s	remaining: 49.3s
336:	learn: 0.6924456	total: 25s	remaining: 49.2s
337:	learn: 0.6923953	total: 25s	remaining: 49s
338:	learn: 0.6923128	total: 25.1s	remaining: 49s
339:	learn: 0.6922511	total: 25.2s	remaining: 48.9s
340:	learn: 0.6922167	total: 25.2s	remaining: 48.8s
341:	learn: 0.6921872	total: 25.3s	remaining: 48.6s
342:	learn: 0.6921380	

484:	learn: 0.6855841	total: 34.6s	remaining: 36.8s
485:	learn: 0.6855588	total: 34.7s	remaining: 36.7s
486:	learn: 0.6855304	total: 34.8s	remaining: 36.6s
487:	learn: 0.6854900	total: 34.8s	remaining: 36.5s
488:	learn: 0.6854602	total: 34.9s	remaining: 36.4s
489:	learn: 0.6854077	total: 34.9s	remaining: 36.4s
490:	learn: 0.6853680	total: 35s	remaining: 36.3s
491:	learn: 0.6853306	total: 35.1s	remaining: 36.2s
492:	learn: 0.6852897	total: 35.1s	remaining: 36.1s
493:	learn: 0.6852518	total: 35.2s	remaining: 36s
494:	learn: 0.6852087	total: 35.2s	remaining: 35.9s
495:	learn: 0.6851776	total: 35.3s	remaining: 35.9s
496:	learn: 0.6851507	total: 35.3s	remaining: 35.8s
497:	learn: 0.6850953	total: 35.4s	remaining: 35.7s
498:	learn: 0.6850612	total: 35.5s	remaining: 35.6s
499:	learn: 0.6850315	total: 35.5s	remaining: 35.5s
500:	learn: 0.6850008	total: 35.6s	remaining: 35.4s
501:	learn: 0.6849744	total: 35.6s	remaining: 35.4s
502:	learn: 0.6849180	total: 35.7s	remaining: 35.3s
503:	learn: 0.68

643:	learn: 0.6806204	total: 45.2s	remaining: 25s
644:	learn: 0.6805841	total: 45.3s	remaining: 24.9s
645:	learn: 0.6805609	total: 45.3s	remaining: 24.8s
646:	learn: 0.6805281	total: 45.4s	remaining: 24.8s
647:	learn: 0.6805165	total: 45.4s	remaining: 24.7s
648:	learn: 0.6804914	total: 45.5s	remaining: 24.6s
649:	learn: 0.6804554	total: 45.6s	remaining: 24.5s
650:	learn: 0.6804323	total: 45.6s	remaining: 24.5s
651:	learn: 0.6804021	total: 45.7s	remaining: 24.4s
652:	learn: 0.6803614	total: 45.8s	remaining: 24.3s
653:	learn: 0.6803260	total: 45.8s	remaining: 24.2s
654:	learn: 0.6802981	total: 45.9s	remaining: 24.2s
655:	learn: 0.6802676	total: 46s	remaining: 24.1s
656:	learn: 0.6802536	total: 46s	remaining: 24s
657:	learn: 0.6802169	total: 46.1s	remaining: 24s
658:	learn: 0.6801923	total: 46.2s	remaining: 23.9s
659:	learn: 0.6801721	total: 46.2s	remaining: 23.8s
660:	learn: 0.6801536	total: 46.3s	remaining: 23.7s
661:	learn: 0.6801383	total: 46.3s	remaining: 23.7s
662:	learn: 0.6801052	

803:	learn: 0.6767290	total: 57.1s	remaining: 13.9s
804:	learn: 0.6767158	total: 57.2s	remaining: 13.9s
805:	learn: 0.6766936	total: 57.3s	remaining: 13.8s
806:	learn: 0.6766742	total: 57.4s	remaining: 13.7s
807:	learn: 0.6766631	total: 57.5s	remaining: 13.7s
808:	learn: 0.6766427	total: 57.6s	remaining: 13.6s
809:	learn: 0.6766199	total: 57.7s	remaining: 13.5s
810:	learn: 0.6765955	total: 57.8s	remaining: 13.5s
811:	learn: 0.6765648	total: 57.9s	remaining: 13.4s
812:	learn: 0.6765399	total: 58s	remaining: 13.3s
813:	learn: 0.6765123	total: 58.1s	remaining: 13.3s
814:	learn: 0.6764917	total: 58.2s	remaining: 13.2s
815:	learn: 0.6764755	total: 58.3s	remaining: 13.1s
816:	learn: 0.6764626	total: 58.3s	remaining: 13.1s
817:	learn: 0.6764236	total: 58.4s	remaining: 13s
818:	learn: 0.6764070	total: 58.5s	remaining: 12.9s
819:	learn: 0.6763844	total: 58.6s	remaining: 12.9s
820:	learn: 0.6763687	total: 58.7s	remaining: 12.8s
821:	learn: 0.6763358	total: 58.8s	remaining: 12.7s
822:	learn: 0.67

964:	learn: 0.6732475	total: 1m 9s	remaining: 2.52s
965:	learn: 0.6732415	total: 1m 9s	remaining: 2.45s
966:	learn: 0.6732184	total: 1m 9s	remaining: 2.38s
967:	learn: 0.6731892	total: 1m 9s	remaining: 2.31s
968:	learn: 0.6731716	total: 1m 9s	remaining: 2.24s
969:	learn: 0.6731540	total: 1m 10s	remaining: 2.17s
970:	learn: 0.6731433	total: 1m 10s	remaining: 2.1s
971:	learn: 0.6731334	total: 1m 10s	remaining: 2.02s
972:	learn: 0.6731111	total: 1m 10s	remaining: 1.95s
973:	learn: 0.6730983	total: 1m 10s	remaining: 1.88s
974:	learn: 0.6730880	total: 1m 10s	remaining: 1.81s
975:	learn: 0.6730635	total: 1m 10s	remaining: 1.74s
976:	learn: 0.6730502	total: 1m 10s	remaining: 1.66s
977:	learn: 0.6730288	total: 1m 10s	remaining: 1.59s
978:	learn: 0.6730121	total: 1m 10s	remaining: 1.52s
979:	learn: 0.6729898	total: 1m 10s	remaining: 1.45s
980:	learn: 0.6729794	total: 1m 10s	remaining: 1.37s
981:	learn: 0.6729656	total: 1m 11s	remaining: 1.3s
982:	learn: 0.6729547	total: 1m 11s	remaining: 1.23s


In [46]:
# Fill the sample submission with the predicted classes and write it out.
# Assumes the sample submission rows are in the same order as the test file - TODO confirm.
sub=pd.read_csv("sample_submission_NoPBkjr.csv")
# CatBoost can return multiclass labels as an (n, 1) column vector; flatten it
# so the column assignment does not depend on pandas accepting a 2-D array.
sub["Surge_Pricing_Type"]=np.ravel(pred)
sub.to_csv("submit.csv",index=False)

In [19]:
# Separate the target from the features (the Trip_ID column is dropped too),
# then hold out 20% of the rows with a fixed seed.
y = train["Surge_Pricing_Type"]
X = train.drop(columns=["Surge_Pricing_Type", "Trip_ID"])
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [24]:
# Keep only rows without missing values; the discretiser is fitted and
# applied on exactly this frame.
X_train_complete = X_train.dropna()

# Equal-width discretiser with 5 bins.
disc = dsc.EqualWidthDiscretiser(bins=5)
disc.fit(X_train_complete)

# Binned version of the complete training rows, displayed as the cell output.
train_t = disc.transform(X_train_complete)
train_t

Unnamed: 0,Trip_Distance,Type_of_Cab,Customer_Since_Months,Life_Style_Index,Confidence_Life_Style_Index,Destination_Type,Customer_Rating,Cancellation_Last_1Month,Var1,Var2,Var3,Gender
126760,0,B,1,1,A,A,3,0,0,0,1,Female
93931,0,B,1,0,A,A,4,0,1,0,1,Male
67866,4,D,4,1,C,A,2,0,1,0,0,Male
99012,2,B,0,2,C,A,3,0,0,1,1,Male
104288,0,D,4,1,A,A,4,0,0,0,0,Male
...,...,...,...,...,...,...,...,...,...,...,...,...
65725,3,C,0,1,C,A,3,0,0,1,0,Male
123855,1,B,0,0,A,A,3,0,1,0,0,Female
64925,1,B,0,1,A,D,3,0,2,0,0,Male
64820,1,D,2,1,B,A,3,1,2,1,0,Male
