In [None]:
# Mount Google Drive into the Colab runtime so the dataset files stored there can be read.
# (The same snippet can be found by searching "mount" in Colab's code snippets.)
# After mounting, the Drive contents are available under "/gdrive".

from google.colab import drive
drive.mount('/gdrive')
# Make the mount point the current working directory.
%cd /gdrive


Mounted at /gdrive
/gdrive


In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
# Notebook-wide pandas display settings: never truncate columns or rows, and use a
# 1000-character wide console so wide frames are not wrapped.
for _option, _value in (
    ('display.max_columns', None),
    ('display.max_rows', None),
    ('display.width', 1000),
):
    pd.set_option(_option, _value)


In [None]:
# Locations of the raw CSV files on the mounted Drive.
trainfile = r'/gdrive/My Drive/train.csv'
testfile = r'/gdrive/My Drive/test.csv'

# Read each file into its own DataFrame.
trainData, testData = (pd.read_csv(csv_path) for csv_path in (trainfile, testfile))

# Report (rows, columns) for the training frame, then the test frame.
print(trainData.shape, testData.shape, sep='\n')

(137, 43)
(100000, 42)


In [None]:
# Set the target aside before any imputation / encoding so that those steps only
# ever see feature columns.
Target_Train_Cols = trainData.loc[:, "revenue"]  # target column, kept for re-attachment later
trainData = trainData.drop(columns="revenue")    # features only

print(trainData.shape)

(137, 42)


In [None]:
# Stack train and test into one frame so that every later column-level decision
# (dropping, imputing, encoding) is applied identically to both.
# The outer index level is the key: 0 = train rows, 1 = test rows.
combined_Data = pd.concat({0: trainData, 1: testData})

# Number of missing values per column, largest first.
combined_Data.isna().sum().sort_values(ascending=False)


Id            0
P27           0
P19           0
P20           0
P21           0
P22           0
P23           0
P24           0
P25           0
P26           0
P28           0
Open Date     0
P29           0
P30           0
P31           0
P32           0
P33           0
P34           0
P35           0
P36           0
P18           0
P17           0
P16           0
P15           0
City          0
City Group    0
Type          0
P1            0
P2            0
P3            0
P4            0
P5            0
P6            0
P7            0
P8            0
P9            0
P10           0
P11           0
P12           0
P13           0
P14           0
P37           0
dtype: int64

In [None]:
# Minimum number of non-NaN values a column must have to be kept: 60% of all rows.
percent = int(len(combined_Data) * 0.6)
print(percent)

# Keep only the columns that reach that number of non-NaN values.
td1 = combined_Data.dropna(axis='columns', thresh=percent)
print(td1.shape)

60082
(100137, 42)


In [None]:
# Per-column count of missing values that remain after the threshold drop, largest first.
td1.isna().sum().sort_values(ascending=False)

Id            0
P27           0
P19           0
P20           0
P21           0
P22           0
P23           0
P24           0
P25           0
P26           0
P28           0
Open Date     0
P29           0
P30           0
P31           0
P32           0
P33           0
P34           0
P35           0
P36           0
P18           0
P17           0
P16           0
P15           0
City          0
City Group    0
Type          0
P1            0
P2            0
P3            0
P4            0
P5            0
P6            0
P7            0
P8            0
P9            0
P10           0
P11           0
P12           0
P13           0
P14           0
P37           0
dtype: int64

In [None]:
#NOW IMPUTE MISSING VALUES FOR THE OTHER COLUMNS=========================
# Work on an explicit copy: td1 is the result of dropna(), and the column
# assignments below should never be treated as writes into a derived frame
# (guards against pandas' chained-assignment warning).
td1 = td1.copy()

#IMPUTE (SUBSTITUTE) MEAN VALUES FOR NaN IN NUMERIC COLUMNS
numeric = td1.select_dtypes(include=['int','float64']).columns
for num in numeric:
  td1[num] = td1[num].fillna(td1[num].mean())

#IMPUTE (SUBSTITUTE) MODE VALUES FOR NaN IN CATEGORICAL COLUMNS
train_cat_cols = td1.select_dtypes(exclude=['int','float64']).columns  # everything that is not int/float64
for colss in train_cat_cols:
  if td1[colss].iloc[0] == "N":
    # Column whose first row holds "N": missing entries are filled with "N".
    td1[colss] = td1[colss].fillna("N")
  else:
    # Series.mode() returns a Series (several values on ties). Passing that Series
    # to fillna() would align it on the index instead of broadcasting the value,
    # leaving the NaNs in place - so fill with the first mode as a scalar.
    modes = td1[colss].mode()
    if not modes.empty:  # empty when the column contains no non-NaN value at all
      td1[colss] = td1[colss].fillna(modes.iloc[0])

print(td1.head(20))



      Id   Open Date            City  City Group Type  P1   P2   P3   P4  P5  P6  P7  P8  P9  P10  P11  P12  P13  P14  P15  P16  P17  P18  P19  P20  P21  P22  P23  P24  P25   P26  P27   P28  P29  P30  P31  P32  P33  P34  P35  P36  P37
0 0    0  07/17/1999        İstanbul  Big Cities   IL   4  5.0  4.0  4.0   2   2   5   4   5    5    3    5  5.0    1    2    2    2    4    5    4    1    3    3    1    1   1.0  4.0   2.0  3.0    5    3    4    5    5    4    3    4
  1    1  02/14/2008          Ankara  Big Cities   FC   4  5.0  4.0  4.0   1   2   5   5   5    5    1    5  5.0    0    0    0    0    0    3    2    1    3    2    0    0   0.0  0.0   3.0  3.0    0    0    0    0    0    0    0    0
  2    2  03/09/2013      Diyarbakır       Other   IL   2  4.0  2.0  5.0   2   3   5   5   5    5    2    5  5.0    0    0    0    0    0    1    1    1    1    1    0    0   0.0  0.0   1.0  3.0    0    0    0    0    0    0    0    0
  3    3  02/02/2012           Tokat       Other   IL   6  4

In [None]:
# Sanity check after imputation: per-column count of missing values, largest first.
td1.isna().sum().sort_values(ascending=False)

Id            0
P27           0
P19           0
P20           0
P21           0
P22           0
P23           0
P24           0
P25           0
P26           0
P28           0
Open Date     0
P29           0
P30           0
P31           0
P32           0
P33           0
P34           0
P35           0
P36           0
P18           0
P17           0
P16           0
P15           0
City          0
City Group    0
Type          0
P1            0
P2            0
P3            0
P4            0
P5            0
P6            0
P7            0
P8            0
P9            0
P10           0
P11           0
P12           0
P13           0
P14           0
P37           0
dtype: int64

In [None]:
#DROP COLUMNS THAT STILL HAVE NULL VALUES
# The columns are detected from the data instead of being hard-coded: names that do
# not exist in the frame make DataFrame.drop raise KeyError, and a fixed list would
# miss any other column that still contains NaN.
print(td1.shape)
null_cols = td1.columns[td1.isnull().any()].tolist()
print("Dropping columns with remaining nulls:", null_cols)
td1 = td1.drop(columns=null_cols)  # an empty list is a no-op
print(td1.shape)

(434589, 296)
(434589, 293)


In [None]:
#DO ONE-HOT ENCODING ON CATEGORICAL VARIABLES==============================================
# Every column that is not float/int is treated as categorical.
train_cat_cols = td1.select_dtypes(exclude=['float','int']).columns
print(train_cat_cols.shape)
print(train_cat_cols)

# If there are categorical columns which are stored as numbers, add their names to
# this list explicitly before encoding.
#
# Encode train and test together so both end up with the same dummy columns.
# The column list must be passed as `columns=`: the second positional parameter of
# pd.get_dummies is `prefix`, so passing the list positionally only works when it
# happens to match the columns pandas would pick on its own.
# NOTE(review): 'Open Date' is encoded as one dummy per distinct date string, which
# produces hundreds of columns - consider deriving numeric date features instead.
combined_Data = pd.get_dummies(td1, columns=list(train_cat_cols))
combined_Data.head(10)


(4,)
Index(['Open Date', 'City', 'City Group', 'Type'], dtype='object')


Unnamed: 0,Unnamed: 1,Id,P1,P2,P3,P4,P5,P6,P7,P8,P9,P10,P11,P12,P13,P14,P15,P16,P17,P18,P19,P20,P21,P22,P23,P24,P25,P26,P27,P28,P29,P30,P31,P32,P33,P34,P35,P36,P37,Open Date_01/01/2000,Open Date_01/03/2013,Open Date_01/03/2014,Open Date_01/04/2014,Open Date_01/05/2000,Open Date_01/07/2000,Open Date_01/07/2011,Open Date_01/09/2010,Open Date_01/14/2011,Open Date_01/15/2013,Open Date_01/16/1999,Open Date_01/16/2009,Open Date_01/17/2009,Open Date_01/17/2012,Open Date_01/18/2011,Open Date_01/18/2013,Open Date_01/20/2012,Open Date_01/22/2007,Open Date_01/22/2011,Open Date_01/22/2013,Open Date_01/23/2009,Open Date_01/25/2010,Open Date_01/25/2014,Open Date_01/26/2009,Open Date_01/26/2012,Open Date_01/27/2005,Open Date_01/28/1998,Open Date_01/31/2012,Open Date_02/01/2011,Open Date_02/01/2012,Open Date_02/02/2012,Open Date_02/06/2007,Open Date_02/07/2007,Open Date_02/07/2009,Open Date_02/07/2012,Open Date_02/08/2007,Open Date_02/09/2000,Open Date_02/09/2010,Open Date_02/09/2011,Open Date_02/11/1998,Open Date_02/11/2008,Open Date_02/12/2010,Open Date_02/14/2008,Open Date_02/15/2010,Open Date_02/16/2007,Open Date_02/16/2010,Open Date_02/16/2012,Open Date_02/19/2004,Open Date_02/19/2010,Open Date_02/21/2013,Open Date_02/23/2010,Open Date_02/26/1998,Open Date_02/26/2010,Open Date_02/26/2011,Open Date_02/27/2013,Open Date_02/28/2013,Open Date_02/29/2012,Open Date_03/01/2008,Open Date_03/01/2011,Open Date_03/02/1998,Open Date_03/03/2012,Open Date_03/05/1996,Open Date_03/05/2012,Open Date_03/06/2009,Open Date_03/06/2013,Open Date_03/07/2010,Open Date_03/07/2011,Open Date_03/09/1998,Open Date_03/09/2013,Open Date_03/11/2011,Open Date_03/14/1998,Open Date_03/14/2012,Open Date_03/15/2008,Open Date_03/15/2011,Open Date_03/16/2006,Open Date_03/16/2007,Open Date_03/16/2010,Open Date_03/16/2013,Open Date_03/17/2007,Open Date_03/18/2007,Open Date_03/18/2011,Open Date_03/19/2009,Open Date_03/19/2012,Open Date_03/21/2012,Open Date_03/24/2008,Open Date_03/24/2009,Open Date_03/26/2005,Open 
Date_03/27/2013,Open Date_03/28/2009,Open Date_03/29/2013,Open Date_03/30/2002,Open Date_03/30/2012,Open Date_03/31/2011,Open Date_04/01/2000,Open Date_04/01/2008,Open Date_04/03/2010,Open Date_04/04/2008,Open Date_04/04/2012,Open Date_04/06/1998,Open Date_04/08/2010,Open Date_04/09/2000,Open Date_04/09/2011,Open Date_04/10/1997,Open Date_04/11/2012,Open Date_04/12/1997,Open Date_04/12/2012,Open Date_04/12/2013,Open Date_04/13/2007,Open Date_04/14/2009,Open Date_04/14/2010,Open Date_04/17/2009,Open Date_04/18/2006,Open Date_04/18/2008,Open Date_04/19/2013,Open Date_04/21/2007,Open Date_04/21/2012,Open Date_04/22/2008,Open Date_04/22/2010,Open Date_04/23/2013,Open Date_04/26/2013,Open Date_04/28/2012,Open Date_05/01/1997,Open Date_05/01/1998,Open Date_05/01/2009,Open Date_05/01/2010,Open Date_05/01/2011,Open Date_05/01/2012,Open Date_05/01/2013,Open Date_05/02/2008,Open Date_05/02/2013,Open Date_05/03/2009,Open Date_05/03/2013,Open Date_05/04/2012,Open Date_05/05/2007,Open Date_05/06/2013,Open Date_05/07/2010,Open Date_05/08/1995,Open Date_05/08/1996,Open Date_05/09/2008,Open Date_05/09/2009,Open Date_05/11/2009,Open Date_05/12/2013,Open Date_05/15/2012,Open Date_05/16/2008,Open Date_05/18/1998,Open Date_05/18/2011,Open Date_05/22/2012,Open Date_05/24/2007,Open Date_05/29/2013,Open Date_05/30/2006,Open Date_05/30/2008,Open Date_06/01/2009,Open Date_06/01/2010,Open Date_06/01/2012,Open Date_06/03/2009,Open Date_06/04/1996,Open Date_06/05/1999,Open Date_06/05/2012,Open Date_06/06/2003,Open Date_06/06/2007,Open Date_06/06/2013,Open Date_06/07/2013,Open Date_06/08/2000,Open Date_06/08/2012,Open Date_06/09/2007,Open Date_06/13/2006,Open Date_06/13/2009,Open Date_06/14/2013,Open Date_06/15/2008,Open Date_06/15/2010,Open Date_06/16/2012,Open Date_06/17/2010,Open Date_06/18/2007,Open Date_06/20/2007,Open Date_06/20/2011,Open Date_06/21/2008,Open Date_06/21/2011,Open Date_06/23/2009,Open Date_06/24/2011,Open Date_06/24/2012,Open Date_06/25/2008,Open Date_06/26/2010,Open 
Date_06/27/2007,Open Date_06/28/2012,Open Date_06/29/1997,Open Date_06/30/2001,Open Date_07/01/2006,Open Date_07/04/2008,Open Date_07/04/2009,Open Date_07/04/2011,Open Date_07/05/1997,Open Date_07/07/2010,Open Date_07/08/2006,Open Date_07/09/2012,Open Date_07/09/2013,Open Date_07/10/2004,Open Date_07/10/2013,Open Date_07/12/2012,Open Date_07/12/2013,Open Date_07/13/1998,Open Date_07/14/2000,Open Date_07/14/2007,Open Date_07/15/2011,Open Date_07/15/2013,Open Date_07/16/2012,Open Date_07/17/1999,Open Date_07/17/2009,Open Date_07/18/2011,Open Date_07/20/2007,Open Date_07/20/2008,Open Date_07/22/2010,Open Date_07/23/2011,Open Date_07/26/2011,Open Date_07/28/2000,Open Date_07/30/2010,Open Date_07/31/2013,Open Date_08/01/2000,Open Date_08/01/2008,Open Date_08/01/2010,Open Date_08/02/1995,Open Date_08/02/2010,Open Date_08/02/2011,Open Date_08/03/2012,Open Date_08/05/2009,Open Date_08/05/2011,Open Date_08/05/2013,Open Date_08/06/2009,Open Date_08/07/2013,Open Date_08/08/2011,Open Date_08/08/2013,Open Date_08/09/2013,Open Date_08/10/2012,Open Date_08/11/2007,Open Date_08/11/2010,Open Date_08/12/1998,Open Date_08/12/2008,Open Date_08/13/2009,Open Date_08/13/2011,Open Date_08/14/2009,Open Date_08/15/2005,Open Date_08/16/2011,Open Date_08/18/2005,Open Date_08/18/2011,Open Date_08/18/2012,Open Date_08/19/2006,Open Date_08/20/2004,Open Date_08/20/2011,Open Date_08/22/2004,Open Date_08/23/2010,Open Date_08/23/2013,Open Date_08/24/1996,Open Date_08/25/2006,Open Date_08/25/2007,Open Date_08/25/2010,Open Date_08/25/2011,Open Date_08/26/2000,Open Date_08/27/1999,Open Date_08/28/2009,Open Date_08/28/2010,Open Date_08/30/2011,Open Date_08/30/2013,Open Date_09/01/2001,Open Date_09/01/2009,Open Date_09/01/2010,Open Date_09/03/2010,Open Date_09/04/2009,Open Date_09/06/1996,Open Date_09/06/1997,Open Date_09/07/2007,Open Date_09/07/2010,Open Date_09/08/2010,Open Date_09/09/2005,Open Date_09/09/2010,Open Date_09/09/2012,Open Date_09/09/2013,Open Date_09/10/1998,Open Date_09/11/2004,Open 
Date_09/13/1999,Open Date_09/13/2006,Open Date_09/14/2009,Open Date_09/15/2006,Open Date_09/15/2007,Open Date_09/15/2011,Open Date_09/17/2009,Open Date_09/19/2009,Open Date_09/20/2009,Open Date_09/20/2013,Open Date_09/21/2007,Open Date_09/21/2012,Open Date_09/23/2008,Open Date_09/24/2009,Open Date_09/26/2003,Open Date_09/26/2006,Open Date_09/26/2009,Open Date_09/26/2012,Open Date_09/27/2011,Open Date_09/28/2008,Open Date_09/28/2010,Open Date_09/29/2012,Open Date_10/04/2000,Open Date_10/04/2011,Open Date_10/04/2012,Open Date_10/04/2013,Open Date_10/05/2010,Open Date_10/06/2006,Open Date_10/06/2007,Open Date_10/06/2010,Open Date_10/09/1999,Open Date_10/09/2009,Open Date_10/10/2011,Open Date_10/10/2013,Open Date_10/11/2010,Open Date_10/12/2006,Open Date_10/12/2013,Open Date_10/13/2004,Open Date_10/13/2006,Open Date_10/14/2011,Open Date_10/14/2012,Open Date_10/14/2013,Open Date_10/15/2005,Open Date_10/15/2011,Open Date_10/15/2012,Open Date_10/16/2009,Open Date_10/16/2010,Open Date_10/17/1995,Open Date_10/17/2008,Open Date_10/18/2012,Open Date_10/21/2011,Open Date_10/22/2006,Open Date_10/23/1999,Open Date_10/23/2004,Open Date_10/24/2006,Open Date_10/25/2013,Open Date_10/26/2009,Open Date_10/28/2008,Open Date_10/28/2012,Open Date_10/29/2007,Open Date_10/29/2010,Open Date_10/29/2011,Open Date_10/30/2013,Open Date_11/01/2002,Open Date_11/01/2006,Open Date_11/01/2007,Open Date_11/02/2000,Open Date_11/03/2012,Open Date_11/04/2011,Open Date_11/05/2008,Open Date_11/05/2009,Open Date_11/05/2011,Open Date_11/06/2002,Open Date_11/08/2009,Open Date_11/08/2011,Open Date_11/08/2013,Open Date_11/12/2010,Open Date_11/12/2013,Open Date_11/13/2004,Open Date_11/13/2009,Open Date_11/13/2013,Open Date_11/14/2012,Open Date_11/15/2010,Open Date_11/16/2011,Open Date_11/16/2012,Open Date_11/17/1995,Open Date_11/17/2009,Open Date_11/19/2009,Open Date_11/21/1999,Open Date_11/21/2009,Open Date_11/22/2007,Open Date_11/22/2009,Open Date_11/22/2012,Open Date_11/25/2008,Open Date_11/25/2009,Open 
Date_11/25/2011,Open Date_11/26/2009,Open Date_11/26/2010,Open Date_11/27/2006,Open Date_11/27/2008,Open Date_11/27/2011,Open Date_11/29/2007,Open Date_11/29/2008,Open Date_12/01/2007,Open Date_12/01/2008,Open Date_12/01/2011,Open Date_12/02/2007,Open Date_12/02/2008,Open Date_12/03/2012,Open Date_12/03/2013,Open Date_12/04/1999,Open Date_12/04/2012,Open Date_12/06/2006,Open Date_12/06/2008,Open Date_12/06/2011,Open Date_12/06/2013,Open Date_12/07/2004,Open Date_12/07/2008,Open Date_12/08/2012,Open Date_12/09/2006,Open Date_12/11/2012,Open Date_12/12/2011,Open Date_12/16/2005,Open Date_12/16/2009,Open Date_12/17/2006,Open Date_12/17/2010,Open Date_12/17/2013,Open Date_12/18/1998,Open Date_12/18/1999,Open Date_12/18/2010,Open Date_12/20/1997,Open Date_12/20/2003,Open Date_12/21/2011,Open Date_12/21/2012,Open Date_12/21/2013,Open Date_12/23/2005,Open Date_12/23/2009,Open Date_12/23/2011,Open Date_12/23/2012,Open Date_12/24/2010,Open Date_12/25/2009,Open Date_12/27/2005,Open Date_12/28/2010,Open Date_12/28/2011,Open Date_12/29/2011,Open Date_12/29/2012,Open Date_12/30/2011,Open Date_12/31/1997,Open Date_12/31/2011,Open Date_12/31/2012,City_Adana,City_Afyonkarahisar,City_Aksaray,City_Amasya,City_Ankara,City_Antalya,City_Artvin,City_Aydın,City_Balıkesir,City_Batman,City_Bilecik,City_Bolu,City_Bursa,City_Denizli,City_Diyarbakır,City_Düzce,City_Edirne,City_Elazığ,City_Erzincan,City_Erzurum,City_Eskişehir,City_Gaziantep,City_Giresun,City_Hatay,City_Isparta,City_Kahramanmaraş,City_Karabük,City_Kars,City_Kastamonu,City_Kayseri,City_Kocaeli,City_Konya,City_Kütahya,City_Kırklareli,City_Kırıkkale,City_Kırşehir,City_Malatya,City_Manisa,City_Mardin,City_Mersin,City_Muğla,City_Nevşehir,City_Niğde,City_Ordu,City_Osmaniye,City_Rize,City_Sakarya,City_Samsun,City_Siirt,City_Sivas,City_Tanımsız,City_Tekirdağ,City_Tokat,City_Trabzon,City_Uşak,City_Yalova,City_Zonguldak,City_Çanakkale,City_Çankırı,City_Çorum,City_İstanbul,City_İzmir,City_Şanlıurfa,City Group_Big Cities,City 
Group_Other,Type_DT,Type_FC,Type_IL,Type_MB
0,0,0,4,5.0,4.0,4.0,2,2,5,4,5,5,3,5,5.0,1,2,2,2,4,5,4,1,3,3,1,1,1.0,4.0,2.0,3.0,5,3,4,5,5,4,3,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0
0,1,1,4,5.0,4.0,4.0,1,2,5,5,5,5,1,5,5.0,0,0,0,0,0,3,2,1,3,2,0,0,0.0,0.0,3.0,3.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0
0,2,2,2,4.0,2.0,5.0,2,3,5,5,5,5,2,5,5.0,0,0,0,0,0,1,1,1,1,1,0,0,0.0,0.0,1.0,3.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0
0,3,3,6,4.5,6.0,6.0,4,4,10,8,10,10,8,10,7.5,6,4,9,3,12,20,12,6,1,10,2,2,2.5,2.5,2.5,7.5,25,12,10,6,18,12,12,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0
0,4,4,3,4.0,3.0,4.0,2,2,5,5,5,5,2,5,5.0,2,1,2,1,4,2,2,1,2,1,2,3,3.0,5.0,1.0,3.0,5,1,3,2,3,4,3,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0
0,5,5,6,6.0,4.5,7.5,8,10,10,8,8,8,10,8,6.0,0,0,0,0,0,5,6,3,1,5,0,0,0.0,0.0,7.5,5.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0
0,6,6,2,3.0,4.0,4.0,1,5,5,5,5,5,2,5,5.0,3,4,4,3,4,2,4,1,2,1,5,4,4.0,5.0,1.0,3.0,4,5,2,2,3,5,4,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0
0,7,7,4,5.0,4.0,5.0,2,3,5,4,4,4,4,3,4.0,0,0,0,0,0,3,5,2,4,2,0,0,0.0,0.0,3.0,2.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0
0,8,8,1,1.0,4.0,4.0,1,2,1,5,5,5,1,5,5.0,1,1,2,1,4,1,1,1,1,1,4,4,4.0,2.0,2.0,3.0,4,5,5,3,4,5,4,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0
0,9,9,6,4.5,6.0,7.5,6,4,10,10,10,10,2,10,7.5,0,0,0,0,0,25,3,3,1,10,0,0,0.0,0.0,5.0,2.5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0


In [None]:
#Separate Train data and test data
# Split the one-hot encoded frame (combined_Data). Splitting td1 here would throw
# the encoding step away and export the raw categorical columns instead.
# Outer index key 0 = train rows, 1 = test rows (set when the frames were concatenated).
trainData = combined_Data.xs(0)
testData = combined_Data.xs(1)
print(trainData.shape)
print(testData.shape)

# Re-attach the target. concat(axis=1) aligns on the row index, so the training rows
# must still line up one-to-one with the saved target column.
assert len(trainData) == len(Target_Train_Cols), "train rows and target rows differ"
trainData = pd.concat([trainData, Target_Train_Cols], axis=1)
print(trainData.shape)


(137, 42)
(100000, 42)
(137, 43)


In [None]:
# Persist the preprocessed frames to Drive. to_csv() returns None when a path is
# given, so the two names below only record that the writes were issued.
# The row index is written as the first CSV column (pandas default).
export_csv = trainData.to_csv(path_or_buf=r'/gdrive/My Drive/Preprocess_Train_Assignment4.csv')
exporttest_csv = testData.to_csv(path_or_buf=r'/gdrive/My Drive/Preprocess_Test_Assignment4.csv')