In [1]:
# Initial imports
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import numpy as np
import random
import datetime

import warnings
warnings.filterwarnings('ignore')

from sklearn import preprocessing

In [2]:
# Seed Python's built-in `random` generator so any draws from it repeat across runs.
# NOTE(review): this does not seed NumPy's generator, and no cell visible in
# this notebook draws random numbers -- confirm the seed is still needed.
random.seed(3)

In [3]:
# Imports for better visualization
from matplotlib import cycler, rcParams

# colorbrewer2 Dark2 qualitative color table, as RGB triples in the 0-1 range
dark2_colors = [(0.10588235294117647, 0.6196078431372549, 0.4666666666666667),
                (0.8509803921568627, 0.37254901960784315, 0.00784313725490196),
                (0.4588235294117647, 0.4392156862745098, 0.7019607843137254),
                (0.9058823529411765, 0.1607843137254902, 0.5411764705882353),
                (0.4, 0.6509803921568628, 0.11764705882352941),
                (0.9019607843137255, 0.6705882352941176, 0.00784313725490196),
                (0.6509803921568628, 0.4627450980392157, 0.11372549019607843)]

# Global matplotlib defaults; they apply to every figure drawn after this cell.
rcParams['figure.figsize'] = (8, 3)
rcParams['figure.dpi'] = 150
# 'axes.color_cycle' was deprecated in matplotlib 1.5 and later removed, so
# assigning to it raises KeyError on current releases. 'axes.prop_cycle' is
# the replacement key and expects a Cycler rather than a plain list.
rcParams['axes.prop_cycle'] = cycler(color=dark2_colors)
rcParams['lines.linewidth'] = 2
rcParams['font.size'] = 14
rcParams['patch.edgecolor'] = 'white'
rcParams['patch.facecolor'] = dark2_colors[0]
rcParams['font.family'] = 'StixGeneral'
rcParams['axes.grid'] = True
rcParams['axes.facecolor'] = '#eeeeee'

## Importing Data

In [28]:
# Folder that holds the four per-model score exports. Change this one constant
# to point the notebook at a different copy of the data.
DATA_DIR = r"C:\Users\HARSH\Desktop\AMEX\COmbining"

# One frame per model. Per the head() outputs further down, each file carries
# `cm_key` plus a `<Model>_NO` / `<Model>_Yes` column pair.
Supp = pd.read_csv(DATA_DIR + "\\Supp.csv")
Credit = pd.read_csv(DATA_DIR + "\\Credit.csv")
Elite = pd.read_csv(DATA_DIR + "\\Elite.csv")
NOne = pd.read_csv(DATA_DIR + "\\NOne.csv")

In [29]:
# Peek at the first two rows to check the columns read from Supp.csv.
Supp.head(2)

Unnamed: 0,cm_key,Supp_NO,Supp_Yes
0,50001,0.947563,0.052437
1,50002,0.962295,0.037705


In [31]:
# Join the four per-model score tables into one row per cm_key.
# how="inner" keeps only the keys present in every table.
# The key is named explicitly: without `on`, merge joins on every column name
# the two frames happen to share, so a stray common column would silently
# become part of the join condition.
data = Supp
for scores in (Credit, Elite, NOne):
    data = pd.merge(data, scores, how="inner", on="cm_key")

In [30]:
# Row/column count of the merged table.
# NOTE(review): the saved output below belongs to execution In [30], i.e. it
# was produced before the merge cell's last run (In [31]); it may not match a
# fresh top-to-bottom run.
data.shape

(10000, 12)

In [32]:
# First two rows of the merged table: cm_key plus a _NO/_Yes pair per model.
data.head(2)

Unnamed: 0,cm_key,Supp_NO,Supp_Yes,Credit_NO,Credit_Yes,Elite_NO,Elite_Yes,None_NO,None_Yes
0,50001,0.947563,0.052437,0.930272,0.069728,0.817302,0.182698,0.264997,0.735003
1,50002,0.962295,0.037705,0.996729,0.003271,0.962443,0.037557,0.124649,0.875351


## According to Max

In [47]:
# Best product score per customer, and the product that score belongs to.
yes_cols = ["Supp_Yes", "Elite_Yes", "Credit_Yes"]
# Column-wise maximum instead of a Python-level apply over every row; the
# value is the same for rows whose three scores are all present
# (DataFrame.max skips NaN, unlike the builtin max).
data["Max"] = data[yes_cols].max(axis=1)

# Label each row with the product whose score equals Max. The assignment order
# is the tie-break: Credit overrides Elite, which overrides the "Supp" default.
# .loc writes into `data` itself. Chained assignment of the form
# data["predict"][mask] = ... is not guaranteed to write through, and does not
# update `data` at all under pandas copy-on-write.
data["predict"] = "Supp"
data.loc[data["Max"] == data["Elite_Yes"], "predict"] = "Elite"
data.loc[data["Max"] == data["Credit_Yes"], "predict"] = "Credit"

In [34]:
# Rank customers by their best product score; rank 1.0 is the highest Max.
data['Rank_Max'] = data["Max"].rank(ascending=False)
# DataFrame.sort was removed in pandas 0.20; sort_values is its replacement.
data = data.sort_values(["Rank_Max"], ascending=True)

In [36]:
# Top two rows after sorting by Rank_Max.
# NOTE(review): the saved output has no `predict` column because the cell that
# adds it carries a later execution count (In [47]) than this one (In [36]);
# a fresh top-to-bottom run will include it.
data.head(2)

Unnamed: 0,cm_key,Supp_NO,Supp_Yes,Credit_NO,Credit_Yes,Elite_NO,Elite_Yes,None_NO,None_Yes,Max,Rank_Max
126,50127,0.126912,0.873088,0.999331,0.000669,0.999994,6e-06,0.731745,0.268255,0.873088,1.0
8225,58226,0.93861,0.06139,0.982611,0.017389,0.129592,0.870408,0.650703,0.349297,0.870408,2.0


## According to None

In [38]:
# Rank customers by None_Yes, smallest first: rank 1.0 is the lowest None_Yes.
data['RankNone'] = data["None_Yes"].rank(ascending=True)
# DataFrame.sort was removed in pandas 0.20; sort_values is its replacement.
data = data.sort_values(["RankNone"], ascending=True)

In [39]:
# Top two rows after sorting by RankNone (lowest None_Yes first).
data.head(2)

Unnamed: 0,cm_key,Supp_NO,Supp_Yes,Credit_NO,Credit_Yes,Elite_NO,Elite_Yes,None_NO,None_Yes,Max,Rank_Max,RankNone
643,50644,0.543376,0.456624,0.998767,0.001233,0.978124,0.021876,0.781752,0.218248,0.456624,219.0,1.0
7144,57145,0.904556,0.095444,0.743311,0.256689,0.608052,0.391947,0.74338,0.25662,0.391947,367.0,2.0


## According to Supp_NO + Credit_NO + Elite_NO + None_Yes

In [40]:
# "No" adds the three product _NO columns to None_Yes, so a smaller value
# means less combined weight on "takes nothing".
# NOTE(review): presumably these columns are model probabilities -- confirm.
data["No"] = data["Supp_NO"] + data["Credit_NO"] + data["Elite_NO"] + data["None_Yes"]

# Smallest "No" gets rank 1.0.
data['Rank_NO'] = data["No"].rank(ascending=True)
# DataFrame.sort was removed in pandas 0.20; sort_values is its replacement.
data = data.sort_values(["Rank_NO"], ascending=True)

In [41]:
# The complementary columns are not read by any later cell; remove them from
# `data` in place so the remaining displays stay narrow.
for unused_col in ("Supp_NO", "Credit_NO", "Elite_NO", "None_NO"):
    del data[unused_col]

In [42]:
# Top two rows after sorting by Rank_NO, with the dropped columns gone.
data.head(2)

Unnamed: 0,cm_key,Supp_Yes,Credit_Yes,Elite_Yes,None_Yes,Max,Rank_Max,RankNone,No,Rank_NO
4,50005,0.025508,0.179271,0.830389,0.327184,0.830389,3.0,21.0,2.292017,1.0
3533,53534,0.108121,0.144952,0.672711,0.282073,0.672711,30.0,6.0,2.356289,2.0


## According to Max - None_Yes

In [43]:
# Margin by which the best product score exceeds None_Yes.
data["Max-None"] = data["Max"] - data["None_Yes"]

# Largest margin gets rank 1.0.
data['Rank_Max-None'] = data["Max-None"].rank(ascending=False)
# DataFrame.sort was removed in pandas 0.20; sort_values is its replacement.
data = data.sort_values(["Rank_Max-None"], ascending=True)

In [44]:
# Top two rows after sorting by Rank_Max-None (largest margin first).
data.head(2)

Unnamed: 0,cm_key,Supp_Yes,Credit_Yes,Elite_Yes,None_Yes,Max,Rank_Max,RankNone,No,Rank_NO,Max-None,Rank_Max-None
126,50127,0.873088,0.000669,6e-06,0.268255,0.873088,1.0,3.0,2.394492,5.0,0.604834,1.0
8225,58226,0.06139,0.017389,0.870408,0.349297,0.870408,2.0,34.0,2.40011,6.0,0.521111,2.0


## Ensembling

In [45]:
# Ensemble score: the sum of the two rank columns, so a lower total means the
# customer sits near the top of both orderings.
data["Rank_Combine"] = data["Rank_Max-None"].add(data["Rank_NO"])

In [48]:
# Best (lowest) combined rank first.
# DataFrame.sort was removed in pandas 0.20; sort_values is its replacement.
data = data.sort_values(["Rank_Combine"], ascending=True)
# Keep the key and predicted product for the first 1000 rows of that ordering.
# NOTE(review): 1000 is presumably the required submission size -- confirm.
submission = data[["cm_key", "predict"]].iloc[:1000]

In [49]:
# Top two rows after sorting by the combined rank.
data.head(2)

Unnamed: 0,cm_key,Supp_Yes,Credit_Yes,Elite_Yes,None_Yes,Max,Rank_Max,RankNone,No,Rank_NO,Max-None,Rank_Max-None,Rank_Combine,predict
4,50005,0.025508,0.179271,0.830389,0.327184,0.830389,3.0,21.0,2.292017,1.0,0.503205,3.0,4.0,Elite
126,50127,0.873088,0.000669,6e-06,0.268255,0.873088,1.0,3.0,2.394492,5.0,0.604834,1.0,6.0,Supp


In [52]:
# First two rows of the submission frame (cm_key and predicted product).
submission.head(2)

Unnamed: 0,cm_key,predict
4,50005,Elite
126,50127,Supp


In [53]:
# How many of the selected rows are assigned to each product.
submission["predict"].value_counts()

Elite     381
Credit    356
Supp      263
Name: predict, dtype: int64

In [210]:
# Write the submission without index or header row. The file name ends in the
# current day, hour and minute (not zero-padded), joined by underscores.
now = datetime.datetime.now()
stamp = "_".join(str(part) for part in (now.day, now.hour, now.minute))
out_path = r"C:\Users\HARSH\Desktop\AMEX\Submissions\Models_Final" + stamp + ".csv"
submission.to_csv(out_path, index=False, header=None)