# Rank Averaging

<img src = 'rank_average.png'>

In [1]:
# Import required libraries
import numpy as np
import pandas as pd
import warnings
# Silences every warning category for the rest of the session (including
# deprecation warnings raised by pandas / NumPy / scikit-learn calls below).
warnings.filterwarnings('ignore')

In [2]:
# Load the pre-cleaned BigMart dataset from the working directory
DATA_FILE = 'bigmart_cleaned.csv'
df = pd.read_csv(DATA_FILE)

In [3]:
# Display the (rows, columns) shape of the loaded dataframe
df.shape

(8523, 46)

In [4]:
# Preview the first 5 rows of the dataframe
df.head()

Unnamed: 0,Item_Weight,Item_Visibility,Item_MRP,Outlet_Establishment_Year,Item_Outlet_Sales,Item_Fat_Content_LF,Item_Fat_Content_Low Fat,Item_Fat_Content_Regular,Item_Fat_Content_low fat,Item_Fat_Content_reg,...,Outlet_Size_High,Outlet_Size_Medium,Outlet_Size_Small,Outlet_Location_Type_Tier 1,Outlet_Location_Type_Tier 2,Outlet_Location_Type_Tier 3,Outlet_Type_Grocery Store,Outlet_Type_Supermarket Type1,Outlet_Type_Supermarket Type2,Outlet_Type_Supermarket Type3
0,9.3,0.016047,249.8092,1999,3735.138,0,1,0,0,0,...,0,1,0,1,0,0,0,1,0,0
1,5.92,0.019278,48.2692,2009,443.4228,0,0,1,0,0,...,0,1,0,0,0,1,0,0,1,0
2,17.5,0.01676,141.618,1999,2097.27,0,1,0,0,0,...,0,1,0,1,0,0,0,1,0,0
3,19.2,0.0,182.095,1998,732.38,0,0,1,0,0,...,0,0,0,0,0,1,1,0,0,0
4,8.93,0.0,53.8614,1987,994.7052,0,1,0,0,0,...,1,0,0,0,0,1,0,1,0,0


In [5]:
# Separate independent (X) and dependent (y) variables.
# 'Unnamed: 0' appears to be a row index that was written into the CSV (it reads
# 0, 1, 2, ... in df.head()). It carries no information about sales and, being
# unscaled, would distort distance-based models, so it is excluded when present.
# NOTE: the outputs recorded further down were produced with that column still in X;
# re-run the notebook to refresh them.
X = df.drop(columns='Item_Outlet_Sales').drop(columns='Unnamed: 0', errors='ignore')
y = df['Item_Outlet_Sales']

In [6]:
# Hold out 20% of the rows as a test set; the fixed seed keeps the split repeatable
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=11,
)

### Implement Linear Regression

In [7]:
# Model 1: linear regression
from sklearn.linear_model import LinearRegression

# fit() returns the fitted estimator, so construction and training can be chained
model1 = LinearRegression().fit(X_train, y_train)
# Predictions on the held-out test set (reused later for the weighted blend)
pred1 = model1.predict(X_test)
# Evaluate on the test set
print("Linear Regression Score : ", model1.score(X_test, y_test))

Linear Regression Score :  0.550628302715441


### Implement K Nearest Neighbors

In [8]:
# Model 2: k-nearest-neighbours regression using 9 neighbours
from sklearn.neighbors import KNeighborsRegressor

# Build and train in one expression (fit() returns the estimator itself)
model2 = KNeighborsRegressor(n_neighbors=9).fit(X_train, y_train)
# Predictions on the held-out test set (reused later for the weighted blend)
pred2 = model2.predict(X_test)
# Evaluate on the test set
print("KNN Regressor Score : ", model2.score(X_test, y_test))

KNN Regressor Score :  0.4934702448040451


### Implement Decision Tree

In [9]:
# Model 3: decision tree regression, depth-limited to 7 levels
from sklearn.tree import DecisionTreeRegressor

# random_state is pinned because scikit-learn permutes the candidate features at
# every split; without a seed, equally good splits can be chosen differently from
# run to run and the score/predictions are not reproducible.
model3 = DecisionTreeRegressor(max_depth=7, random_state=11)
model3.fit(X_train, y_train)                                                   # Train the model
pred3 = model3.predict(X_test)                                                 # Predict on the test set
print("Decision Tree Regressor Score: ", model3.score(X_test, y_test))         # Evaluate the model

Decision Tree Regressor Score:  0.5491060514839026


In [10]:
# Collect each model's test-set score in a dataframe indexed by model number
# (1 = model1, 2 = model2, 3 = model3)
i = [1, 2, 3]
scores = [model.score(X_test, y_test) for model in (model1, model2, model3)]

df_score = pd.DataFrame(scores, index=i, columns=['Score'])
df_score

Unnamed: 0,Score
1,0.550628
2,0.49347
3,0.549106


In [11]:
# Order the models from lowest to highest score (returns a new dataframe)
sorted_df_score = df_score.sort_values(by='Score', ascending=True)
sorted_df_score

Unnamed: 0,Score
2,0.49347
3,0.549106
1,0.550628


In [12]:
# Add a Rank column: 1 for the lowest score up to N for the highest.
# Relies on sorted_df_score already being sorted ascending by Score.
# The upper bound is derived from the number of rows rather than hard-coded,
# so adding or removing a model does not cause a length-mismatch error.
sorted_df_score['Rank'] = list(range(1, len(sorted_df_score) + 1))
sorted_df_score

Unnamed: 0,Score,Rank
2,0.49347,1
3,0.549106,2
1,0.550628,3


In [13]:
# Turn ranks into weights that sum to 1: each rank divided by the total of all ranks
sorted_df_score['Weight'] = sorted_df_score['Rank'].div(sorted_df_score['Rank'].sum())
sorted_df_score

Unnamed: 0,Score,Rank,Weight
2,0.49347,1,0.166667
3,0.549106,2,0.333333
1,0.550628,3,0.5


In [14]:
# Multiply the predictions of Linear Regression, KNN Regressor and Decision Tree
# Regressor by their respective weights and sum them into the ensemble prediction.
# Weights are looked up by index label (1, 2, 3 correspond to model1, model2, model3),
# so the row order of sorted_df_score does not affect which weight each model gets.
# Every lookup is a scalar .loc[label, column]: passing a 2-D size-1 array to float()
# relies on an array-to-scalar conversion that NumPy has deprecated.
# NOTE(review): the weights come from scores measured on X_test / y_test, so scoring
# the blend on that same test set is optimistic - consider a separate validation split.
wt_pred1 = pred1 * float(sorted_df_score.loc[1, 'Weight'])
wt_pred2 = pred2 * float(sorted_df_score.loc[2, 'Weight'])
wt_pred3 = pred3 * float(sorted_df_score.loc[3, 'Weight'])
ranked_pred = wt_pred1 + wt_pred2 + wt_pred3
ranked_pred

array([ 808.73684906, 4011.63423961, 3801.12583108, ...,  439.41541517,
       1304.64811901, 3764.74410119])

In [15]:
# Report R^2 on the test set for each individual model and for the weighted ensemble
from sklearn.metrics import r2_score

for label, preds in [
    ("Linear Regression Score : ", pred1),
    ("KNN Regressor Score : ", pred2),
    ("Decision Tree Regressor Score : ", pred3),
    ("Ensembled Model Score : ", ranked_pred),
]:
    print(label, r2_score(y_test, preds))

Linear Regression Score :  0.550628302715441
KNN Regressor Score :  0.4934702448040451
Decision Tree Regressor Score :  0.5491060514839026
Ensembled Model Score :  0.5688354630804627
