In [0]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn import metrics


In [0]:
# Load the training and test tables from the project's GitHub repository.
# The first CSV column is used as the row index of each frame.
train = pd.read_csv(
    'https://raw.githubusercontent.com/Aswath98/MarketingAnalytics/master/DataToModel.csv',
    index_col=[0],
)
test = pd.read_csv(
    'https://raw.githubusercontent.com/Aswath98/MarketingAnalytics/master/TestData.csv',
    index_col=[0],
)

In [0]:
# Print the training columns, then show the test columns as the cell output,
# so the two schemas can be compared by eye.
print(train.columns)
test.columns

In [0]:
# Remove the establishment-year column from both frames so it is not picked up
# as a predictor.
# `drop(..., errors='ignore')` keeps the cell idempotent: the original `del`
# statements raised KeyError when the cell was executed a second time, because
# the column had already been removed from the live frames.
train = train.drop(columns=['Outlet_Establishment_Year'], errors='ignore')
test = test.drop(columns=['Outlet_Establishment_Year'], errors='ignore')

In [0]:
# Identifier columns and the target are excluded; every other training column
# is used as a model input, in the order it appears in `train`.
IDcol = ['Item_Identifier', 'Outlet_Identifier']
predictors = [
    column
    for column in train.columns
    if column not in IDcol + ['Item_Outlet_Sales']
]

In [0]:
# Display the resulting list of feature columns for a visual check.
predictors

In [0]:
# Feature matrices and target vector: both frames are restricted to the same
# predictor columns, and the target is taken from the training frame only.
X_train = train.loc[:, predictors]
Y_train = train.loc[:, 'Item_Outlet_Sales']
X_test = test.loc[:, predictors]

In [0]:
# Ordinary least-squares baseline.
# The `normalize=True` argument was deprecated in scikit-learn 1.0 and removed
# in 1.2, so passing it raises TypeError on current releases.  For plain OLS
# with an intercept, rescaling the features does not change the fitted
# predictions, so the argument is simply dropped; this form works on old and
# new scikit-learn versions alike.
regressor = LinearRegression()
regressor.fit(X_train, Y_train)  # training the algorithm

In [0]:
# Show the fitted intercept on the first line, then the slope coefficients.
print(regressor.intercept_, regressor.coef_, sep='\n')

In [0]:
# Predict the target for every row of the test feature matrix with the fitted linear model.
y_pred = regressor.predict(X_test)

In [0]:
# Number of predictions produced, shown as the cell output.
len(y_pred)

In [0]:
# Store the linear-regression predictions on the test frame as the target column.
# This mutates `test` in place; the model cells further down overwrite the same column.
test['Item_Outlet_Sales']=y_pred

In [0]:
# Submission frame: the two identifier columns plus the predicted target.
sample = test.loc[:, ['Item_Identifier','Outlet_Identifier','Item_Outlet_Sales']]

In [0]:
# Write the linear-regression submission file.
# `index=False` keeps the row index (loaded from the first CSV column) out of
# the output, matching the later submission cells in this notebook; without it
# the file gains an extra leading column.
sample.to_csv('LinearRegressionSubmission.csv', index=False)

In [0]:
# Ridge regression on scaled features.
# `Ridge(normalize=True)` was deprecated in scikit-learn 1.0 and removed in 1.2,
# so the original call raises TypeError on current releases.  The replacement
# follows scikit-learn's migration guidance: scale with
# StandardScaler(with_mean=False) in a pipeline and multiply the original alpha
# by the number of training samples (the old option divided each centred column
# by its L2 norm, which is sqrt(n_samples) times the standard deviation).
# NOTE: `ridge` is now a Pipeline; the underlying estimator is `ridge[-1]`.
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

ridge = make_pipeline(
    StandardScaler(with_mean=False),
    Ridge(alpha=0.05 * len(X_train)),
)
ridge.fit(X_train, Y_train)
ridge_prediction = ridge.predict(X_test)

# Overwrite the target column on `test` with the ridge predictions and export.
test['Item_Outlet_Sales'] = ridge_prediction
ridgePred = test[['Item_Identifier','Outlet_Identifier','Item_Outlet_Sales']]
# index=False keeps the row index out of the file, consistent with the later
# submission cells in this notebook.
ridgePred.to_csv('RidgeRegressionSubmission.csv', index=False)

In [0]:
# Preview the first rows of the test frame, including the most recently assigned prediction column.
test.head()

In [0]:
from sklearn.tree import DecisionTreeRegressor

# Regression tree limited to depth 15 with at least 100 samples per leaf.
DT = DecisionTreeRegressor(max_depth=15, min_samples_leaf=100)
DT.fit(X_train, Y_train)
DT_prediction = DT.predict(X_test)

# Overwrite the target column on `test` with the tree predictions and export.
test['Item_Outlet_Sales'] = DT_prediction
DTPred = test[['Item_Identifier','Outlet_Identifier','Item_Outlet_Sales']]
# index=False keeps the row index out of the file; the original call wrote it
# as an extra leading column, unlike the later submission cells.
DTPred.to_csv('DTRegressionSubmission.csv', index=False)

In [0]:
from sklearn.tree import DecisionTreeRegressor

# Second regression tree: depth capped at 8, at least 150 samples per leaf.
# `fit` returns the estimator itself, so construction and training are chained.
DT1 = DecisionTreeRegressor(
    min_samples_leaf=150,
    max_depth=8,
).fit(X_train, Y_train)
DT1_prediction = DT1.predict(X_test)

# Overwrite the target column on `test`, then export identifiers + prediction.
test['Item_Outlet_Sales'] = DT1_prediction
DT1Pred = test.loc[:, ['Item_Identifier','Outlet_Identifier','Item_Outlet_Sales']]
DT1Pred.to_csv('DTRegression1Submission.csv', index=False)

In [0]:
from sklearn.ensemble import RandomForestRegressor

# Random forest of 200 shallow trees (depth 5, at least 100 samples per leaf).
# A fixed `random_state` is supplied so that the bootstrap samples, and hence
# the submission file, are identical on every run; the original call left the
# seed unset, so each execution produced a different forest.
RF = RandomForestRegressor(
    n_estimators=200,
    max_depth=5,
    min_samples_leaf=100,
    n_jobs=4,
    random_state=42,
)
RF.fit(X_train, Y_train)
RF_prediction = RF.predict(X_test)

# Overwrite the target column on `test` with the forest predictions and export.
test['Item_Outlet_Sales'] = RF_prediction
RFPred = test[['Item_Identifier','Outlet_Identifier','Item_Outlet_Sales']]
RFPred.to_csv('RandomForestRegression1Submission.csv', index=False)

In [0]:
!pip install lightgbm

In [0]:
import lightgbm

In [0]:
# NOTE(review): `params` is not passed to the estimator below, which is built
# from its own keyword arguments (e.g. learning_rate=0.1, not 0.05).  It is kept
# because code outside this cell may read it; confirm before removing.
params = {
    'objective': 'regression',
    'metric': {'l2', 'l1'},
    'num_leaves': 31,
    'learning_rate': 0.05,
    'feature_fraction': 0.9,
    'bagging_fraction': 0.8,
    'bagging_freq': 5,
    'verbose': 0
}

# LightGBM regressor with explicitly spelled-out hyper-parameters.
# The `silent=True` argument of the original call was dropped: `silent` was
# deprecated in LightGBM 3.3 and is no longer part of the LGBMRegressor
# signature in 4.x, and it matched the library default where it was supported.
light = lightgbm.LGBMRegressor(
    num_leaves=31,
    max_depth=-1,
    learning_rate=0.1,
    n_estimators=100,
    subsample_for_bin=200000,
    objective=None,
    class_weight=None,
    min_split_gain=0.0,
    min_child_weight=0.001,
    min_child_samples=20,
    subsample=1.0,
    subsample_freq=0,
    colsample_bytree=1.0,
    reg_alpha=0.0,
    reg_lambda=0.0,
    random_state=None,
    n_jobs=-1,
    importance_type='split',
)
light.fit(X_train, Y_train)
light_prediction = light.predict(X_test)

# Overwrite the target column on `test` with the LightGBM predictions and export.
test['Item_Outlet_Sales'] = light_prediction
lightPred = test[['Item_Identifier','Outlet_Identifier','Item_Outlet_Sales']]
lightPred.to_csv('LightGBMRegression11Submission.csv', index=False)



In [0]:
from xgboost import XGBRegressor

# XGBoost regressor with library-default hyper-parameters.
# `fit` returns the estimator, so construction and training are chained.
xg = XGBRegressor().fit(X_train, Y_train)
xg_prediction = xg.predict(X_test)

# Overwrite the target column on `test`, then export identifiers + prediction.
test['Item_Outlet_Sales'] = xg_prediction
xgPred = test.loc[:, ['Item_Identifier','Outlet_Identifier','Item_Outlet_Sales']]
xgPred.to_csv('XGBoostRegressionSubmission.csv', index=False)

In [0]:
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, AdaBoostRegressor

# Gradient boosting with Huber loss.
# Two arguments from the original call were dropped because they no longer
# exist in scikit-learn and make the constructor raise TypeError:
#   * `min_impurity_split=None`  (removed in 1.0)
#   * `presort='auto'`           (deprecated in 0.22, removed in 0.24)
# Both were passed with their no-op/default values, so the fitted model is
# configured exactly as before on versions that still accepted them.
gb = GradientBoostingRegressor(
    alpha=0.999,
    criterion='friedman_mse',
    init=None,
    learning_rate=0.061,
    loss='huber',
    max_depth=3,
    max_features=None,
    max_leaf_nodes=None,
    min_impurity_decrease=0.0,
    min_samples_leaf=4,
    min_samples_split=4,
    min_weight_fraction_leaf=0.0,
    n_estimators=102,
    n_iter_no_change=None,
    random_state=None,
    subsample=1.0,
    tol=0.0001,
    validation_fraction=0.1,
    verbose=0,
    warm_start=False,
)
gb.fit(X_train, Y_train)
gb_prediction = gb.predict(X_test)

# Overwrite the target column on `test` with the boosted predictions and export.
test['Item_Outlet_Sales'] = gb_prediction
gbPred = test[['Item_Identifier','Outlet_Identifier','Item_Outlet_Sales']]
gbPred.to_csv('GradientBoostingRegressionSubmission.csv', index=False)

In [0]:
# Import the submodule explicitly: a bare `import sklearn` does not guarantee
# that `sklearn.ensemble` is loaded, so the attribute access below only worked
# when an earlier cell had already imported from it.  This form still binds the
# name `sklearn`.
import sklearn.ensemble

# Extra-trees regressor with 100 fully grown trees.
# Three arguments from the original call were dropped because current
# scikit-learn rejects them; each was passed with the value that is (or is
# equivalent to) the default for this regressor:
#   * `criterion='mse'`          (renamed 'squared_error' in 1.0, old name removed in 1.2)
#   * `max_features='auto'`      (deprecated in 1.1, removed in 1.3; meant "all features")
#   * `min_impurity_split=None`  (removed in 1.0)
extra = sklearn.ensemble.ExtraTreesRegressor(
    n_estimators=100,
    max_depth=None,
    min_samples_split=2,
    min_samples_leaf=1,
    min_weight_fraction_leaf=0.0,
    max_leaf_nodes=None,
    min_impurity_decrease=0.0,
    bootstrap=False,
    oob_score=False,
    n_jobs=None,
    random_state=None,
    verbose=0,
    warm_start=False,
    ccp_alpha=0.0,
    max_samples=None,
)
extra.fit(X_train, Y_train)
extra_prediction = extra.predict(X_test)

# Overwrite the target column on `test` with the extra-trees predictions and export.
test['Item_Outlet_Sales'] = extra_prediction
extraPred = test[['Item_Identifier','Outlet_Identifier','Item_Outlet_Sales']]
extraPred.to_csv('ExtratreeRegressionSubmission.csv', index=False)