## Imports


In [1]:
import numpy as np
import pandas as pd

from scipy.stats import rv_discrete

# import sic-learn modules/ ml algoriths
# from sklearn import preprocessing
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold 
from sklearn.model_selection import StratifiedKFold
from sklearn import metrics
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error

# classifiers
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso
from sklearn.ensemble import GradientBoostingRegressor

from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import Perceptron
from sklearn.svm import SVC


# visualization
from IPython.display import display, HTML
from tabulate import tabulate
from matplotlib import pyplot as plt
import seaborn as sns

## Load Dataset

Use pandas in order to load dataset from txt into dataframe using " " as seperator.

In [2]:
dataset_17x7_path = "datasets/17x7.txt"
dataset_20x10_path = "datasets/20x10.txt"

df_17x7 = pd.read_csv(dataset_17x7_path, sep=" ", header=None)

df_17x7

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,19,20,21,22,23,24,25,26,27,28
0,0,84.00,0.0,20.43,2,37.33,0.00,3.32,3,62.67,...,42.99,10,70.67,0.0,14.69,14,52.00,0.0,6.81,425.8133
1,2,45.33,0.0,4.38,4,12.00,0.00,0.46,6,60.00,...,14.24,11,88.00,0.0,25.34,13,100.00,12.0,42.07,409.8400
2,3,56.00,0.0,10.83,8,12.00,0.00,0.80,9,65.33,...,43.73,12,25.33,0.0,1.84,14,42.67,0.0,6.00,365.1733
3,2,38.67,0.0,3.85,4,9.33,0.00,0.37,6,54.67,...,15.25,10,85.33,0.0,23.06,12,21.33,0.0,1.56,380.3200
4,3,81.33,0.0,24.34,4,29.33,0.00,1.54,5,12.00,...,47.25,12,48.00,0.0,4.84,16,64.00,0.0,16.56,298.6933
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7837,1,66.67,0.0,16.01,2,25.33,0.00,1.91,6,38.67,...,0.64,10,57.33,0.0,9.93,13,86.67,0.0,26.24,466.0000
7838,0,68.00,0.0,13.84,1,82.67,0.00,25.30,2,26.67,...,39.00,12,17.33,0.0,1.07,14,38.67,0.0,4.73,425.0133
7839,3,45.33,0.0,6.74,6,58.67,0.00,9.58,7,98.67,...,0.60,13,86.67,0.0,25.85,15,30.67,0.0,4.04,468.6667
7840,0,89.33,0.0,22.61,1,98.67,9.33,38.77,2,33.33,...,0.52,6,58.67,0.0,10.37,10,73.33,0.0,15.66,432.6933


## Explain Dataset Stracture

As we can see Dataset consist of:
- ***assrotments*** as rows
- each row consists of ***7 products*** and their sale statistics
- at the end of each row we have the total revenuew of this assrotments 

Next steps:
- set up collumn names
- and then seperate products in a multi index dataframe 

In [3]:
# declare main column names
mainColumNames = ["produc_id", "total_sale_avg", "exclusive_sale_avg","revenue_contribution"]

# repeat for each assortment products (SOS we use np.tile in order to keep the order of names)
columnName_17x7 = np.tile(mainColumNames, 7)
columnName_20x10 = np.tile(mainColumNames, 10)

# append total revenue column
columnName_17x7 = np.append(columnName_17x7, ["total_assortment_revenue"])
columnName_20x10 = np.append(columnName_20x10, ["total_assortment_revenue"])

print(columnName_17x7)

['produc_id' 'total_sale_avg' 'exclusive_sale_avg' 'revenue_contribution'
 'produc_id' 'total_sale_avg' 'exclusive_sale_avg' 'revenue_contribution'
 'produc_id' 'total_sale_avg' 'exclusive_sale_avg' 'revenue_contribution'
 'produc_id' 'total_sale_avg' 'exclusive_sale_avg' 'revenue_contribution'
 'produc_id' 'total_sale_avg' 'exclusive_sale_avg' 'revenue_contribution'
 'produc_id' 'total_sale_avg' 'exclusive_sale_avg' 'revenue_contribution'
 'produc_id' 'total_sale_avg' 'exclusive_sale_avg' 'revenue_contribution'
 'total_assortment_revenue']


In [4]:
df_17x7.columns = columnName_17x7
df_17x7.head(15)

Unnamed: 0,produc_id,total_sale_avg,exclusive_sale_avg,revenue_contribution,produc_id.1,total_sale_avg.1,exclusive_sale_avg.1,revenue_contribution.1,produc_id.2,total_sale_avg.2,...,revenue_contribution.2,produc_id.3,total_sale_avg.3,exclusive_sale_avg.2,revenue_contribution.3,produc_id.4,total_sale_avg.4,exclusive_sale_avg.3,revenue_contribution.4,total_assortment_revenue
0,0,84.0,0.0,20.43,2,37.33,0.0,3.32,3,62.67,...,42.99,10,70.67,0.0,14.69,14,52.0,0.0,6.81,425.8133
1,2,45.33,0.0,4.38,4,12.0,0.0,0.46,6,60.0,...,14.24,11,88.0,0.0,25.34,13,100.0,12.0,42.07,409.84
2,3,56.0,0.0,10.83,8,12.0,0.0,0.8,9,65.33,...,43.73,12,25.33,0.0,1.84,14,42.67,0.0,6.0,365.1733
3,2,38.67,0.0,3.85,4,9.33,0.0,0.37,6,54.67,...,15.25,10,85.33,0.0,23.06,12,21.33,0.0,1.56,380.32
4,3,81.33,0.0,24.34,4,29.33,0.0,1.54,5,12.0,...,47.25,12,48.0,0.0,4.84,16,64.0,0.0,16.56,298.6933
5,0,53.33,0.0,9.65,6,41.33,0.0,6.42,7,98.67,...,14.3,11,82.67,0.0,22.02,15,26.67,0.0,3.57,434.2667
6,1,100.0,24.0,47.85,2,25.33,0.0,2.13,8,9.33,...,13.35,11,76.0,0.0,20.39,15,40.0,0.0,6.08,401.5733
7,0,77.33,0.0,16.81,2,10.67,0.0,0.79,6,56.0,...,24.2,15,26.67,0.0,3.59,16,36.0,0.0,5.44,442.64
8,2,53.33,0.0,5.4,3,65.33,0.0,12.59,4,33.33,...,17.03,7,97.33,5.33,36.49,10,92.0,0.0,23.59,414.5067
9,1,97.33,14.67,44.56,2,37.33,0.0,3.85,4,32.0,...,14.37,11,82.67,0.0,25.56,14,48.0,0.0,7.31,359.44


In [5]:
# keep total_assortment_revenue in new df and drop this column from main df
df_17x7_original = df_17x7.copy(deep = True)
df_17x7_tar = df_17x7.total_assortment_revenue
df_17x7.drop("total_assortment_revenue", axis=1, inplace=True)

df_17x7.head(5)

Unnamed: 0,produc_id,total_sale_avg,exclusive_sale_avg,revenue_contribution,produc_id.1,total_sale_avg.1,exclusive_sale_avg.1,revenue_contribution.1,produc_id.2,total_sale_avg.2,...,exclusive_sale_avg.2,revenue_contribution.2,produc_id.3,total_sale_avg.3,exclusive_sale_avg.3,revenue_contribution.3,produc_id.4,total_sale_avg.4,exclusive_sale_avg.4,revenue_contribution.4
0,0,84.0,0.0,20.43,2,37.33,0.0,3.32,3,62.67,...,16.0,42.99,10,70.67,0.0,14.69,14,52.0,0.0,6.81
1,2,45.33,0.0,4.38,4,12.0,0.0,0.46,6,60.0,...,0.0,14.24,11,88.0,0.0,25.34,13,100.0,12.0,42.07
2,3,56.0,0.0,10.83,8,12.0,0.0,0.8,9,65.33,...,20.0,43.73,12,25.33,0.0,1.84,14,42.67,0.0,6.0
3,2,38.67,0.0,3.85,4,9.33,0.0,0.37,6,54.67,...,0.0,15.25,10,85.33,0.0,23.06,12,21.33,0.0,1.56
4,3,81.33,0.0,24.34,4,29.33,0.0,1.54,5,12.0,...,17.33,47.25,12,48.0,0.0,4.84,16,64.0,0.0,16.56


In [6]:
## Make id as index on total_assortment_revenue dataframe
df_17x7_tar = df_17x7_tar.to_frame()
df_17x7_tar.index.name = "assorment_id"
df_17x7_tar.reset_index(drop=False, inplace=True)
df_17x7_tar.set_index(["assorment_id", "total_assortment_revenue"],inplace=True)

df_17x7_tar.head(5)

assorment_id,total_assortment_revenue
0,425.8133
1,409.84
2,365.1733
3,380.32
4,298.6933


In [7]:
# breake df into df list every 3 columns
dfs_17x7 = np.array_split(df_17x7, 7, axis=1)

for df in dfs_17x7:
    df.index.name = "assorment_id"
    df.reset_index(drop=False, inplace=True)
    # df = df.astype({"produc_id": int})



dfs_17x7[0].head()

Unnamed: 0,assorment_id,produc_id,total_sale_avg,exclusive_sale_avg,revenue_contribution
0,0,0.0,84.0,0.0,20.43
1,1,2.0,45.33,0.0,4.38
2,2,3.0,56.0,0.0,10.83
3,3,2.0,38.67,0.0,3.85
4,4,3.0,81.33,0.0,24.34


In [8]:
clean_df_17x7 = pd.concat(dfs_17x7)
clean_df_17x7 = clean_df_17x7.astype({"produc_id": int})
clean_df_17x7.set_index(["assorment_id", "produc_id"],inplace=True)
clean_df_17x7.tail(5)

Unnamed: 0_level_0,Unnamed: 1_level_0,total_sale_avg,exclusive_sale_avg,revenue_contribution
assorment_id,produc_id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
7837,13,86.67,0.0,26.24
7838,14,38.67,0.0,4.73
7839,15,30.67,0.0,4.04
7840,10,73.33,0.0,15.66
7841,15,38.67,0.0,5.92


In [9]:
clean_df_17x7.sort_index().head(15)

Unnamed: 0_level_0,Unnamed: 1_level_0,total_sale_avg,exclusive_sale_avg,revenue_contribution
assorment_id,produc_id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,0,84.0,0.0,20.43
0,2,37.33,0.0,3.32
0,3,62.67,0.0,11.12
0,4,17.33,0.0,0.64
0,7,100.0,16.0,42.99
0,10,70.67,0.0,14.69
0,14,52.0,0.0,6.81
1,2,45.33,0.0,4.38
1,4,12.0,0.0,0.46
1,6,60.0,0.0,11.4


##  Merge assortments with total_assortment_revenue

In [10]:
clean_df_17x7 = pd.merge(clean_df_17x7, df_17x7_tar, left_index=True, right_index=True, how='outer')


## Testing Product 7

extract statistics about product 7 avg 90% in total sales
extract statistics about product 4 avg 90% in total sales



In [11]:
clean_df_17x7.index.get_level_values('produc_id')

test_df_7 = clean_df_17x7.iloc[clean_df_17x7.index.get_level_values('produc_id') == 7]
test_df_7.head(5)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,total_sale_avg,exclusive_sale_avg,revenue_contribution
assorment_id,produc_id,total_assortment_revenue,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,7,425.8133,100.0,16.0,42.99
3,7,380.32,97.33,12.0,42.45
5,7,434.2667,98.67,16.0,42.13
7,7,442.64,97.33,8.0,36.71
8,7,414.5067,97.33,5.33,36.49


In [12]:
# get all assortments that cointain 7
all_assortments = test_df_7.index.get_level_values('assorment_id')
print(all_assortments)

# reset index from test 7
test_df_7.reset_index(drop=False, inplace=True)

Int64Index([   0,    3,    5,    7,    8,   10,   14,   19,   20,   21,
            ...
            7812, 7814, 7816, 7827, 7830, 7833, 7836, 7837, 7838, 7839],
           dtype='int64', name='assorment_id', length=3234)


## Calculation of exact revenue

$\frac{P(i).total\_assortment\_revenue}{100}*P(i).revenue\_contribution = E(P(i))$

In [13]:
# mia column pou na exei to akrives income tou 7
test_df_7["exact_revenue"] = (test_df_7.total_assortment_revenue / 100) * test_df_7.revenue_contribution
# auti / to total gia na vroume ton meso oro esodon tou 7
item_7_profit_avg = test_df_7["exact_revenue"].mean()
print("The mean exact revenue of item 7 is: {}".format(item_7_profit_avg))
# na vroume to pososto ton poliseon pou exei kata meso oro se ola ta assortments



test_df_7.head(5)

The mean exact revenue of item 7 is: 174.42819637753246


Unnamed: 0,assorment_id,produc_id,total_assortment_revenue,total_sale_avg,exclusive_sale_avg,revenue_contribution,exact_revenue
0,0,7,425.8133,100.0,16.0,42.99,183.057138
1,3,7,380.32,97.33,12.0,42.45,161.44584
2,5,7,434.2667,98.67,16.0,42.13,182.956561
3,7,7,442.64,97.33,8.0,36.71,162.493144
4,8,7,414.5067,97.33,5.33,36.49,151.253495


In [14]:
test_df_7[["total_assortment_revenue", "total_sale_avg", "exclusive_sale_avg", "revenue_contribution", "exact_revenue"]].describe()

Unnamed: 0,total_assortment_revenue,total_sale_avg,exclusive_sale_avg,revenue_contribution,exact_revenue
count,3234.0,3234.0,3234.0,3234.0,3234.0
mean,430.529747,98.335739,14.05692,40.768995,174.428196
std,39.529225,1.479654,3.925174,3.505331,10.266258
min,291.2,92.0,2.67,30.17,135.934387
25%,403.5667,97.33,12.0,38.36,167.533029
50%,431.18665,98.67,13.33,40.73,174.308296
75%,457.433325,100.0,16.0,43.09,181.327289
max,543.7067,100.0,29.33,53.74,207.380592


## Epeksigisi tou discribe gia to 7

- exoume 3234 assortments me to 7
- meso assortment revenue gia 430.5 gia osa periexoun to 7
- 

## Methodologia algorithmou provlepsis

The main idea is to create a Dataframe consist of 18 features. One for each item of the item Stet ()

## Format Mathimatical Regression Model

$\frac{P(i)revenue\_contribution.}{100}*P(i).exact\_revenue = E(P(i))$


$\sum_{i=0}^{17} E(P(i)) = total\_assortment\_revenue$	





In [15]:
df_17x7_original.head()

Unnamed: 0,produc_id,total_sale_avg,exclusive_sale_avg,revenue_contribution,produc_id.1,total_sale_avg.1,exclusive_sale_avg.1,revenue_contribution.1,produc_id.2,total_sale_avg.2,...,revenue_contribution.2,produc_id.3,total_sale_avg.3,exclusive_sale_avg.2,revenue_contribution.3,produc_id.4,total_sale_avg.4,exclusive_sale_avg.3,revenue_contribution.4,total_assortment_revenue
0,0,84.0,0.0,20.43,2,37.33,0.0,3.32,3,62.67,...,42.99,10,70.67,0.0,14.69,14,52.0,0.0,6.81,425.8133
1,2,45.33,0.0,4.38,4,12.0,0.0,0.46,6,60.0,...,14.24,11,88.0,0.0,25.34,13,100.0,12.0,42.07,409.84
2,3,56.0,0.0,10.83,8,12.0,0.0,0.8,9,65.33,...,43.73,12,25.33,0.0,1.84,14,42.67,0.0,6.0,365.1733
3,2,38.67,0.0,3.85,4,9.33,0.0,0.37,6,54.67,...,15.25,10,85.33,0.0,23.06,12,21.33,0.0,1.56,380.32
4,3,81.33,0.0,24.34,4,29.33,0.0,1.54,5,12.0,...,47.25,12,48.0,0.0,4.84,16,64.0,0.0,16.56,298.6933


# Metatropi tou provlimatos se regression provlima

** Basiki idea**
- Dimiourgoume ena arxiko dataframe to opoio exei 17 columns mia gia kathe proion kai kathe mia periexei tin mesi posostiea sinisfora ana proion
- Xorizoume auto to dataframe se train kai test set
- Eksagoume to avgerage tis mesis posostieas sinisforas gia to kathe proion

**Gia to erotima A**
- kanoume ena filter gia na vroume ola ta assortments pou periexoun to *Proion stoxos*
- gia kathe assortment pou periexei to proion stoxos to topotheto se ena dataframe opou apotelite apo 18 kolones
    - mia gia kathe proion tou assortment
    - kai mia gia ta anamenomena kerdi apo auto to proion
- Oi kolones pou perigrafoun to ekastote proion tou assortment tha apotelounte apo:
    - Eksagoume to avgerage tis mesis posostieas sinisforas gia to kathe proion
    - Polaplasiasmeni me 0 or 1 analogos an uparxei to proion sto assortment i oxi

    

In [16]:
def columnNames_builder(total_products, target_column_name):
    columnNames = []
    for i in range(0, total_products):
        columnNames.append("item {}".format(i))

    columnNames.append(target_column_name)
    return columnNames
    
def df_B_builder(row, total_products, columnNames):
    items = np.zeros(total_products + 1, np.float)
    
    ## set 1 to items list for every item in assortment
    for i in range(0,len(row) -1, 4):
        item_id = np.int(row[i])
        items[item_id] = 1

    ## set last column with assortment mean rev
    items[-1] = np.float(row[-1])

    return list(items)

    

In [17]:
def df_A_builder(row, total_products, columnNames, targetIndex):
    items = np.zeros(total_products + 1, np.float)
    
    ## set 1 to items list for every item in assortment
    for i in range(0,len(row) -1, 4):
        item_id = np.int(row[i])
        items[item_id] = 1
        # items[item_id] =  row[-1] / 100 * row[i + 3]

        if item_id == targetIndex:
            ## set last column with assortment mean rev
            items[-1] = np.float( row[-1] / 100 * row[i + 3])

    return list(items)

## Get filtered df by item id
def get_assortments_with_item_id(clean_df, item_id):
    filtered_df = clean_df_17x7.iloc[clean_df.index.get_level_values('produc_id') == item_id]
    return filtered_df.index.get_level_values('assorment_id')


In [18]:
target_item_id = 4

assortment_ids = get_assortments_with_item_id(clean_df_17x7, target_item_id)

filtered_by_item_df = df_17x7_original.iloc[assortment_ids]
filtered_by_item_df

Unnamed: 0,produc_id,total_sale_avg,exclusive_sale_avg,revenue_contribution,produc_id.1,total_sale_avg.1,exclusive_sale_avg.1,revenue_contribution.1,produc_id.2,total_sale_avg.2,...,revenue_contribution.2,produc_id.3,total_sale_avg.3,exclusive_sale_avg.2,revenue_contribution.3,produc_id.4,total_sale_avg.4,exclusive_sale_avg.3,revenue_contribution.4,total_assortment_revenue
0,0,84.00,0.0,20.43,2,37.33,0.0,3.32,3,62.67,...,42.99,10,70.67,0.00,14.69,14,52.00,0.00,6.81,425.8133
1,2,45.33,0.0,4.38,4,12.00,0.0,0.46,6,60.00,...,14.24,11,88.00,0.00,25.34,13,100.00,12.00,42.07,409.8400
3,2,38.67,0.0,3.85,4,9.33,0.0,0.37,6,54.67,...,15.25,10,85.33,0.00,23.06,12,21.33,0.00,1.56,380.3200
4,3,81.33,0.0,24.34,4,29.33,0.0,1.54,5,12.00,...,47.25,12,48.00,0.00,4.84,16,64.00,0.00,16.56,298.6933
8,2,53.33,0.0,5.40,3,65.33,0.0,12.59,4,33.33,...,17.03,7,97.33,5.33,36.49,10,92.00,0.00,23.59,414.5067
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7828,4,10.67,0.0,0.36,6,34.67,0.0,5.56,9,50.67,...,23.70,13,94.67,10.67,38.78,14,21.33,0.00,2.44,406.5067
7831,4,14.67,0.0,0.56,6,52.00,0.0,10.17,9,65.33,...,40.97,12,24.00,0.00,1.70,15,36.00,0.00,5.93,368.4000
7832,1,80.00,0.0,24.36,3,37.33,0.0,6.01,4,20.00,...,9.12,10,69.33,0.00,14.65,13,97.33,17.33,42.05,414.0267
7833,4,24.00,0.0,0.90,5,13.33,0.0,0.45,6,46.67,...,12.39,11,76.00,0.00,21.94,16,34.67,0.00,5.79,372.9067


In [19]:
columnNames = columnNames_builder(17, "target_revenue_contribution")

# df_B = pd.DataFrame(columns=columnNames, index=df_17x7_original.index)
test = filtered_by_item_df.apply(lambda row: df_A_builder(row, 17, columnNames, target_item_id), axis=1)

# create df in order to predict target_revenue_contribution
df_A = pd.DataFrame(list(test), columns=columnNames)
df_A.drop("item {}".format(target_item_id), axis=1, inplace=True)
df_A

Unnamed: 0,item 0,item 1,item 2,item 3,item 5,item 6,item 7,item 8,item 9,item 10,item 11,item 12,item 13,item 14,item 15,item 16,target_revenue_contribution
0,1.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,2.725205
1,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.885264
2,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.407184
3,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,4.599877
4,0.0,0.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,5.761643
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3224,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,0.0,0.0,1.463424
3225,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0,2.063040
3226,0.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,2.980992
3227,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,3.356160


In [20]:
columnNames = columnNames_builder(17, "total_assortment_revenue")
print(columnNames)

# df_B = pd.DataFrame(columns=columnNames, index=df_17x7_original.index)
test = df_17x7_original.apply(lambda row: df_B_builder(row, 17, columnNames), axis=1)


df_B = pd.DataFrame(list(test), columns=columnNames)
df_B

['item 0', 'item 1', 'item 2', 'item 3', 'item 4', 'item 5', 'item 6', 'item 7', 'item 8', 'item 9', 'item 10', 'item 11', 'item 12', 'item 13', 'item 14', 'item 15', 'item 16', 'total_assortment_revenue']


Unnamed: 0,item 0,item 1,item 2,item 3,item 4,item 5,item 6,item 7,item 8,item 9,item 10,item 11,item 12,item 13,item 14,item 15,item 16,total_assortment_revenue
0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,425.8133
1,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,409.8400
2,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,0.0,0.0,365.1733
3,0.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,380.3200
4,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,298.6933
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7837,0.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,466.0000
7838,1.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,425.0133
7839,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,468.6667
7840,1.0,1.0,1.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,432.6933


# CASE A
### Split dataset to train and test

In [21]:
y = df_A['target_revenue_contribution']
x = df_A.drop("target_revenue_contribution", axis=1)

# x = StandardScaler().fit_transform(x)  # normalizing the features
# print("\nShape of normalized data:", x.shape)
# print("\nPrints mean:", np.mean(x), " and Standard deviation of normalized dataset: ", np.std(x))

# y = StandardScaler().fit_transform(y)  # normalizing the features
# print("\nShape of normalized data:", y.shape)
# print("\nPrints mean:", np.mean(y), " and Standard deviation of normalized dataset: ", np.std(y))

X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=42)

# this list will contain results in dict format in order to display them into dataframe.
results = []

In [22]:
x

Unnamed: 0,item 0,item 1,item 2,item 3,item 5,item 6,item 7,item 8,item 9,item 10,item 11,item 12,item 13,item 14,item 15,item 16
0,1.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0
1,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0
2,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0
4,0.0,0.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3224,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,0.0,0.0
3225,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0
3226,0.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0
3227,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0


### Linear Regression For Case A

In [23]:
lr = LinearRegression()
lr.fit(X_train, y_train)
pred = lr.predict(X_train)
print('\nLinear Regression - Train accuracy: ', r2_score(y_train, pred))

pred = lr.predict(X_test)
print('\nLinear Regression - Test accuracy: ', r2_score(y_test, pred))

results.append({
    "algorithm": "LinearRegression",
    "train_score": r2_score(y_train, lr.predict(X_train)),
    "test_score": r2_score(y_test, lr.predict(X_test))
})



Linear Regression - Train accuracy:  0.7008274232424777

Linear Regression - Test accuracy:  0.6899977861217835


In [24]:
mse = mean_squared_error(y_test, lr.predict(X_test))
print("The mean squared error (MSE) on test set: {:.4f}".format(mse))
print("The root mean squared error (RMSE) on test set: {:.4f}".format(np.sqrt(mse)))


The mean squared error (MSE) on test set: 0.6088
The root mean squared error (RMSE) on test set: 0.7802


# Applay gradiant boosting for A

In [25]:
params = {'n_estimators': 500,
          'max_depth': 2,
          'min_samples_split': 5,
          'learning_rate': 0.01,
          'loss': 'ls'}

reg = GradientBoostingRegressor(**params)
reg.fit(X_train, y_train)

mse = mean_squared_error(y_test, reg.predict(X_test))
print("The mean squared error (MSE) on test set: {:.4f}".format(mse))
print("The root mean squared error (RMSE) on test set: {:.4f}".format(np.sqrt(mse)))

The mean squared error (MSE) on test set: 0.6099
The root mean squared error (RMSE) on test set: 0.7810


## Understanding results for case A (TODO REMOVE THEM)

In [26]:
np.array(y_test)[0:5]

array([2.21808553, 4.88177114, 2.34613617, 1.52501083, 1.733416  ])

In [27]:
print(list(pred)[0:5])

[2.10498046875, 4.740478515625, 2.198974609375, 2.121337890625, 1.980712890625]


In [28]:
results

[{'algorithm': 'LinearRegression',
  'train_score': 0.7008274232424777,
  'test_score': 0.6899977861217835}]

In [29]:
lr.coef_

array([2.57349117e+11, 2.57349117e+11, 2.57349117e+11, 2.57349117e+11,
       2.57349117e+11, 2.57349117e+11, 2.57349117e+11, 2.57349117e+11,
       2.57349117e+11, 2.57349117e+11, 2.57349117e+11, 2.57349117e+11,
       2.57349117e+11, 2.57349117e+11, 2.57349117e+11, 2.57349117e+11])

In [30]:
w = np.zeros(16, np.float)
w[[4,5,7,9,10,11]] = 1
y = np.array([37.51])
my_ass = w * lr.coef_


my_ass = np.array([0, 0, 0, 0, 1.74, 0.72, 0, 4.03, 0, 20.53, 27.12, 5.69, 0, 0, 0, 0])
my_ass = np.array([0, 0, 0, 0, 6.759552, 2.797056, 0, 15.655744, 0, 79.754944, 105.355776, 22.104512, 0, 0, 0, 0])
my_ass = np.array([0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0])

In [31]:
pred = lr.predict(my_ass.reshape(1,-1))
pred

array([4.67260742])

In [32]:
X_test

Unnamed: 0,item 0,item 1,item 2,item 3,item 5,item 6,item 7,item 8,item 9,item 10,item 11,item 12,item 13,item 14,item 15,item 16
449,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0
2098,0.0,0.0,1.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
299,0.0,1.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0
3069,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,1.0
1590,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1221,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0
252,1.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0
2577,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0,1.0
2162,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0


In [33]:
y_test

449     2.218086
2098    4.881771
299     2.346136
3069    1.525011
1590    1.733416
          ...   
1221    6.069051
252     1.235600
2577    1.584960
2162    1.578048
2177    3.948048
Name: target_revenue_contribution, Length: 969, dtype: float64

# Case B perform regression.

In [34]:
y = df_B['total_assortment_revenue']
x = df_B.drop("total_assortment_revenue", axis=1)

X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=42)

# this list will contain results in dict format in order to display them into dataframe.
results = []

In [35]:
x

Unnamed: 0,item 0,item 1,item 2,item 3,item 4,item 5,item 6,item 7,item 8,item 9,item 10,item 11,item 12,item 13,item 14,item 15,item 16
0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0
1,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0
2,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,0.0,0.0
3,0.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7837,0.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0
7838,1.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0
7839,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0
7840,1.0,1.0,1.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0


In [36]:
lr = LinearRegression()
lr.fit(X_train, y_train)
pred = lr.predict(X_train)
print('\nLinear Regression - Train accuracy: ', r2_score(y_train, pred))

pred = lr.predict(X_test)
print('\nLinear Regression - Test accuracy: ', r2_score(y_test, pred))

results.append({
    "algorithm": "LinearRegression",
    "train_score": r2_score(y_train, lr.predict(X_train)),
    "test_score": r2_score(y_test, lr.predict(X_test))
})


Linear Regression - Train accuracy:  0.8037139428977266

Linear Regression - Test accuracy:  0.804360697473882


In [37]:
mse = mean_squared_error(y_test, lr.predict(X_test))
print("The mean squared error (MSE) on test set: {:.4f}".format(mse))
print("The root mean squared error (RMSE) on test set: {:.4f}".format(np.sqrt(mse)))

The mean squared error (MSE) on test set: 409.7467
The root mean squared error (RMSE) on test set: 20.2422


## Applay gradiant boosting for B


In [38]:
params = {'n_estimators': 500,
          'max_depth': 2,
          'min_samples_split': 5,
          'learning_rate': 0.01,
          'loss': 'ls'}

reg = GradientBoostingRegressor(**params)
reg.fit(X_train, y_train)

mse = mean_squared_error(y_test, reg.predict(X_test))
print("The mean squared error (MSE) on test set: {:.4f}".format(mse))
print("The root mean squared error (RMSE) on test set: {:.4f}".format(np.sqrt(mse)))

The mean squared error (MSE) on test set: 566.7541
The root mean squared error (RMSE) on test set: 23.8066


## Understanding results for case A (TODO REMOVE THEM)

In [39]:
np.array(y_test)[0:5]

array([354.1067, 451.12  , 443.7067, 465.04  , 406.0533])

In [40]:
print(list(pred)[0:5])

[374.90625, 430.28125, 441.21875, 477.0625, 399.0625]


# Modelopoiish gia Classification

Efarmozoume tin idia logiki gia na paraksoume ena Dataframe pou tha exei os Columns ta proionta kai os metavliti stoxo tha exei:
- Gia to ertima A2 mia katigoriki metvliti pou tha ipodilonei an to proion i ws meros tou sigkekrimenou Assortment apodidei perisotero apo thn mesi timi twn meson anamenomenon esodon i oxi
- Gia to erotima B2 mia katigoriki metavliti pou tha ipodilonei an to assortment apodidei perisotero apo tin mesa timi anamenomen esodon

kai stis 2 periptosis i mesi timi ipologizete epano se ena training set.

In [41]:
def clean_df_builder(row, total_products, columnNames):
    items = np.zeros(total_products + 1, np.float)
    
    ## set 1 to items list for every item in assortment
    for i in range(0,len(row) -1, 4):
        item_id = np.int(row[i])
        items[item_id] = row[i + 3]

    ## set last column with assortment mean rev
    items[-1] = np.float(row[-1])

    return list(items)

def mean_item_rev_cont(col):
    np_col = col.to_numpy()
    np_mean = np.mean(np_col[np_col > 0])
    return np_mean

def clasify_df_A_builder(row, total_products, targetIndex, itemAvg):
    items = np.zeros(total_products + 1, np.float)
    
    ## set 1 to items list for every item in assortment
    for i in range(0,len(row) -1):
        items[i] = 1 if row[i] > 0 else 0
        # items[item_id] =  row[-1] / 100 * row[i + 3]

        if i == targetIndex:
            ## set last column with assortment mean revtarget_item_id
            # items[-1] =  row[i] 
            items[-1] = 1 if row["item {}".format(target_item_id)] > itemAvg else 0
    return list(items)

def clasify_df_B_builder(row, total_products, targetIndex, itemAvg):
    items = np.zeros(total_products + 1, np.float)
    
    ## set 1 to items list for every item in assortment
    for i in range(0,len(row) -1):
        items[i] = 1 if row[i] > 0 else 0
        # items[item_id] =  row[-1] / 100 * row[i + 3]


    ## set last column with assortment mean revtarget_item_id
    # items[-1] = row[-1] 
    items[-1] = 1 if row[-1] > itemAvg else 0

    return list(items)

In [42]:
columnNames = columnNames_builder(17, "total_assortment_revenue")

apply_result = df_17x7_original.apply(lambda row: clean_df_builder(row, 17, columnNames), axis=1)


transformed_df = pd.DataFrame(list(apply_result), columns=columnNames)
transformed_df

Unnamed: 0,item 0,item 1,item 2,item 3,item 4,item 5,item 6,item 7,item 8,item 9,item 10,item 11,item 12,item 13,item 14,item 15,item 16,total_assortment_revenue
0,20.43,0.00,3.32,11.12,0.64,0.00,0.00,42.99,0.00,0.00,14.69,0.00,0.00,0.00,6.81,0.00,0.00,425.8133
1,0.00,0.00,4.38,0.00,0.46,0.00,11.40,0.00,2.10,14.24,0.00,25.34,0.00,42.07,0.00,0.00,0.00,409.8400
2,0.00,0.00,0.00,10.83,0.00,0.00,0.00,0.00,0.80,14.16,21.32,43.73,1.84,0.00,6.00,0.00,0.00,365.1733
3,0.00,0.00,3.85,0.00,0.37,0.00,10.79,42.45,0.00,15.25,23.06,0.00,1.56,0.00,0.00,0.00,0.00,380.3200
4,0.00,0.00,0.00,24.34,1.54,0.56,0.00,0.00,3.58,0.00,0.00,47.25,4.84,0.00,0.00,0.00,16.56,298.6933
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7837,0.00,16.01,1.91,0.00,0.00,0.00,5.69,36.91,0.64,0.00,9.93,0.00,0.00,26.24,0.00,0.00,0.00,466.0000
7838,13.84,25.30,2.20,9.87,0.00,0.00,0.00,39.00,0.00,0.00,0.00,0.00,1.07,0.00,4.73,0.00,0.00,425.0133
7839,0.00,0.00,0.00,6.74,0.00,0.00,9.58,37.85,0.00,0.00,14.02,0.00,0.60,25.85,0.00,4.04,0.00,468.6667
7840,22.61,38.77,2.86,7.87,0.00,0.52,10.37,0.00,0.00,0.00,15.66,0.00,0.00,0.00,0.00,0.00,0.00,432.6933


## Diaxorizoume to dataset se train kai test
Diaxorizoume to metasximatismeno/katharismono DF se train kai test Set
Auto to epitixanoume kratontas tous indexes gia kathe row

In [43]:
y = transformed_df['total_assortment_revenue']
x = transformed_df.drop("total_assortment_revenue", axis=1)


X_train, X_test, _, _ = train_test_split(x.index, y, test_size=0.3, random_state=42)

# this list will contain results in dict format in order to display them into dataframe.
results = []

In [44]:
print("------------------- X train is ------------------------")
print(X_train)
print("------------------- X test is ------------------------")
print(X_test)

------------------- X train is ------------------------
Int64Index([ 605, 3196, 3779, 1468, 5784, 6598, 4209, 5433, 3975, 6891,
            ...
            6265, 5734, 3092, 3772, 5191, 5226, 5390,  860, 7603, 7270],
           dtype='int64', length=5489)
------------------- X test is ------------------------
Int64Index([6667, 2166, 3422, 1905, 1454, 3307, 4845, 2464, 7708, 6259,
            ...
            6865, 6534, 2453, 1611, 4169, 7092, 4639, 4050, 3574, 3693],
           dtype='int64', length=2353)


### Eksagoume avg tou mean_contribution

Eksagoume ton meso oro tis mesis posostieas sinisforas gia to train set
(ton meso oro apo ta assortemts sta opoia emfanizete to proion)

In [45]:

avg_series = transformed_df.apply(lambda col: mean_item_rev_cont(col))
avg_series

item 0                       18.916481
item 1                       30.813594
item 2                        3.507274
item 3                       10.802318
item 4                        0.785088
item 5                        0.505239
item 6                       12.662482
item 7                       40.768995
item 8                        1.750305
item 9                       14.508958
item 10                      18.173242
item 11                      24.372052
item 12                       2.281954
item 13                      35.763209
item 14                       5.672377
item 15                       7.796627
item 16                       9.509199
total_assortment_revenue    408.477688
dtype: float64

In [46]:
target_item_id = 4

assortment_ids = get_assortments_with_item_id(clean_df_17x7, target_item_id)

filtered_by_item_df = transformed_df.iloc[assortment_ids]
filtered_by_item_df

Unnamed: 0,item 0,item 1,item 2,item 3,item 4,item 5,item 6,item 7,item 8,item 9,item 10,item 11,item 12,item 13,item 14,item 15,item 16,total_assortment_revenue
0,20.43,0.00,3.32,11.12,0.64,0.00,0.00,42.99,0.00,0.00,14.69,0.00,0.00,0.00,6.81,0.00,0.00,425.8133
1,0.00,0.00,4.38,0.00,0.46,0.00,11.40,0.00,2.10,14.24,0.00,25.34,0.00,42.07,0.00,0.00,0.00,409.8400
3,0.00,0.00,3.85,0.00,0.37,0.00,10.79,42.45,0.00,15.25,23.06,0.00,1.56,0.00,0.00,0.00,0.00,380.3200
4,0.00,0.00,0.00,24.34,1.54,0.56,0.00,0.00,3.58,0.00,0.00,47.25,4.84,0.00,0.00,0.00,16.56,298.6933
8,0.00,0.00,5.40,12.59,1.39,0.85,17.03,36.49,0.00,0.00,23.59,0.00,0.00,0.00,0.00,0.00,0.00,414.5067
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7828,0.00,0.00,0.00,0.00,0.36,0.00,5.56,0.00,0.00,9.12,14.71,23.70,0.00,38.78,2.44,0.00,0.00,406.5067
7831,0.00,0.00,0.00,0.00,0.56,0.00,10.17,0.00,0.00,14.50,24.83,40.97,1.70,0.00,0.00,5.93,0.00,368.4000
7832,0.00,24.36,0.00,6.01,0.72,0.43,9.12,0.00,0.00,0.00,14.65,0.00,0.00,42.05,0.00,0.00,0.00,414.0267
7833,0.00,0.00,0.00,0.00,0.90,0.45,8.49,47.36,0.00,12.39,0.00,21.94,0.00,0.00,0.00,0.00,5.79,372.9067


In [47]:
columnNames = columnNames_builder(17, "label")

# df_B = pd.DataFrame(columns=columnNames, index=df_17x7_original.index)
test = filtered_by_item_df.apply(lambda row: clasify_df_A_builder(row, 17, target_item_id, avg_series["item {}".format(target_item_id)]), axis=1)

# create df in order to predict target_revenue_contribution
df_classify_A = pd.DataFrame(list(test), columns=columnNames)
df_classify_A.drop("item {}".format(target_item_id), axis=1, inplace=True)
df_classify_A

Unnamed: 0,item 0,item 1,item 2,item 3,item 5,item 6,item 7,item 8,item 9,item 10,item 11,item 12,item 13,item 14,item 15,item 16,label
0,1.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
1,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0
2,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0
4,0.0,0.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3224,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0
3225,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0
3226,0.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
3227,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0


In [48]:
# step 1
label = df_classify_A["label"].to_numpy()
print(label)

x = df_classify_A.drop(['label'], axis = 1)

# step 2
x = StandardScaler().fit_transform(x)  # normalizing the features
print("\nShape of normalized data:", x.shape)

# elenxoume an to kanonikopoiimeno mas data set exei mean 0 kai tipiki apoklisi 1
print("\nPrints mean:", np.mean(x), " and Standard deviation of normalized dataset: ", np.std(x))

# step 3: change feature name
feature_columns = ['item ' + str(i) for i in range(x.shape[1])]
normalized_dataFrame = pd.DataFrame(x, columns=feature_columns)

display(normalized_dataFrame.head())

[0. 0. 0. ... 0. 1. 0.]

Shape of normalized data: (3229, 16)

Prints mean: -2.420554380105637e-17  and Standard deviation of normalized dataset:  1.0


Unnamed: 0,item 0,item 1,item 2,item 3,item 4,item 5,item 6,item 7,item 8,item 9,item 10,item 11,item 12,item 13,item 14,item 15
0,1.284939,-0.781328,1.282402,1.274835,-0.760903,-0.779787,1.292595,-0.767512,-0.766494,1.301183,-0.776197,-0.786477,-0.776197,1.308986,-0.764458,-0.785961
1,-0.778247,-0.781328,1.282402,-0.784415,-0.760903,1.282402,-0.773637,1.302911,1.304642,-0.768531,1.288333,-0.786477,1.288333,-0.76395,-0.764458,-0.785961
2,-0.778247,-0.781328,1.282402,-0.784415,-0.760903,1.282402,1.292595,-0.767512,1.304642,1.301183,-0.776197,1.271493,-0.776197,-0.76395,-0.764458,-0.785961
3,-0.778247,-0.781328,-0.779787,1.274835,1.314228,-0.779787,-0.773637,1.302911,-0.766494,-0.768531,1.288333,1.271493,-0.776197,-0.76395,-0.764458,1.272327
4,-0.778247,-0.781328,1.282402,1.274835,1.314228,1.282402,1.292595,-0.767512,-0.766494,1.301183,-0.776197,-0.786477,-0.776197,-0.76395,-0.764458,-0.785961


In [49]:
def prepare_df_for_classify(df, selected_index = None):
    columnNames = columnNames_builder(17, "label")

    # df_B = pd.DataFrame(columns=columnNames, index=df_17x7_original.index)
    test = filtered_by_item_df.apply(lambda row: clasify_df_A_builder(row, 17, target_item_id, avg_series["item {}".format(target_item_id)]), axis=1)

    # create df in order to predict target_revenue_contribution
    df_result = pd.DataFrame(list(test), columns=columnNames)

    # drop selected index if needed
    if selected_index is not None:
        df_result.drop("item {}".format(target_item_id), axis=1, inplace=True)

    # display(df_result)
    return df_result

def get_avg_series(df, selected_index):
    # export average for each product and assortment reveniew on test set
    avg_series = df.apply(lambda col: mean_item_rev_cont(col))
    # print (avg_series)

    return avg_series

def normalize_df(df, targetLabel):

    # step 1
    print(targetLabel)
    label = df[targetLabel].to_numpy()
    print(label)

    x = df.drop([targetLabel], axis = 1)
    display(x)
    # step 2
    x = StandardScaler().fit_transform(x)  # normalizing the features
    # print("\nShape of normalized data:", x.shape)

    # elenxoume an to kanonikopoiimeno mas data set exei mean 0 kai tipiki apoklisi 1
    # print("\nPrints mean:", np.mean(x), " and Standard deviation of normalized dataset: ", np.std(x))

    # step 3: change feature name
    feature_columns = ['item ' + str(i) for i in range(x.shape[1])]
    normalized_dataFrame = pd.DataFrame(x, columns=feature_columns)

    return normalized_dataFrame, label

def kFoldCV(df, targetLabel, classifiers):
    """
    Orizoume mia helper function gia na pragmatopoiei k-fold cross validation
 
    :param df: the x dataset of cross validation
    :param label: the y dataset of cross validation
    :param classifiers: is a dictionary of classifiers in the folowung format
    {
        "name": "KNN",
        "classifier": KNeighborsClassifier(),
        "train_scores": [],
        "acc_scores": []
    }
    :return: returns the trained classifiers dict
    """
    kf = KFold(n_splits=10, random_state=None) 

    for train_index, test_index in kf.split(df):

        # update train and test y
        train_df = df.iloc[train_index]
        avg_series = get_avg_series(train_df, targetLabel)

        # prepare df for classify
        # 1 convert all features to 0 and 1 
        # conver label based on avg_mean of selected index
        updated_df = prepare_df_for_classify(df, targetLabel if targetLabel != "label" else None)
        display(updated_df)


        normalized_df, label = normalize_df(updated_df, "label")
        display(normalized_df)

        X_train, X_test = normalized_df.loc[train_index, :], normalized_df.loc[test_index, :]
        y_train, y_test = label[train_index], label[test_index]

        # fit train dataset to classifiers
        for c in classifiers:
            c["classifier"].fit(X_train, y_train)
            c["train_scores"].append( c["classifier"].score(X_train, y_train))
            c["acc_scores"].append( c["classifier"].score(X_test, y_test))

    return classifiers

In [50]:
classifiers = [
    {
        "name": "KNN",
        "classifier": KNeighborsClassifier(),
        "train_scores": [],
        "acc_scores": []
    },
    {
        "name": "LogisticRegression",
        "classifier": LogisticRegression(),
        "train_scores": [],
        "acc_scores": []
    },
    {
        "name": "LinearRegression",
        "classifier": LinearRegression(),
        "train_scores": [],
        "acc_scores": []
    },
    {
        "name": "DecisionTreeClassifier",
        "classifier": DecisionTreeClassifier(),
        "train_scores": [],
        "acc_scores": [] 
    },
    {
        "name": "GaussianNB",
        "classifier": GaussianNB(),
        "train_scores": [],
        "acc_scores": [] 
    },
    {
        "name": "SVC",
        "classifier": SVC(),
        "train_scores": [],
        "acc_scores": [] 
    }
]

In [51]:
# this list will contain results in dict format in order to display them into dataframe.
results = []

classifier = kFoldCV(filtered_by_item_df, "item {}".format(target_item_id), classifiers)

# ipologizoume tin mesi timi ton apotelesmaton tou taksinomiti mas pou proekipsan apo tin 10-fold-cross-validation
for c in classifiers:
    print("Results for classifier {}: training score: {:.4f}, Accuracy: {:.4f} using 2 Principal Components and simple kfold \n".format(c["name"], np.mean(c["train_scores"]), np.mean(c["acc_scores"])))
    results.append({
    "algorithm": c["name"],
    "test": "2D-PCA",
    "folds": "K-Fold",
    "train_score": np.mean(c["train_scores"]),
    "test_score": np.mean(c["acc_scores"])
    })


Unnamed: 0,item 0,item 1,item 2,item 3,item 5,item 6,item 7,item 8,item 9,item 10,item 11,item 12,item 13,item 14,item 15,item 16,label
0,1.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
1,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0
2,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0
4,0.0,0.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3224,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0
3225,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0
3226,0.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
3227,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0


label
[0. 0. 0. ... 0. 1. 0.]


Unnamed: 0,item 0,item 1,item 2,item 3,item 5,item 6,item 7,item 8,item 9,item 10,item 11,item 12,item 13,item 14,item 15,item 16
0,1.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0
1,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0
2,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0
4,0.0,0.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3224,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,0.0,0.0
3225,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0
3226,0.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0
3227,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0


Unnamed: 0,item 0,item 1,item 2,item 3,item 4,item 5,item 6,item 7,item 8,item 9,item 10,item 11,item 12,item 13,item 14,item 15
0,1.284939,-0.781328,1.282402,1.274835,-0.760903,-0.779787,1.292595,-0.767512,-0.766494,1.301183,-0.776197,-0.786477,-0.776197,1.308986,-0.764458,-0.785961
1,-0.778247,-0.781328,1.282402,-0.784415,-0.760903,1.282402,-0.773637,1.302911,1.304642,-0.768531,1.288333,-0.786477,1.288333,-0.763950,-0.764458,-0.785961
2,-0.778247,-0.781328,1.282402,-0.784415,-0.760903,1.282402,1.292595,-0.767512,1.304642,1.301183,-0.776197,1.271493,-0.776197,-0.763950,-0.764458,-0.785961
3,-0.778247,-0.781328,-0.779787,1.274835,1.314228,-0.779787,-0.773637,1.302911,-0.766494,-0.768531,1.288333,1.271493,-0.776197,-0.763950,-0.764458,1.272327
4,-0.778247,-0.781328,1.282402,1.274835,1.314228,1.282402,1.292595,-0.767512,-0.766494,1.301183,-0.776197,-0.786477,-0.776197,-0.763950,-0.764458,-0.785961
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3224,-0.778247,-0.781328,-0.779787,-0.784415,-0.760903,1.282402,-0.773637,-0.767512,1.304642,1.301183,1.288333,-0.786477,1.288333,1.308986,-0.764458,-0.785961
3225,-0.778247,-0.781328,-0.779787,-0.784415,-0.760903,1.282402,-0.773637,-0.767512,1.304642,1.301183,1.288333,1.271493,-0.776197,-0.763950,1.308116,-0.785961
3226,-0.778247,1.279872,-0.779787,1.274835,1.314228,1.282402,-0.773637,-0.767512,-0.766494,1.301183,-0.776197,-0.786477,1.288333,-0.763950,-0.764458,-0.785961
3227,-0.778247,-0.781328,-0.779787,-0.784415,1.314228,1.282402,1.292595,-0.767512,1.304642,-0.768531,1.288333,-0.786477,-0.776197,-0.763950,-0.764458,1.272327


Unnamed: 0,item 0,item 1,item 2,item 3,item 5,item 6,item 7,item 8,item 9,item 10,item 11,item 12,item 13,item 14,item 15,item 16,label
0,1.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
1,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0
2,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0
4,0.0,0.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3224,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0
3225,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0
3226,0.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
3227,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0


label
[0. 0. 0. ... 0. 1. 0.]


Unnamed: 0,item 0,item 1,item 2,item 3,item 5,item 6,item 7,item 8,item 9,item 10,item 11,item 12,item 13,item 14,item 15,item 16
0,1.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0
1,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0
2,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0
4,0.0,0.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3224,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,0.0,0.0
3225,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0
3226,0.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0
3227,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0


Unnamed: 0,item 0,item 1,item 2,item 3,item 4,item 5,item 6,item 7,item 8,item 9,item 10,item 11,item 12,item 13,item 14,item 15
0,1.284939,-0.781328,1.282402,1.274835,-0.760903,-0.779787,1.292595,-0.767512,-0.766494,1.301183,-0.776197,-0.786477,-0.776197,1.308986,-0.764458,-0.785961
1,-0.778247,-0.781328,1.282402,-0.784415,-0.760903,1.282402,-0.773637,1.302911,1.304642,-0.768531,1.288333,-0.786477,1.288333,-0.763950,-0.764458,-0.785961
2,-0.778247,-0.781328,1.282402,-0.784415,-0.760903,1.282402,1.292595,-0.767512,1.304642,1.301183,-0.776197,1.271493,-0.776197,-0.763950,-0.764458,-0.785961
3,-0.778247,-0.781328,-0.779787,1.274835,1.314228,-0.779787,-0.773637,1.302911,-0.766494,-0.768531,1.288333,1.271493,-0.776197,-0.763950,-0.764458,1.272327
4,-0.778247,-0.781328,1.282402,1.274835,1.314228,1.282402,1.292595,-0.767512,-0.766494,1.301183,-0.776197,-0.786477,-0.776197,-0.763950,-0.764458,-0.785961
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3224,-0.778247,-0.781328,-0.779787,-0.784415,-0.760903,1.282402,-0.773637,-0.767512,1.304642,1.301183,1.288333,-0.786477,1.288333,1.308986,-0.764458,-0.785961
3225,-0.778247,-0.781328,-0.779787,-0.784415,-0.760903,1.282402,-0.773637,-0.767512,1.304642,1.301183,1.288333,1.271493,-0.776197,-0.763950,1.308116,-0.785961
3226,-0.778247,1.279872,-0.779787,1.274835,1.314228,1.282402,-0.773637,-0.767512,-0.766494,1.301183,-0.776197,-0.786477,1.288333,-0.763950,-0.764458,-0.785961
3227,-0.778247,-0.781328,-0.779787,-0.784415,1.314228,1.282402,1.292595,-0.767512,1.304642,-0.768531,1.288333,-0.786477,-0.776197,-0.763950,-0.764458,1.272327


Unnamed: 0,item 0,item 1,item 2,item 3,item 5,item 6,item 7,item 8,item 9,item 10,item 11,item 12,item 13,item 14,item 15,item 16,label
0,1.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
1,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0
2,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0
4,0.0,0.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3224,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0
3225,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0
3226,0.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
3227,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0


label
[0. 0. 0. ... 0. 1. 0.]


Unnamed: 0,item 0,item 1,item 2,item 3,item 5,item 6,item 7,item 8,item 9,item 10,item 11,item 12,item 13,item 14,item 15,item 16
0,1.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0
1,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0
2,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0
4,0.0,0.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3224,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,0.0,0.0
3225,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0
3226,0.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0
3227,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0


Unnamed: 0,item 0,item 1,item 2,item 3,item 4,item 5,item 6,item 7,item 8,item 9,item 10,item 11,item 12,item 13,item 14,item 15
0,1.284939,-0.781328,1.282402,1.274835,-0.760903,-0.779787,1.292595,-0.767512,-0.766494,1.301183,-0.776197,-0.786477,-0.776197,1.308986,-0.764458,-0.785961
1,-0.778247,-0.781328,1.282402,-0.784415,-0.760903,1.282402,-0.773637,1.302911,1.304642,-0.768531,1.288333,-0.786477,1.288333,-0.763950,-0.764458,-0.785961
2,-0.778247,-0.781328,1.282402,-0.784415,-0.760903,1.282402,1.292595,-0.767512,1.304642,1.301183,-0.776197,1.271493,-0.776197,-0.763950,-0.764458,-0.785961
3,-0.778247,-0.781328,-0.779787,1.274835,1.314228,-0.779787,-0.773637,1.302911,-0.766494,-0.768531,1.288333,1.271493,-0.776197,-0.763950,-0.764458,1.272327
4,-0.778247,-0.781328,1.282402,1.274835,1.314228,1.282402,1.292595,-0.767512,-0.766494,1.301183,-0.776197,-0.786477,-0.776197,-0.763950,-0.764458,-0.785961
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3224,-0.778247,-0.781328,-0.779787,-0.784415,-0.760903,1.282402,-0.773637,-0.767512,1.304642,1.301183,1.288333,-0.786477,1.288333,1.308986,-0.764458,-0.785961
3225,-0.778247,-0.781328,-0.779787,-0.784415,-0.760903,1.282402,-0.773637,-0.767512,1.304642,1.301183,1.288333,1.271493,-0.776197,-0.763950,1.308116,-0.785961
3226,-0.778247,1.279872,-0.779787,1.274835,1.314228,1.282402,-0.773637,-0.767512,-0.766494,1.301183,-0.776197,-0.786477,1.288333,-0.763950,-0.764458,-0.785961
3227,-0.778247,-0.781328,-0.779787,-0.784415,1.314228,1.282402,1.292595,-0.767512,1.304642,-0.768531,1.288333,-0.786477,-0.776197,-0.763950,-0.764458,1.272327


Unnamed: 0,item 0,item 1,item 2,item 3,item 5,item 6,item 7,item 8,item 9,item 10,item 11,item 12,item 13,item 14,item 15,item 16,label
0,1.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
1,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0
2,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0
4,0.0,0.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3224,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0
3225,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0
3226,0.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
3227,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0


label
[0. 0. 0. ... 0. 1. 0.]


Unnamed: 0,item 0,item 1,item 2,item 3,item 5,item 6,item 7,item 8,item 9,item 10,item 11,item 12,item 13,item 14,item 15,item 16
0,1.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0
1,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0
2,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0
4,0.0,0.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3224,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,0.0,0.0
3225,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0
3226,0.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0
3227,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0


Unnamed: 0,item 0,item 1,item 2,item 3,item 4,item 5,item 6,item 7,item 8,item 9,item 10,item 11,item 12,item 13,item 14,item 15
0,1.284939,-0.781328,1.282402,1.274835,-0.760903,-0.779787,1.292595,-0.767512,-0.766494,1.301183,-0.776197,-0.786477,-0.776197,1.308986,-0.764458,-0.785961
1,-0.778247,-0.781328,1.282402,-0.784415,-0.760903,1.282402,-0.773637,1.302911,1.304642,-0.768531,1.288333,-0.786477,1.288333,-0.763950,-0.764458,-0.785961
2,-0.778247,-0.781328,1.282402,-0.784415,-0.760903,1.282402,1.292595,-0.767512,1.304642,1.301183,-0.776197,1.271493,-0.776197,-0.763950,-0.764458,-0.785961
3,-0.778247,-0.781328,-0.779787,1.274835,1.314228,-0.779787,-0.773637,1.302911,-0.766494,-0.768531,1.288333,1.271493,-0.776197,-0.763950,-0.764458,1.272327
4,-0.778247,-0.781328,1.282402,1.274835,1.314228,1.282402,1.292595,-0.767512,-0.766494,1.301183,-0.776197,-0.786477,-0.776197,-0.763950,-0.764458,-0.785961
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3224,-0.778247,-0.781328,-0.779787,-0.784415,-0.760903,1.282402,-0.773637,-0.767512,1.304642,1.301183,1.288333,-0.786477,1.288333,1.308986,-0.764458,-0.785961
3225,-0.778247,-0.781328,-0.779787,-0.784415,-0.760903,1.282402,-0.773637,-0.767512,1.304642,1.301183,1.288333,1.271493,-0.776197,-0.763950,1.308116,-0.785961
3226,-0.778247,1.279872,-0.779787,1.274835,1.314228,1.282402,-0.773637,-0.767512,-0.766494,1.301183,-0.776197,-0.786477,1.288333,-0.763950,-0.764458,-0.785961
3227,-0.778247,-0.781328,-0.779787,-0.784415,1.314228,1.282402,1.292595,-0.767512,1.304642,-0.768531,1.288333,-0.786477,-0.776197,-0.763950,-0.764458,1.272327


Unnamed: 0,item 0,item 1,item 2,item 3,item 5,item 6,item 7,item 8,item 9,item 10,item 11,item 12,item 13,item 14,item 15,item 16,label
0,1.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
1,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0
2,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0
4,0.0,0.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3224,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0
3225,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0
3226,0.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
3227,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0


label
[0. 0. 0. ... 0. 1. 0.]


Unnamed: 0,item 0,item 1,item 2,item 3,item 5,item 6,item 7,item 8,item 9,item 10,item 11,item 12,item 13,item 14,item 15,item 16
0,1.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0
1,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0
2,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0
4,0.0,0.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3224,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,0.0,0.0
3225,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0
3226,0.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0
3227,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0


Unnamed: 0,item 0,item 1,item 2,item 3,item 4,item 5,item 6,item 7,item 8,item 9,item 10,item 11,item 12,item 13,item 14,item 15
0,1.284939,-0.781328,1.282402,1.274835,-0.760903,-0.779787,1.292595,-0.767512,-0.766494,1.301183,-0.776197,-0.786477,-0.776197,1.308986,-0.764458,-0.785961
1,-0.778247,-0.781328,1.282402,-0.784415,-0.760903,1.282402,-0.773637,1.302911,1.304642,-0.768531,1.288333,-0.786477,1.288333,-0.763950,-0.764458,-0.785961
2,-0.778247,-0.781328,1.282402,-0.784415,-0.760903,1.282402,1.292595,-0.767512,1.304642,1.301183,-0.776197,1.271493,-0.776197,-0.763950,-0.764458,-0.785961
3,-0.778247,-0.781328,-0.779787,1.274835,1.314228,-0.779787,-0.773637,1.302911,-0.766494,-0.768531,1.288333,1.271493,-0.776197,-0.763950,-0.764458,1.272327
4,-0.778247,-0.781328,1.282402,1.274835,1.314228,1.282402,1.292595,-0.767512,-0.766494,1.301183,-0.776197,-0.786477,-0.776197,-0.763950,-0.764458,-0.785961
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3224,-0.778247,-0.781328,-0.779787,-0.784415,-0.760903,1.282402,-0.773637,-0.767512,1.304642,1.301183,1.288333,-0.786477,1.288333,1.308986,-0.764458,-0.785961
3225,-0.778247,-0.781328,-0.779787,-0.784415,-0.760903,1.282402,-0.773637,-0.767512,1.304642,1.301183,1.288333,1.271493,-0.776197,-0.763950,1.308116,-0.785961
3226,-0.778247,1.279872,-0.779787,1.274835,1.314228,1.282402,-0.773637,-0.767512,-0.766494,1.301183,-0.776197,-0.786477,1.288333,-0.763950,-0.764458,-0.785961
3227,-0.778247,-0.781328,-0.779787,-0.784415,1.314228,1.282402,1.292595,-0.767512,1.304642,-0.768531,1.288333,-0.786477,-0.776197,-0.763950,-0.764458,1.272327


Unnamed: 0,item 0,item 1,item 2,item 3,item 5,item 6,item 7,item 8,item 9,item 10,item 11,item 12,item 13,item 14,item 15,item 16,label
0,1.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
1,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0
2,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0
4,0.0,0.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3224,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0
3225,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0
3226,0.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
3227,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0


label
[0. 0. 0. ... 0. 1. 0.]


Unnamed: 0,item 0,item 1,item 2,item 3,item 5,item 6,item 7,item 8,item 9,item 10,item 11,item 12,item 13,item 14,item 15,item 16
0,1.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0
1,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0
2,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0
4,0.0,0.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3224,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,0.0,0.0
3225,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0
3226,0.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0
3227,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0


Unnamed: 0,item 0,item 1,item 2,item 3,item 4,item 5,item 6,item 7,item 8,item 9,item 10,item 11,item 12,item 13,item 14,item 15
0,1.284939,-0.781328,1.282402,1.274835,-0.760903,-0.779787,1.292595,-0.767512,-0.766494,1.301183,-0.776197,-0.786477,-0.776197,1.308986,-0.764458,-0.785961
1,-0.778247,-0.781328,1.282402,-0.784415,-0.760903,1.282402,-0.773637,1.302911,1.304642,-0.768531,1.288333,-0.786477,1.288333,-0.763950,-0.764458,-0.785961
2,-0.778247,-0.781328,1.282402,-0.784415,-0.760903,1.282402,1.292595,-0.767512,1.304642,1.301183,-0.776197,1.271493,-0.776197,-0.763950,-0.764458,-0.785961
3,-0.778247,-0.781328,-0.779787,1.274835,1.314228,-0.779787,-0.773637,1.302911,-0.766494,-0.768531,1.288333,1.271493,-0.776197,-0.763950,-0.764458,1.272327
4,-0.778247,-0.781328,1.282402,1.274835,1.314228,1.282402,1.292595,-0.767512,-0.766494,1.301183,-0.776197,-0.786477,-0.776197,-0.763950,-0.764458,-0.785961
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3224,-0.778247,-0.781328,-0.779787,-0.784415,-0.760903,1.282402,-0.773637,-0.767512,1.304642,1.301183,1.288333,-0.786477,1.288333,1.308986,-0.764458,-0.785961
3225,-0.778247,-0.781328,-0.779787,-0.784415,-0.760903,1.282402,-0.773637,-0.767512,1.304642,1.301183,1.288333,1.271493,-0.776197,-0.763950,1.308116,-0.785961
3226,-0.778247,1.279872,-0.779787,1.274835,1.314228,1.282402,-0.773637,-0.767512,-0.766494,1.301183,-0.776197,-0.786477,1.288333,-0.763950,-0.764458,-0.785961
3227,-0.778247,-0.781328,-0.779787,-0.784415,1.314228,1.282402,1.292595,-0.767512,1.304642,-0.768531,1.288333,-0.786477,-0.776197,-0.763950,-0.764458,1.272327


Unnamed: 0,item 0,item 1,item 2,item 3,item 5,item 6,item 7,item 8,item 9,item 10,item 11,item 12,item 13,item 14,item 15,item 16,label
0,1.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
1,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0
2,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0
4,0.0,0.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3224,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0
3225,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0
3226,0.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
3227,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0


label
[0. 0. 0. ... 0. 1. 0.]


Unnamed: 0,item 0,item 1,item 2,item 3,item 5,item 6,item 7,item 8,item 9,item 10,item 11,item 12,item 13,item 14,item 15,item 16
0,1.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0
1,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0
2,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0
4,0.0,0.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3224,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,0.0,0.0
3225,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0
3226,0.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0
3227,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0


Unnamed: 0,item 0,item 1,item 2,item 3,item 4,item 5,item 6,item 7,item 8,item 9,item 10,item 11,item 12,item 13,item 14,item 15
0,1.284939,-0.781328,1.282402,1.274835,-0.760903,-0.779787,1.292595,-0.767512,-0.766494,1.301183,-0.776197,-0.786477,-0.776197,1.308986,-0.764458,-0.785961
1,-0.778247,-0.781328,1.282402,-0.784415,-0.760903,1.282402,-0.773637,1.302911,1.304642,-0.768531,1.288333,-0.786477,1.288333,-0.763950,-0.764458,-0.785961
2,-0.778247,-0.781328,1.282402,-0.784415,-0.760903,1.282402,1.292595,-0.767512,1.304642,1.301183,-0.776197,1.271493,-0.776197,-0.763950,-0.764458,-0.785961
3,-0.778247,-0.781328,-0.779787,1.274835,1.314228,-0.779787,-0.773637,1.302911,-0.766494,-0.768531,1.288333,1.271493,-0.776197,-0.763950,-0.764458,1.272327
4,-0.778247,-0.781328,1.282402,1.274835,1.314228,1.282402,1.292595,-0.767512,-0.766494,1.301183,-0.776197,-0.786477,-0.776197,-0.763950,-0.764458,-0.785961
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3224,-0.778247,-0.781328,-0.779787,-0.784415,-0.760903,1.282402,-0.773637,-0.767512,1.304642,1.301183,1.288333,-0.786477,1.288333,1.308986,-0.764458,-0.785961
3225,-0.778247,-0.781328,-0.779787,-0.784415,-0.760903,1.282402,-0.773637,-0.767512,1.304642,1.301183,1.288333,1.271493,-0.776197,-0.763950,1.308116,-0.785961
3226,-0.778247,1.279872,-0.779787,1.274835,1.314228,1.282402,-0.773637,-0.767512,-0.766494,1.301183,-0.776197,-0.786477,1.288333,-0.763950,-0.764458,-0.785961
3227,-0.778247,-0.781328,-0.779787,-0.784415,1.314228,1.282402,1.292595,-0.767512,1.304642,-0.768531,1.288333,-0.786477,-0.776197,-0.763950,-0.764458,1.272327


Unnamed: 0,item 0,item 1,item 2,item 3,item 5,item 6,item 7,item 8,item 9,item 10,item 11,item 12,item 13,item 14,item 15,item 16,label
0,1.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
1,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0
2,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0
4,0.0,0.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3224,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0
3225,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0
3226,0.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
3227,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0


label
[0. 0. 0. ... 0. 1. 0.]


Unnamed: 0,item 0,item 1,item 2,item 3,item 5,item 6,item 7,item 8,item 9,item 10,item 11,item 12,item 13,item 14,item 15,item 16
0,1.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0
1,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0
2,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0
4,0.0,0.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3224,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,0.0,0.0
3225,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0
3226,0.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0
3227,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0


Unnamed: 0,item 0,item 1,item 2,item 3,item 4,item 5,item 6,item 7,item 8,item 9,item 10,item 11,item 12,item 13,item 14,item 15
0,1.284939,-0.781328,1.282402,1.274835,-0.760903,-0.779787,1.292595,-0.767512,-0.766494,1.301183,-0.776197,-0.786477,-0.776197,1.308986,-0.764458,-0.785961
1,-0.778247,-0.781328,1.282402,-0.784415,-0.760903,1.282402,-0.773637,1.302911,1.304642,-0.768531,1.288333,-0.786477,1.288333,-0.763950,-0.764458,-0.785961
2,-0.778247,-0.781328,1.282402,-0.784415,-0.760903,1.282402,1.292595,-0.767512,1.304642,1.301183,-0.776197,1.271493,-0.776197,-0.763950,-0.764458,-0.785961
3,-0.778247,-0.781328,-0.779787,1.274835,1.314228,-0.779787,-0.773637,1.302911,-0.766494,-0.768531,1.288333,1.271493,-0.776197,-0.763950,-0.764458,1.272327
4,-0.778247,-0.781328,1.282402,1.274835,1.314228,1.282402,1.292595,-0.767512,-0.766494,1.301183,-0.776197,-0.786477,-0.776197,-0.763950,-0.764458,-0.785961
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3224,-0.778247,-0.781328,-0.779787,-0.784415,-0.760903,1.282402,-0.773637,-0.767512,1.304642,1.301183,1.288333,-0.786477,1.288333,1.308986,-0.764458,-0.785961
3225,-0.778247,-0.781328,-0.779787,-0.784415,-0.760903,1.282402,-0.773637,-0.767512,1.304642,1.301183,1.288333,1.271493,-0.776197,-0.763950,1.308116,-0.785961
3226,-0.778247,1.279872,-0.779787,1.274835,1.314228,1.282402,-0.773637,-0.767512,-0.766494,1.301183,-0.776197,-0.786477,1.288333,-0.763950,-0.764458,-0.785961
3227,-0.778247,-0.781328,-0.779787,-0.784415,1.314228,1.282402,1.292595,-0.767512,1.304642,-0.768531,1.288333,-0.786477,-0.776197,-0.763950,-0.764458,1.272327


Unnamed: 0,item 0,item 1,item 2,item 3,item 5,item 6,item 7,item 8,item 9,item 10,item 11,item 12,item 13,item 14,item 15,item 16,label
0,1.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
1,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0
2,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0
4,0.0,0.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3224,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0
3225,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0
3226,0.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
3227,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0


label
[0. 0. 0. ... 0. 1. 0.]


Unnamed: 0,item 0,item 1,item 2,item 3,item 5,item 6,item 7,item 8,item 9,item 10,item 11,item 12,item 13,item 14,item 15,item 16
0,1.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0
1,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0
2,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0
4,0.0,0.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3224,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,0.0,0.0
3225,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0
3226,0.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0
3227,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0


Unnamed: 0,item 0,item 1,item 2,item 3,item 4,item 5,item 6,item 7,item 8,item 9,item 10,item 11,item 12,item 13,item 14,item 15
0,1.284939,-0.781328,1.282402,1.274835,-0.760903,-0.779787,1.292595,-0.767512,-0.766494,1.301183,-0.776197,-0.786477,-0.776197,1.308986,-0.764458,-0.785961
1,-0.778247,-0.781328,1.282402,-0.784415,-0.760903,1.282402,-0.773637,1.302911,1.304642,-0.768531,1.288333,-0.786477,1.288333,-0.763950,-0.764458,-0.785961
2,-0.778247,-0.781328,1.282402,-0.784415,-0.760903,1.282402,1.292595,-0.767512,1.304642,1.301183,-0.776197,1.271493,-0.776197,-0.763950,-0.764458,-0.785961
3,-0.778247,-0.781328,-0.779787,1.274835,1.314228,-0.779787,-0.773637,1.302911,-0.766494,-0.768531,1.288333,1.271493,-0.776197,-0.763950,-0.764458,1.272327
4,-0.778247,-0.781328,1.282402,1.274835,1.314228,1.282402,1.292595,-0.767512,-0.766494,1.301183,-0.776197,-0.786477,-0.776197,-0.763950,-0.764458,-0.785961
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3224,-0.778247,-0.781328,-0.779787,-0.784415,-0.760903,1.282402,-0.773637,-0.767512,1.304642,1.301183,1.288333,-0.786477,1.288333,1.308986,-0.764458,-0.785961
3225,-0.778247,-0.781328,-0.779787,-0.784415,-0.760903,1.282402,-0.773637,-0.767512,1.304642,1.301183,1.288333,1.271493,-0.776197,-0.763950,1.308116,-0.785961
3226,-0.778247,1.279872,-0.779787,1.274835,1.314228,1.282402,-0.773637,-0.767512,-0.766494,1.301183,-0.776197,-0.786477,1.288333,-0.763950,-0.764458,-0.785961
3227,-0.778247,-0.781328,-0.779787,-0.784415,1.314228,1.282402,1.292595,-0.767512,1.304642,-0.768531,1.288333,-0.786477,-0.776197,-0.763950,-0.764458,1.272327


Unnamed: 0,item 0,item 1,item 2,item 3,item 5,item 6,item 7,item 8,item 9,item 10,item 11,item 12,item 13,item 14,item 15,item 16,label
0,1.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
1,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0
2,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0
4,0.0,0.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3224,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0
3225,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0
3226,0.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
3227,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0


label
[0. 0. 0. ... 0. 1. 0.]


Unnamed: 0,item 0,item 1,item 2,item 3,item 5,item 6,item 7,item 8,item 9,item 10,item 11,item 12,item 13,item 14,item 15,item 16
0,1.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0
1,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0
2,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0
4,0.0,0.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3224,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,0.0,0.0
3225,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0
3226,0.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0
3227,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0


Unnamed: 0,item 0,item 1,item 2,item 3,item 4,item 5,item 6,item 7,item 8,item 9,item 10,item 11,item 12,item 13,item 14,item 15
0,1.284939,-0.781328,1.282402,1.274835,-0.760903,-0.779787,1.292595,-0.767512,-0.766494,1.301183,-0.776197,-0.786477,-0.776197,1.308986,-0.764458,-0.785961
1,-0.778247,-0.781328,1.282402,-0.784415,-0.760903,1.282402,-0.773637,1.302911,1.304642,-0.768531,1.288333,-0.786477,1.288333,-0.763950,-0.764458,-0.785961
2,-0.778247,-0.781328,1.282402,-0.784415,-0.760903,1.282402,1.292595,-0.767512,1.304642,1.301183,-0.776197,1.271493,-0.776197,-0.763950,-0.764458,-0.785961
3,-0.778247,-0.781328,-0.779787,1.274835,1.314228,-0.779787,-0.773637,1.302911,-0.766494,-0.768531,1.288333,1.271493,-0.776197,-0.763950,-0.764458,1.272327
4,-0.778247,-0.781328,1.282402,1.274835,1.314228,1.282402,1.292595,-0.767512,-0.766494,1.301183,-0.776197,-0.786477,-0.776197,-0.763950,-0.764458,-0.785961
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3224,-0.778247,-0.781328,-0.779787,-0.784415,-0.760903,1.282402,-0.773637,-0.767512,1.304642,1.301183,1.288333,-0.786477,1.288333,1.308986,-0.764458,-0.785961
3225,-0.778247,-0.781328,-0.779787,-0.784415,-0.760903,1.282402,-0.773637,-0.767512,1.304642,1.301183,1.288333,1.271493,-0.776197,-0.763950,1.308116,-0.785961
3226,-0.778247,1.279872,-0.779787,1.274835,1.314228,1.282402,-0.773637,-0.767512,-0.766494,1.301183,-0.776197,-0.786477,1.288333,-0.763950,-0.764458,-0.785961
3227,-0.778247,-0.781328,-0.779787,-0.784415,1.314228,1.282402,1.292595,-0.767512,1.304642,-0.768531,1.288333,-0.786477,-0.776197,-0.763950,-0.764458,1.272327


Results for classifier KNN: training score: 0.9505, Accuracy: 0.9452 using 2 Principal Components and simple kfold 

Results for classifier LogisticRegression: training score: 0.9498, Accuracy: 0.9498 using 2 Principal Components and simple kfold 

Results for classifier LinearRegression: training score: 0.8090, Accuracy: 0.8068 using 2 Principal Components and simple kfold 

Results for classifier DecisionTreeClassifier: training score: 1.0000, Accuracy: 0.8888 using 2 Principal Components and simple kfold 

Results for classifier GaussianNB: training score: 0.9498, Accuracy: 0.9498 using 2 Principal Components and simple kfold 

Results for classifier SVC: training score: 0.9498, Accuracy: 0.9498 using 2 Principal Components and simple kfold 



## Erotima B2 Classification 

In [52]:
columnNames = columnNames_builder(17, "label")

# df_B = pd.DataFrame(columns=columnNames, index=df_17x7_original.index)
test = transformed_df.apply(lambda row: clasify_df_B_builder(row, 17, target_item_id, avg_series["total_assortment_revenue"]), axis=1)

# create df in order to predict target_revenue_contribution
df_classify_B = pd.DataFrame(list(test), columns=columnNames)
# df_classify_B.drop("item {}".format(target_item_id), axis=1, inplace=True)
df_classify_B

Unnamed: 0,item 0,item 1,item 2,item 3,item 4,item 5,item 6,item 7,item 8,item 9,item 10,item 11,item 12,item 13,item 14,item 15,item 16,label
0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0
1,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0
2,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0
3,0.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7837,0.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0
7838,1.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0
7839,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0
7840,1.0,1.0,1.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0


In [53]:
# step 1
label = df_classify_B["label"].to_numpy()
print(label)

x = df_classify_B.drop(['label'], axis = 1)

# step 2
x = StandardScaler().fit_transform(x)  # normalizing the features
print("\nShape of normalized data:", x.shape)

# elenxoume an to kanonikopoiimeno mas data set exei mean 0 kai tipiki apoklisi 1
print("\nPrints mean:", np.mean(x), " and Standard deviation of normalized dataset: ", np.std(x))

# step 3: change feature name
feature_columns = ['item ' + str(i) for i in range(x.shape[1])]
normalized_dataFrame = pd.DataFrame(x, columns=feature_columns)

display(normalized_dataFrame.head())

[1. 1. 0. ... 1. 1. 0.]

Shape of normalized data: (7842, 17)

Prints mean: 8.16798492695706e-18  and Standard deviation of normalized dataset:  1.0


Unnamed: 0,item 0,item 1,item 2,item 3,item 4,item 5,item 6,item 7,item 8,item 9,item 10,item 11,item 12,item 13,item 14,item 15,item 16
0,1.190227,-0.830276,1.183992,1.199979,1.195247,-0.835106,-0.836207,1.193675,-0.832469,-0.8294,1.192106,-0.840176,-0.839955,-0.843493,1.205373,-0.8294,-0.84593
1,-0.840176,-0.830276,1.183992,-0.833348,1.195247,-0.835106,1.195877,-0.837749,1.201245,1.205692,-0.838851,1.190227,-0.839955,1.185547,-0.829619,-0.8294,-0.84593
2,-0.840176,-0.830276,-0.8446,1.199979,-0.836647,-0.835106,-0.836207,-0.837749,1.201245,1.205692,1.192106,1.190227,1.19054,-0.843493,1.205373,-0.8294,-0.84593
3,-0.840176,-0.830276,1.183992,-0.833348,1.195247,-0.835106,1.195877,1.193675,-0.832469,1.205692,1.192106,-0.840176,1.19054,-0.843493,-0.829619,-0.8294,-0.84593
4,-0.840176,-0.830276,-0.8446,1.199979,1.195247,1.197452,-0.836207,-0.837749,1.201245,-0.8294,-0.838851,1.190227,1.19054,-0.843493,-0.829619,-0.8294,1.18213


In [54]:
# reset classifiers
for c in classifiers:
    c["acc_scores"] = []
    c["train_scores"] = []

classifier = kFoldCV(transformed_df, "label", classifiers)

# ipologizoume tin mesi timi ton apotelesmaton tou taksinomiti mas pou proekipsan apo tin 10-fold-cross-validation
for c in classifiers:
    print("Results for classifier {}: training score: {:.4f}, Accuracy: {:.4f} using 2 Principal Components and simple kfold \n".format(c["name"], np.mean(c["train_scores"]), np.mean(c["acc_scores"])))
    results.append({
    "algorithm": c["name"],
    "test": "2D-PCA",
    "folds": "K-Fold",
    "train_score": np.mean(c["train_scores"]),
    "test_score": np.mean(c["acc_scores"])
    })

Unnamed: 0,item 0,item 1,item 2,item 3,item 4,item 5,item 6,item 7,item 8,item 9,item 10,item 11,item 12,item 13,item 14,item 15,item 16,label
0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
1,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0
2,0.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0
4,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3224,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0
3225,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0
3226,0.0,1.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
3227,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0


label
[0. 0. 0. ... 0. 1. 0.]


Unnamed: 0,item 0,item 1,item 2,item 3,item 4,item 5,item 6,item 7,item 8,item 9,item 10,item 11,item 12,item 13,item 14,item 15,item 16
0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0
1,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0
2,0.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0
4,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3224,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,0.0,0.0
3225,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0
3226,0.0,1.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0
3227,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0


Unnamed: 0,item 0,item 1,item 2,item 3,item 4,item 5,item 6,item 7,item 8,item 9,item 10,item 11,item 12,item 13,item 14,item 15,item 16
0,1.284939,-0.781328,1.282402,1.274835,0.0,-0.760903,-0.779787,1.292595,-0.767512,-0.766494,1.301183,-0.776197,-0.786477,-0.776197,1.308986,-0.764458,-0.785961
1,-0.778247,-0.781328,1.282402,-0.784415,0.0,-0.760903,1.282402,-0.773637,1.302911,1.304642,-0.768531,1.288333,-0.786477,1.288333,-0.763950,-0.764458,-0.785961
2,-0.778247,-0.781328,1.282402,-0.784415,0.0,-0.760903,1.282402,1.292595,-0.767512,1.304642,1.301183,-0.776197,1.271493,-0.776197,-0.763950,-0.764458,-0.785961
3,-0.778247,-0.781328,-0.779787,1.274835,0.0,1.314228,-0.779787,-0.773637,1.302911,-0.766494,-0.768531,1.288333,1.271493,-0.776197,-0.763950,-0.764458,1.272327
4,-0.778247,-0.781328,1.282402,1.274835,0.0,1.314228,1.282402,1.292595,-0.767512,-0.766494,1.301183,-0.776197,-0.786477,-0.776197,-0.763950,-0.764458,-0.785961
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3224,-0.778247,-0.781328,-0.779787,-0.784415,0.0,-0.760903,1.282402,-0.773637,-0.767512,1.304642,1.301183,1.288333,-0.786477,1.288333,1.308986,-0.764458,-0.785961
3225,-0.778247,-0.781328,-0.779787,-0.784415,0.0,-0.760903,1.282402,-0.773637,-0.767512,1.304642,1.301183,1.288333,1.271493,-0.776197,-0.763950,1.308116,-0.785961
3226,-0.778247,1.279872,-0.779787,1.274835,0.0,1.314228,1.282402,-0.773637,-0.767512,-0.766494,1.301183,-0.776197,-0.786477,1.288333,-0.763950,-0.764458,-0.785961
3227,-0.778247,-0.781328,-0.779787,-0.784415,0.0,1.314228,1.282402,1.292595,-0.767512,1.304642,-0.768531,1.288333,-0.786477,-0.776197,-0.763950,-0.764458,1.272327


KeyError: "Passing list-likes to .loc or [] with any missing labels is no longer supported. The following labels were missing: Int64Index([3229, 3230, 3231, 3232, 3233,\n            ...\n            7837, 7838, 7839, 7840, 7841],\n           dtype='int64', length=4613). See https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#deprecate-loc-reindex-listlike"