In [34]:
from pandas import read_csv
from matplotlib import pyplot
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
# load dataset
filename = "/content/dataset.csv"
names = ['CustomerID', 'Age', 'Gender', 'Item Purchased', 'Category', 'Purchase Amount (USD)',
         'Location', 'Size', 'Color', 'Season', 'Review Rating', 'Subscription Status',
         'Payment Method', 'Shipping Type', 'Discount Applied', 'Promo Code Used', 'Previous Purchases',
         'Preferred Payment Method', 'Frequency of Purchases']

dataframe = read_csv(filename, names=names)

dataframe = dataframe.drop(columns=['CustomerID', 'Frequency of Purchases', 'Location', 'Size', 'Color', 'Shipping Type',
                        'Item Purchased', 'Category', 'Payment Method', 'Preferred Payment Method'])
dataframe['Gender'] = dataframe['Gender'].map({'Male':1,'Female':0})
dataframe['Season'] = dataframe['Season'].map({'Winter' : 0, 'Spring' : 1, 'Summer':2, 'Fall': 3})
dataframe['Subscription Status'] = dataframe['Subscription Status'].map({'Yes':1, 'No':0})
dataframe['Discount Applied'] = dataframe['Discount Applied'].map({'Yes':1, 'No':0})
dataframe['Promo Code Used'] = dataframe['Promo Code Used'].map({'Yes':1, 'No':0})

def higher_than_50(x):
    return x > 50

dataframe['Purchase Amount (USD)'] = dataframe['Purchase Amount (USD)'].map(higher_than_50)
dataframe['Purchase Amount (USD)'] = dataframe['Purchase Amount (USD)'].map({True:1, False:0})

print(dataframe.describe())
print(dataframe.head(10))
array = dataframe.values
X = array[:,0:9]
Y = array[:,2]

models = []
models.append(('LR', LogisticRegression()))
models.append(('LDA', LinearDiscriminantAnalysis()))
models.append(('KNN', KNeighborsClassifier()))
models.append(('CART', DecisionTreeClassifier()))
models.append(('NB', GaussianNB()))
models.append(('SVM', SVC()))

results = []
names = []
scoring = 'accuracy'
for name, model in models:
    kfold = KFold(n_splits=2)
    cv_results = cross_val_score(model, X, Y, cv=kfold, scoring=scoring)
    results.append(cv_results)
    names.append(name)
    msg = "%s: %f (%f)" % (name, cv_results.mean(), cv_results.std())
    print(msg)


# Logistic Regression.
# Linear Discriminant Analysis.
# k-Nearest Neighbors.
# Classification and Regression Trees.
# Naive Bayes.
# Support Vector Machines.

               Age       Gender  Purchase Amount (USD)       Season  \
count  3900.000000  3900.000000            3900.000000  3900.000000   
mean     44.068462     0.680000               0.611795     1.495897   
std      15.207589     0.466536               0.487404     1.117252   
min      18.000000     0.000000               0.000000     0.000000   
25%      31.000000     0.000000               0.000000     1.000000   
50%      44.000000     1.000000               1.000000     1.000000   
75%      57.000000     1.000000               1.000000     2.250000   
max      70.000000     1.000000               1.000000     3.000000   

       Review Rating  Subscription Status  Discount Applied  Promo Code Used  \
count    3900.000000          3900.000000       3900.000000      3900.000000   
mean        3.749949             0.270000          0.430000         0.430000   
std         0.716223             0.444016          0.495139         0.495139   
min         2.500000             0.00000

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


LDA: 0.611795 (0.003590)
KNN: 0.656410 (0.007692)
CART: 1.000000 (0.000000)
NB: 1.000000 (0.000000)
SVM: 0.611795 (0.003590)
