In [1]:
import pandas as pd
import os

# Product Intrest prediction

In [2]:
# Navigate to project folder
project_folder = os.path.dirname(os.getcwd())

#Moving to data folder
data_folder = os.path.join(project_folder, 'data')
csv_file_path = os.path.join(data_folder, 'product_intrest.csv')

#Data readin
df = pd.read_csv(csv_file_path)
df.head()

Unnamed: 0,MntWines,MntFruits,MntMeatProducts,MntFishProducts,MntSweetProducts,MntGoldProds,Group
0,635.0,88.0,546.0,172.0,88.0,88.0,3
1,11.0,1.0,6.0,2.0,1.0,6.0,1
2,426.0,49.0,127.0,111.0,21.0,42.0,2
3,11.0,4.0,20.0,10.0,3.0,5.0,1
4,173.0,43.0,118.0,46.0,27.0,15.0,1


In [3]:
#Testing different model and find the model which is best fit for this data
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report , accuracy_score , confusion_matrix
from sklearn.model_selection import train_test_split

x_train, x_test, y_train , y_test = train_test_split(df.drop('Group' , axis= 1) , df['Group'] , test_size= 0.2)

print("Shape of Samples\n=================")
print(f'X train : {x_train.shape}')
print(f'X test : {x_test.shape}')
print(f'y train : {y_train.shape}')
print(f'y test : {y_test.shape}')

Shape of Samples
X train : (1754, 6)
X test : (439, 6)
y train : (1754,)
y test : (439,)


In [4]:
model = RandomForestClassifier(random_state=42)
model.fit(x_train,y_train)

predict = model.predict(x_test)
print("\t\tRandom Forest Model Result\n========================================================")
print(f'\t\tAccuracy:{accuracy_score(y_test , predict)}')
print("---------------------------------------------------------")
print('Classification Report:')
print(classification_report(y_test , predict))
print("---------------------------------------------------------")
print('Confusion Matrix:')
print(confusion_matrix(y_test , predict))
print("========================================================")

		Random Forest Model Result
		Accuracy:0.9658314350797267
---------------------------------------------------------
Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.95      0.97        39
           1       0.99      1.00      0.99       263
           2       0.94      0.88      0.91        74
           3       0.88      0.95      0.92        63

    accuracy                           0.97       439
   macro avg       0.95      0.94      0.95       439
weighted avg       0.97      0.97      0.97       439

---------------------------------------------------------
Confusion Matrix:
[[ 37   1   1   0]
 [  0 262   1   0]
 [  0   1  65   8]
 [  0   1   2  60]]


In [5]:
from sklearn.tree import DecisionTreeClassifier
model_1 = DecisionTreeClassifier(random_state=42)
model_1.fit(x_train,y_train)

predict = model_1.predict(x_test)
print("\t\tDecision tree Model Result\n========================================================")
print(f'\t\tAccuracy:{accuracy_score(y_test , predict)}')
print("---------------------------------------------------------")
print('Classification Report:')
print(classification_report(y_test , predict))
print("---------------------------------------------------------")
print('Confusion Matrix:')
print(confusion_matrix(y_test , predict))
print("========================================================")

		Decision tree Model Result
		Accuracy:0.9384965831435079
---------------------------------------------------------
Classification Report:
              precision    recall  f1-score   support

           0       0.97      0.85      0.90        39
           1       0.98      0.98      0.98       263
           2       0.84      0.88      0.86        74
           3       0.85      0.89      0.87        63

    accuracy                           0.94       439
   macro avg       0.91      0.90      0.90       439
weighted avg       0.94      0.94      0.94       439

---------------------------------------------------------
Confusion Matrix:
[[ 33   1   5   0]
 [  0 258   3   2]
 [  0   1  65   8]
 [  1   2   4  56]]


In [6]:
from sklearn.svm import SVC
model_2 = SVC(random_state=42)
model_2.fit(x_train,y_train)

predict = model_2.predict(x_test)
print("\t\t\tSVC Model Result\n========================================================")
print(f'\t\tAccuracy:{accuracy_score(y_test , predict)}')
print("---------------------------------------------------------")
print('Classification Report:')
print(classification_report(y_test , predict))
print("---------------------------------------------------------")
print('Confusion Matrix:')
print(confusion_matrix(y_test , predict))
print("========================================================")

			SVC Model Result
		Accuracy:0.9498861047835991
---------------------------------------------------------
Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.79      0.89        39
           1       0.98      0.99      0.99       263
           2       0.85      0.93      0.89        74
           3       0.90      0.90      0.90        63

    accuracy                           0.95       439
   macro avg       0.94      0.91      0.92       439
weighted avg       0.95      0.95      0.95       439

---------------------------------------------------------
Confusion Matrix:
[[ 31   2   5   1]
 [  0 260   3   0]
 [  0   0  69   5]
 [  0   2   4  57]]


In this test the Randomforest model is best fit for this data so we will save the model

In [7]:
#Save the model
import pickle
pickle.dump(model , open('product_model.pkl' , 'wb'))

# Purchase Behaviour prediction

In [8]:
csv_file_path = os.path.join(data_folder, 'purchase_behaviour.csv')

#Data readin
df = pd.read_csv(csv_file_path)
df.head()

Unnamed: 0,NumDealsPurchases,NumWebPurchases,NumCatalogPurchases,NumStorePurchases,NumWebVisitsMonth,Group
0,3.0,8.0,10.0,4.0,7.0,2
1,2.0,1.0,1.0,2.0,5.0,1
2,1.0,8.0,2.0,10.0,4.0,0
3,2.0,2.0,0.0,4.0,6.0,1
4,5.0,5.0,3.0,6.0,5.0,2


In [9]:
#Testing different model and find the model which is best fit for this data
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report , accuracy_score , confusion_matrix
from sklearn.model_selection import train_test_split

x_train, x_test, y_train , y_test = train_test_split(df.drop('Group' , axis= 1) , df['Group'] , test_size= 0.2)

print("Shape of Samples\n=================")
print(f'X train : {x_train.shape}')
print(f'X test : {x_test.shape}')
print(f'y train : {y_train.shape}')
print(f'y test : {y_test.shape}')

Shape of Samples
X train : (1754, 5)
X test : (439, 5)
y train : (1754,)
y test : (439,)


In [10]:
model = RandomForestClassifier(random_state=42)
model.fit(x_train,y_train)

predict = model.predict(x_test)
print("\t\tRandom Forest Model Result\n========================================================")
print(f'\t\tAccuracy:{accuracy_score(y_test , predict)}')
print("---------------------------------------------------------")
print('Classification Report:')
print(classification_report(y_test , predict))
print("---------------------------------------------------------")
print('Confusion Matrix:')
print(confusion_matrix(y_test , predict))
print("========================================================")

		Random Forest Model Result
		Accuracy:0.9703872437357631
---------------------------------------------------------
Classification Report:
              precision    recall  f1-score   support

           0       0.95      1.00      0.97       138
           1       0.98      0.98      0.98       196
           2       0.98      0.90      0.94       105

    accuracy                           0.97       439
   macro avg       0.97      0.96      0.97       439
weighted avg       0.97      0.97      0.97       439

---------------------------------------------------------
Confusion Matrix:
[[138   0   0]
 [  1 193   2]
 [  7   3  95]]


In [11]:
from sklearn.tree import DecisionTreeClassifier
model_1 = DecisionTreeClassifier(random_state=42)
model_1.fit(x_train,y_train)

predict = model_1.predict(x_test)
print("\t\tDecision Tree Model Result\n========================================================")
print(f'\t\tAccuracy:{accuracy_score(y_test , predict)}')
print("---------------------------------------------------------")
print('Classification Report:')
print(classification_report(y_test , predict))
print("---------------------------------------------------------")
print('Confusion Matrix:')
print(confusion_matrix(y_test , predict))
print("========================================================")

		Decision Tree Model Result
		Accuracy:0.9567198177676538
---------------------------------------------------------
Classification Report:
              precision    recall  f1-score   support

           0       0.93      1.00      0.96       138
           1       0.99      0.96      0.97       196
           2       0.94      0.90      0.92       105

    accuracy                           0.96       439
   macro avg       0.95      0.95      0.95       439
weighted avg       0.96      0.96      0.96       439

---------------------------------------------------------
Confusion Matrix:
[[138   0   0]
 [  2 188   6]
 [  9   2  94]]


In [12]:
from sklearn.svm import SVC
model_2 = SVC(random_state=42)
model_2.fit(x_train,y_train)

predict = model_2.predict(x_test)
print("\t\t\tSVC Model Result\n========================================================")
print(f'\t\tAccuracy:{accuracy_score(y_test , predict)}')
print("---------------------------------------------------------")
print('Classification Report:')
print(classification_report(y_test , predict))
print("---------------------------------------------------------")
print('Confusion Matrix:')
print(confusion_matrix(y_test , predict))
print("========================================================")

			SVC Model Result
		Accuracy:0.9817767653758542
---------------------------------------------------------
Classification Report:
              precision    recall  f1-score   support

           0       0.95      1.00      0.98       138
           1       1.00      0.99      1.00       196
           2       0.99      0.93      0.96       105

    accuracy                           0.98       439
   macro avg       0.98      0.98      0.98       439
weighted avg       0.98      0.98      0.98       439

---------------------------------------------------------
Confusion Matrix:
[[138   0   0]
 [  0 195   1]
 [  7   0  98]]


In [13]:
#Save the model
import pickle
pickle.dump(model_2 , open('purchase_model.pkl' , 'wb'))