In [1]:
import pandas as pd
from sklearn import preprocessing
import holidays
import datetime
import seaborn as sns
from matplotlib import pyplot as plt
import plotly.express as px
from sklearn.linear_model import LinearRegression
from flask import Flask, request, jsonify, render_template
import warnings
warnings.filterwarnings('ignore')

In [2]:
df=pd.read_csv('customer_shopping_data.csv')
print(df.shape)
df

(99457, 10)


Unnamed: 0,invoice_no,customer_id,gender,age,category,quantity,price,payment_method,invoice_date,shopping_mall
0,I138884,C241288,Female,28,Clothing,5,1500.40,Credit Card,5/8/22,Kanyon
1,I317333,C111565,Male,21,Shoes,3,1800.51,Debit Card,12/12/21,Forum Istanbul
2,I127801,C266599,Male,20,Clothing,1,300.08,Cash,9/11/21,Metrocity
3,I173702,C988172,Female,66,Shoes,5,3000.85,Credit Card,16/05/2021,Metropol AVM
4,I337046,C189076,Female,53,Books,4,60.60,Cash,24/10/2021,Kanyon
...,...,...,...,...,...,...,...,...,...,...
99452,I219422,C441542,Female,45,Souvenir,5,58.65,Credit Card,21/09/2022,Kanyon
99453,I325143,C569580,Male,27,Food & Beverage,2,10.46,Cash,22/09/2021,Forum Istanbul
99454,I824010,C103292,Male,63,Food & Beverage,2,10.46,Debit Card,28/03/2021,Metrocity
99455,I702964,C800631,Male,56,Technology,4,4200.00,Cash,16/03/2021,Istinye Park


In [3]:
# checking datatypes
df.dtypes

invoice_no         object
customer_id        object
gender             object
age                 int64
category           object
quantity            int64
price             float64
payment_method     object
invoice_date       object
shopping_mall      object
dtype: object

In [4]:
#converting datatype of invoice_date from "Object" to "Datetime64[ns]"
df['invoice_date'] = pd.to_datetime(df['invoice_date'])

In [5]:
#datatypes of columns
df.dtypes

invoice_no                object
customer_id               object
gender                    object
age                        int64
category                  object
quantity                   int64
price                    float64
payment_method            object
invoice_date      datetime64[ns]
shopping_mall             object
dtype: object

In [6]:
#holiday dates from 2021 and 2022 are appended into a list and saved
us_holidays=[]
for date in holidays.UnitedStates(years=2021).items():
    us_holidays.append(str(date[0]))
    

for date in holidays.UnitedStates(years=2022).items():
    us_holidays.append(str(date[0]))
    

In [7]:
#including a new column "IS_HOLIDAY" based on the condition
df['Is_Holiday'] = [
     1 if str(val).split()[0] in us_holidays else 0 for val in df['invoice_date']
 ]

In [8]:
#including a new column for month using the date
df['month'] = df['invoice_date'].dt.month
df

Unnamed: 0,invoice_no,customer_id,gender,age,category,quantity,price,payment_method,invoice_date,shopping_mall,Is_Holiday,month
0,I138884,C241288,Female,28,Clothing,5,1500.40,Credit Card,2022-05-08,Kanyon,0,5
1,I317333,C111565,Male,21,Shoes,3,1800.51,Debit Card,2021-12-12,Forum Istanbul,0,12
2,I127801,C266599,Male,20,Clothing,1,300.08,Cash,2021-09-11,Metrocity,0,9
3,I173702,C988172,Female,66,Shoes,5,3000.85,Credit Card,2021-05-16,Metropol AVM,0,5
4,I337046,C189076,Female,53,Books,4,60.60,Cash,2021-10-24,Kanyon,0,10
...,...,...,...,...,...,...,...,...,...,...,...,...
99452,I219422,C441542,Female,45,Souvenir,5,58.65,Credit Card,2022-09-21,Kanyon,0,9
99453,I325143,C569580,Male,27,Food & Beverage,2,10.46,Cash,2021-09-22,Forum Istanbul,0,9
99454,I824010,C103292,Male,63,Food & Beverage,2,10.46,Debit Card,2021-03-28,Metrocity,0,3
99455,I702964,C800631,Male,56,Technology,4,4200.00,Cash,2021-03-16,Istinye Park,0,3


In [9]:
#Selecting necessary rows
df = df[['gender', 'age', 'category', 'quantity', 'price', 'payment_method','shopping_mall', 'Is_Holiday', 'month']]
df

Unnamed: 0,gender,age,category,quantity,price,payment_method,shopping_mall,Is_Holiday,month
0,Female,28,Clothing,5,1500.40,Credit Card,Kanyon,0,5
1,Male,21,Shoes,3,1800.51,Debit Card,Forum Istanbul,0,12
2,Male,20,Clothing,1,300.08,Cash,Metrocity,0,9
3,Female,66,Shoes,5,3000.85,Credit Card,Metropol AVM,0,5
4,Female,53,Books,4,60.60,Cash,Kanyon,0,10
...,...,...,...,...,...,...,...,...,...
99452,Female,45,Souvenir,5,58.65,Credit Card,Kanyon,0,9
99453,Male,27,Food & Beverage,2,10.46,Cash,Forum Istanbul,0,9
99454,Male,63,Food & Beverage,2,10.46,Debit Card,Metrocity,0,3
99455,Male,56,Technology,4,4200.00,Cash,Istinye Park,0,3


In [10]:
#Created a duplicate dataframe to convert the categorical values into numerical ones
sales_data = df

# Preprocess data by converting categorical variables to numerical
sales_data['gender'] = pd.Categorical(sales_data['gender'])
sales_data['gender'] = sales_data['gender'].cat.codes
sales_data['category'] = pd.Categorical(sales_data['category'])
sales_data['category'] = sales_data['category'].cat.codes
sales_data['payment_method'] = pd.Categorical(sales_data['payment_method'])
sales_data['payment_method'] = sales_data['payment_method'].cat.codes
sales_data['shopping_mall'] = pd.Categorical(sales_data['shopping_mall'])
sales_data['shopping_mall'] = sales_data['shopping_mall'].cat.codes

In [11]:
sales_data

Unnamed: 0,gender,age,category,quantity,price,payment_method,shopping_mall,Is_Holiday,month
0,0,28,1,5,1500.40,1,4,0,5
1,1,21,4,3,1800.51,2,2,0,12
2,1,20,1,1,300.08,0,6,0,9
3,0,66,4,5,3000.85,1,7,0,5
4,0,53,0,4,60.60,0,4,0,10
...,...,...,...,...,...,...,...,...,...
99452,0,45,5,5,58.65,1,4,0,9
99453,1,27,3,2,10.46,0,2,0,9
99454,1,63,3,2,10.46,2,6,0,3
99455,1,56,6,4,4200.00,0,3,0,3


In [12]:
sales_data.dtypes

gender               int8
age                 int64
category             int8
quantity            int64
price             float64
payment_method       int8
shopping_mall        int8
Is_Holiday          int64
month               int64
dtype: object

In [13]:
# Fit a linear regression model on the full dataset
model = LinearRegression()
model.fit(sales_data[['gender', 'age', 'category', 'price', 'payment_method', 'month', 'shopping_mall']], sales_data['quantity'])


LinearRegression()

In [None]:

# Set up Flask web app
app = Flask(__name__)

# Define route for homepage
@app.route('/')
def home():
    return render_template('index.html')

# Define route for handling user input and generating sales forecast
@app.route('/predict', methods=['POST'])
def predict():
    # Get user input from form
    gender = request.form['gender']
    age = int(request.form['age'])
    category = request.form['category']
    price = float(request.form['price'])
    payment_method = request.form['payment_method']
    invoice_date = request.form['invoice_date']
    shopping_mall = request.form['shopping_mall']
    
    # Preprocess user input
    next_month = pd.DataFrame({'gender': [gender], 'age': [age], 'category': [category], 'price': [price], 'payment_method': [payment_method], 'invoice_date': [invoice_date], 'shopping_mall': [shopping_mall]})
    next_month['invoice_date'] = pd.to_datetime(next_month['invoice_date'])
    next_month['month'] = next_month['invoice_date'].dt.month
    next_month['category'] = pd.Categorical(next_month['category'], categories=sales_data['category'].cat.categories)
    next_month['category'] = next_month['category'].cat.codes
    next_month['payment_method'] = pd.Categorical(next_month['payment_method'], categories=sales_data['payment_method'].cat.categories)
    next_month['payment_method'] = next_month['payment_method'].cat.codes
    next_month['shopping_mall'] = pd.Categorical(next_month['shopping_mall'], categories=sales_data['shopping_mall'].cat.categories)
    next_month['shopping_mall'] = next_month['shopping_mall'].cat.codes
    next_month = next_month[['gender', 'age', 'category', 'price', 'payment_method', 'month', 'shopping_mall']]
    
    # Use trained model to generate sales forecast
    forecast = model.predict(next_month)
    
    return render_template('index.html', prediction_text='Sales forecast for next month: {}'.format(forecast[0]))

# Run the app
if __name__ == '__main__':
    app.run(debug=True, use_reloader=False)

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
[2m   Use a production WSGI server instead.[0m
 * Debug mode: on


 * Running on http://127.0.0.1:5000/ (Press CTRL+C to quit)
127.0.0.1 - - [18/Apr/2023 13:51:06] "GET / HTTP/1.1" 200 -


In [None]:
exit

In [None]:
sns.barplot(x = 'category',y = 'price',data = df)

plt.title('End Use')
plt.ylabel('Consumption_(kBtu)')
plt.xlabel('End_Use')
plt.show()

In [None]:
df.hist(column='age',bins=15)

In [None]:
df.groupby(['category']).sum().plot(kind='pie', y='age')


In [None]:
Result=pd.crosstab(index=df['gender'],columns=df['category'])
print(Result)
 
# Grouped bar chart between GENDER and APPROVE_LOAN
Result.plot.bar(figsize=(7,4), rot=0)


In [None]:
Result1=pd.crosstab(index=df['shopping_mall'],columns=df['category'])
print(Result)
 
# Grouped bar chart between GENDER and APPROVE_LOAN
Result1.plot.bar(figsize=(17,7), rot=0)
