In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.model_selection import train_test_split

# Load the raw transaction export. The file is read as ISO-8859-1 (Latin-1).
data = pd.read_csv('/content/data.csv', encoding='ISO-8859-1')

# Initial cleaning and handling missing values.
# Assign the result back instead of calling fillna(inplace=True) on
# data['Description']: the chained in-place form operates on a temporary
# Series and leaves the frame unchanged under pandas copy-on-write.
data['Description'] = data['Description'].fillna('No description')

# Rows without a CustomerID cannot be attributed to a customer, so drop them.
data = data.dropna(subset=['CustomerID'])

# Keep only rows with a strictly positive quantity and unit price. The explicit
# copy makes the filtered frame independent of the original, so the column
# assignments that follow do not trigger SettingWithCopyWarning.
data = data[(data['Quantity'] > 0) & (data['UnitPrice'] > 0)].copy()

# Parse the invoice timestamp so the .dt accessor can be used on it later.
data['InvoiceDate'] = pd.to_datetime(data['InvoiceDate'])

# Feature Engineering
# Row-level features: the line total plus calendar parts of the invoice date.
data = data.assign(
    TotalPrice=data['Quantity'] * data['UnitPrice'],
    Year=data['InvoiceDate'].dt.year,
    Month=data['InvoiceDate'].dt.month,
    DayOfWeek=data['InvoiceDate'].dt.dayofweek,
)

# Per-customer count of non-null InvoiceNo values. This counts rows (invoice
# lines), not distinct invoices. The count is broadcast back onto every row of
# that customer through a left merge.
purchase_count = (
    data.groupby('CustomerID')['InvoiceNo']
    .count()
    .rename('PurchaseCount')
    .reset_index()
)
data = pd.merge(data, purchase_count, how='left', on='CustomerID')

# Per-customer mean of TotalPrice, attached to each row the same way.
average_purchase_value = (
    data.groupby('CustomerID')['TotalPrice']
    .mean()
    .rename('AvgPurchaseValue')
    .reset_index()
)
data = pd.merge(data, average_purchase_value, how='left', on='CustomerID')

# Encoding and Normalizing
# Numeric columns that are standardised below.
features_to_normalize = ['Quantity', 'UnitPrice', 'TotalPrice', 'PurchaseCount', 'AvgPurchaseValue']

# One-hot encode Country: the column is replaced by one indicator column per
# distinct country value.
data = pd.get_dummies(data, columns=['Country'])

# Standardise the numeric columns and write the result back in place of the
# raw values.
# NOTE(review): the scaler is fitted on the whole frame, i.e. before the
# train/test split further down, and PurchaseCount (later used as the target)
# is scaled along with the inputs.
scaler = StandardScaler()
scaled_values = scaler.fit_transform(data[features_to_normalize])
data[features_to_normalize] = scaled_values

# Define features and target
target_column = 'PurchaseCount'  # Example target column

# Identifier, free-text and raw date columns that are not fed to the model.
non_feature_columns = [
    'InvoiceNo', 'StockCode', 'Description', 'InvoiceDate',
    'CustomerID', 'Year', 'Month', 'DayOfWeek',
]

# The target column must also be removed from the inputs. Leaving it in lets
# the network copy the answer straight from its input, which makes the
# reported loss and MAE meaningless.
features = data.drop(columns=non_feature_columns + [target_column])
target = data[target_column]

# Ensure no NaN values in features and target. The results are assigned back
# rather than filled in place, so `data` itself is never mutated through a
# derived object.
features = features.fillna(0)
target = target.fillna(0)

# Convert to float arrays up front: the one-hot columns may be bool, and a
# mixed bool/float frame would otherwise yield an object-dtype array.
X = features.to_numpy(dtype=float)
y = target.to_numpy(dtype=float)

# Train-test split: 80/20, with a fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Neural Network Model
# A small fully connected regressor: two ReLU hidden layers and a single
# linear output unit. The input width is declared with an explicit Input
# object instead of the deprecated `input_dim=` argument on the first Dense
# layer, and is taken from the array that is actually fed to the model.
model = Sequential([
    tf.keras.Input(shape=(X_train.shape[1],)),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(1, activation='linear'),
])

# Mean squared error is the training loss; mean absolute error is reported as
# an additional metric.
model.compile(optimizer='adam', loss='mse', metrics=['mae'])

# Train the model. validation_split=0.2 sets aside part of the training
# arrays for per-epoch validation.
model.fit(X_train, y_train, epochs=50, batch_size=32, validation_split=0.2)

# Evaluate the model on the held-out test split.
loss, mae = model.evaluate(X_test, y_test)
print(f'Loss: {loss}, Mean Absolute Error: {mae}')

# Predictions for market segmentation, computed over every row.
# The frame is cast to float explicitly: a mix of bool one-hot columns and
# float columns would otherwise be handed over as an object-dtype array, which
# TensorFlow cannot convert to a tensor.
predictions = model.predict(features.to_numpy(dtype=float))


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50