# DeepCSAT — E-commerce Customer Satisfaction Score Prediction
End-to-end notebook: data preprocessing, model training (Keras), evaluation, and Streamlit snippet.
Assumes a CSV `data.csv` with columns: `review` (text), numeric/tabular features, and target `satisfaction_score` (float).


In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import joblib
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import os
print('ready')


In [None]:
# Read the raw dataset from the working directory and preview its first rows
csv_path = 'data.csv'
df = pd.read_csv(csv_path)
df.head()


In [None]:
# Basic preprocessing — adjust column names as necessary
from scipy import sparse

text_col = 'review'  # change if different
target_col = 'satisfaction_score'  # change if different

# Numeric feature columns: every numeric-dtype column except the target
numeric_cols = [
    col for col in df.select_dtypes(include=[np.number]).columns
    if col != target_col
]

# Missing reviews become empty strings; astype(str) keeps the vectorizer from
# failing on stray non-string cells. Missing numeric values are filled with 0.
X_text = df[text_col].fillna('').astype(str)
X_num = df[numeric_cols].fillna(0)
y = df[target_col].astype(float).values

# Split row positions FIRST so that the vectorizer and the scaler below are
# fitted on training rows only. Fitting them on the full dataset would leak
# test-set vocabulary / IDF weights and mean / variance into the features.
train_idx, test_idx = train_test_split(
    np.arange(len(df)), test_size=0.2, random_state=42
)
y_train, y_test = y[train_idx], y[test_idx]

# TF-IDF: fit on training reviews, then transform every row
tfidf = TfidfVectorizer(max_features=10000, ngram_range=(1, 2))
tfidf.fit(X_text.iloc[train_idx])
X_text_tfidf = tfidf.transform(X_text).astype(np.float32)

# Scale numeric: fit on training rows, then transform every row
scaler = StandardScaler()
scaler.fit(X_num.iloc[train_idx])
X_num_scaled = scaler.transform(X_num)

# Combine text and numeric features; CSR format allows slicing rows by position
X_combined = sparse.hstack(
    [X_text_tfidf, sparse.csr_matrix(X_num_scaled)]
).tocsr()
X_train, X_test = X_combined[train_idx], X_combined[test_idx]

# Save vectorizer and scaler
os.makedirs('artifacts', exist_ok=True)
joblib.dump(tfidf, 'artifacts/tfidf.joblib')
joblib.dump(scaler, 'artifacts/scaler.joblib')
print('artifacts saved')


In [None]:
# Build a small fully connected Keras regressor. It is fed dense float rows,
# so sparse feature matrices have to be densified before reaching it.
import tensorflow as tf
from tensorflow.keras import layers

input_dim = X_train.shape[1]

# Two ReLU hidden layers with dropout in between, then one linear output unit
layer_stack = [
    layers.Input(shape=(input_dim,)),
    layers.Dense(256, activation='relu'),
    layers.Dropout(0.3),
    layers.Dense(128, activation='relu'),
    layers.Dense(1, activation='linear'),
]
model = keras.Sequential(layer_stack)

# Optimise mean squared error and report RMSE alongside the loss
rmse_metric = tf.keras.metrics.RootMeanSquaredError()
model.compile(optimizer='adam', loss='mse', metrics=[rmse_metric])
model.summary()


In [None]:
# Convert sparse to dense in batches during training using tf.data
def make_dataset(X, y, batch_size=32, shuffle=True, shuffle_buffer=2048):
    """Stream a sparse feature matrix and its targets as a batched ``tf.data.Dataset``.

    Rows are densified one at a time inside a generator, so the full matrix
    is never held in memory as a dense array.

    Args:
        X: SciPy sparse matrix of shape (n_samples, n_features).
        y: Indexable sequence of n_samples numeric targets.
        batch_size: Number of rows per emitted batch.
        shuffle: Whether to shuffle rows through a shuffle buffer.
        shuffle_buffer: Size of the shuffle buffer, in rows. Each buffered
            row is a dense float32 vector of length n_features.

    Returns:
        A ``tf.data.Dataset`` yielding ``(features, target)`` float32 batches.

    Raises:
        ValueError: If ``y`` does not have one entry per row of ``X``.
    """
    X = X.tocsr()
    n_rows, n_features = X.shape
    if len(y) != n_rows:
        raise ValueError(
            f'X has {n_rows} rows but y has {len(y)} entries'
        )

    def gen():
        for i in range(n_rows):
            # Cast explicitly so the yielded values match the declared signature
            features = X.getrow(i).toarray().reshape(-1).astype(np.float32)
            yield features, np.float32(y[i])

    signature = (
        tf.TensorSpec(shape=(n_features,), dtype=tf.float32),
        tf.TensorSpec(shape=(), dtype=tf.float32),
    )
    ds = tf.data.Dataset.from_generator(gen, output_signature=signature)
    if shuffle:
        ds = ds.shuffle(shuffle_buffer)
    return ds.batch(batch_size).prefetch(tf.data.AUTOTUNE)

# Shuffled training batches; validation batches keep their row order
train_ds = make_dataset(X_train, y_train, batch_size=64)
val_ds = make_dataset(X_test, y_test, shuffle=False, batch_size=64)

# Stop once val_loss has failed to improve for 5 epochs and restore the best
# weights seen. NOTE(review): the validation data is the held-out test split,
# so the metrics reported on X_test later are not from fully unseen data.
early_stopping = keras.callbacks.EarlyStopping(
    restore_best_weights=True,
    patience=5,
    monitor='val_loss',
)
callbacks = [early_stopping]

history = model.fit(
    train_ds,
    epochs=50,
    validation_data=val_ds,
    callbacks=callbacks,
)

# NOTE(review): newer Keras releases may require a `.keras` or `.h5` suffix
# here — confirm against the installed version. The Streamlit snippet loads
# this exact path, so change both together.
model.save('artifacts/deepcsat_model')
print('model saved')


In [None]:
# Evaluate
def predict_sparse(model, X, batch_size=256):
    """Predict targets for a sparse feature matrix in dense batches.

    Only ``batch_size`` rows are densified at a time, which keeps memory
    bounded while avoiding one ``model.predict`` call per row.

    Args:
        model: Object exposing ``predict(array, verbose=0)`` that returns an
            array of shape (n_rows, 1), e.g. the Keras model trained above.
        X: SciPy sparse matrix of shape (n_samples, n_features).
        batch_size: Number of rows densified and predicted per call.

    Returns:
        1-D NumPy array holding one prediction per row of ``X``.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError('batch_size must be a positive integer')

    X = X.tocsr()
    n_rows = X.shape[0]
    if n_rows == 0:
        # Nothing to predict; skip calling the model on an empty batch
        return np.empty(0, dtype=np.float32)

    chunks = []
    for start in range(0, n_rows, batch_size):
        dense = X[start:start + batch_size].toarray().astype(np.float32)
        # Column 0 is the single regression output
        chunks.append(np.asarray(model.predict(dense, verbose=0))[:, 0])
    return np.concatenate(chunks)

# Score the held-out split with MAE, RMSE and R^2
preds = predict_sparse(model, X_test)
print('MAE:', mean_absolute_error(y_test, preds))
# Take the square root of the MSE directly: the `squared=False` argument is
# deprecated and absent from recent scikit-learn releases.
print('RMSE:', np.sqrt(mean_squared_error(y_test, preds)))
print('R2:', r2_score(y_test, preds))


## Streamlit app snippet (save as `streamlit_app.py`)


In [None]:
# Source of a minimal Streamlit app that loads the saved artifacts and scores
# one review. The numeric inputs are derived from the fitted scaler, so the
# form always matches the feature count used at training time.
streamlit_snippet = '''
import streamlit as st
import joblib
import numpy as np
import pandas as pd
from scipy import sparse
import tensorflow as tf

st.title('DeepCSAT — E-commerce Satisfaction Predictor')
tfidf = joblib.load('artifacts/tfidf.joblib')
scaler = joblib.load('artifacts/scaler.joblib')
model = tf.keras.models.load_model('artifacts/deepcsat_model')

# Build one numeric input per feature the scaler was fitted on. Column names
# are only stored when the scaler was fitted on a DataFrame with string names.
n_numeric = int(scaler.n_features_in_)
has_names = hasattr(scaler, 'feature_names_in_')
if has_names:
    numeric_names = [str(name) for name in scaler.feature_names_in_]
else:
    numeric_names = [f'feature_{i + 1}' for i in range(n_numeric)]

review = st.text_area('Customer review', '')
numeric_values = [
    st.number_input(name, value=0.0, key=f'num_{i}')
    for i, name in enumerate(numeric_names)
]
if st.button('Predict'):
    X_text = tfidf.transform([review])
    if has_names:
        # Pass named columns so the scaler sees the layout it was fitted on
        num_row = pd.DataFrame([numeric_values], columns=scaler.feature_names_in_)
    else:
        num_row = np.array([numeric_values], dtype=float)
    X_num = scaler.transform(num_row)
    X = sparse.hstack([X_text, sparse.csr_matrix(X_num)])
    # predict
    row = X.tocsr().getrow(0).toarray().reshape(1, -1).astype('float32')
    pred = model.predict(row, verbose=0)[0,0]
    st.write('Predicted satisfaction score:', float(pred))
'''
print(streamlit_snippet)
