# 🧠 CLV Prediction Project in Google Colab

In [None]:
from google.colab import files
# Ask the user to pick local file(s) through Colab's upload helper.
# The result is kept in `uploaded`, although no visible cell reads it afterwards;
# the next cell loads 'ecommerce_customers_clv_cleaned.csv' by name instead.
uploaded = files.upload()

In [None]:
import pandas as pd
# Load the customer dataset from the working directory.
# NOTE(review): presumably this is the file uploaded in the previous cell --
# the name has to match exactly; confirm when re-uploading.
df = pd.read_csv('ecommerce_customers_clv_cleaned.csv')
# Preview the first rows as a quick sanity check of the load.
df.head()

In [None]:
# Print the column dtypes and non-null counts of the loaded frame.
df.info()
# Summary statistics; being the cell's last expression, this is the value
# the notebook renders as the cell output.
df.describe()

In [None]:

import matplotlib.pyplot as plt
import seaborn as sns

# Class balance of the binary target: how many customers fall in each CLV_High group.
target_ax = sns.countplot(x='CLV_High', data=df)
target_ax.set_title('High CLV vs Low CLV Customers')
plt.show()

# Distribution of total spend, with a kernel-density curve drawn over the histogram.
spend_ax = sns.histplot(data=df, x='TotalSpend', kde=True)
spend_ax.set_title('Total Spend Distribution')
plt.show()


In [None]:

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score
from sklearn.preprocessing import StandardScaler

# Predictor columns fed to the classifier.
# NOTE(review): if CLV_High was derived from one of these columns (TotalSpend
# looks like a candidate), the scores below are inflated by target leakage --
# confirm how the label was built.
features = [
    'Age', 'TotalSpend', 'PurchaseCount', 'AvgSessionDuration', 'SupportTickets',
    'NewsletterSubscriber', 'TenureDays', 'RecencyDays', 'EngagementScore',
]
X = df[features]
y = df['CLV_High']

# Hold out 30% of the rows for evaluation. Stratifying on the label keeps the
# high/low CLV ratio the same in both splits, so the reported metrics are not
# skewed by an unlucky draw when the classes are imbalanced.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)

# Fit the scaler on the training rows only, then reuse its statistics on the
# test rows, so nothing about the held-out data leaks into preprocessing.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Fixed random_state keeps the fitted forest reproducible between runs.
model = RandomForestClassifier(random_state=42)
model.fit(X_train_scaled, y_train)

# Hard-label predictions drive the confusion matrix and the per-class report.
y_pred = model.predict(X_test_scaled)
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))

# ROC AUC is computed from scores rather than hard labels: take the
# probability column at index 1 of predict_proba's output.
# NOTE(review): assumes index 1 is the "high CLV" class -- confirm via model.classes_.
y_proba = model.predict_proba(X_test_scaled)[:, 1]
print('ROC AUC:', roc_auc_score(y_test, y_proba))


In [None]:

# Pair each importance score exposed by the fitted model with its feature
# name, then order the result from most to least important.
importances = model.feature_importances_
feat_imp = pd.Series(importances, index=features)
feat_imp = feat_imp.sort_values(ascending=False)

# Draw the ranked scores as a bar chart on a 10x6 figure.
plt.figure(figsize=(10, 6))
feat_imp.plot.bar(title='Feature Importances')
plt.show()


In [None]:

# Write the DataFrame to a CSV (without the index column) in the Colab
# working directory, then hand that same file to the browser for download.
export_name = "processed_clv_data.csv"
df.to_csv(export_name, index=False)
files.download(export_name)
