In [None]:
# Import necessary libraries and modules
import pandas as pd
from src.data_preprocessing import load_data
from src.visualization import (
    plot_age_distribution,
    plot_gender_distribution,
    plot_income_distribution,
    plot_total_spent_distribution,
    plot_product_category_distribution,
    plot_churn_status_distribution
)
from src.churn_analysis import (
    analyze_churn_by_age,
    analyze_churn_by_income,
    analyze_churn_by_spent
)



In [None]:
# Step 1: Load the Data
# Location of the Excel workbook that contains the customer churn data.
file_path = 'data/Customer_Churn_Data_Large.xlsx'

# Unpack the five datasets returned by load_data, in the order it returns them.
(
    customer_demographics_df,
    transaction_history_df,
    customer_service_df,
    online_activity_df,
    churn_status_df,
) = load_data(file_path)

# Step 2: Data Preprocessing (Merging Data)
# Build one consolidated, customer-level dataframe (churn_df) for the churn
# analysis: demographics + churn status + total spend + service interaction.

# Total amount spent per customer, summed over all of that customer's transactions.
total_spent_df = transaction_history_df.groupby('CustomerID').agg(TotalSpent=('AmountSpent', 'sum')).reset_index()

# Collapse the customer-service data to a single row per customer before merging.
# Without this, a customer with several recorded interactions would be repeated
# once per interaction in churn_df and therefore counted multiple times by every
# distribution/churn plot built from it.
# NOTE(review): keeping the first listed interaction is a simplification --
# confirm whether a different choice (e.g. the most recent one) is preferred.
service_summary_df = (
    customer_service_df[['CustomerID', 'InteractionType', 'ResolutionStatus']]
    .drop_duplicates(subset='CustomerID', keep='first')
)

# Left merges keep every row of the demographics table, including customers
# that have no matching row in the table being merged in.
churn_df = pd.merge(customer_demographics_df, churn_status_df, on='CustomerID', how='left')
# validate='many_to_one' documents (and checks) that the right-hand side holds
# at most one row per CustomerID, so these merges cannot add rows. Both
# right-hand tables are unique by construction (groupby / drop_duplicates).
churn_df = pd.merge(churn_df, total_spent_df, on='CustomerID', how='left', validate='many_to_one')
churn_df = pd.merge(churn_df, service_summary_df, on='CustomerID', how='left', validate='many_to_one')

# Customers without any transaction have no row in total_spent_df and come out
# of the left merge with NaN; they spent nothing, so record that as 0.
churn_df['TotalSpent'] = churn_df['TotalSpent'].fillna(0)

# Step 3: Exploratory Data Analysis (EDA) - Visualizations
# Each entry pairs a plotting helper with the dataframe passed to it. The
# helpers are called in the order listed and .show() is invoked on whatever
# each one returns.
eda_plots = [
    # 3.1: Customer Demographics Visualizations
    (plot_age_distribution, customer_demographics_df),
    (plot_gender_distribution, customer_demographics_df),
    (plot_income_distribution, customer_demographics_df),
    # 3.2: Transaction History Visualizations
    (plot_total_spent_distribution, churn_df),
    (plot_product_category_distribution, transaction_history_df),
    # 3.3: Churn Analysis Visualizations
    (plot_churn_status_distribution, churn_df),
    (analyze_churn_by_age, churn_df),
    (analyze_churn_by_income, churn_df),
    (analyze_churn_by_spent, churn_df),
]

for make_plot, source_df in eda_plots:
    make_plot(source_df).show()

# Step 4: Additional Analysis (Optional)
# Perform any other analysis here (such as correlation analysis, feature engineering, etc.)

# You can save the visualizations to an output folder if required