In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import numpy as np

# Plot styling: seaborn's white-grid theme with the "husl" colour palette,
# applied through seaborn rather than plt.style.use.
sns.set_style("whitegrid")
sns.set_palette("husl")

# Load the four source tables, in this order, into module-level DataFrames.
# NOTE(review): 'finanical_information.csv' looks misspelled; presumably it
# matches the file name on disk -- confirm before renaming.
financial_df, payment_df, subscription_df, industry_df = (
    pd.read_csv(csv_name)
    for csv_name in (
        'finanical_information.csv',
        'payment_information.csv',
        'subscription_information.csv',
        'industry_client_details.csv',
    )
)

In [9]:
def analyze_industry_clients():
    """
    Report how many clients fall into the Finance Lending and Block Chain industries.

    Reads the module-level ``industry_df``, tallies rows per value of its
    ``industry`` column, keeps the two target industries, writes a bar chart
    to 'industry_counts.png' and prints the two counts.
    """
    wanted = ['Finance Lending', 'Block Chain']

    # Tally every industry first, then keep only the two of interest
    # (label-based selection; both labels must be present in the tally).
    per_industry = industry_df['industry'].value_counts()
    target_industries = per_industry[wanted]

    # Draw on an explicitly created figure/axes pair, save it, then release it.
    fig, ax = plt.subplots(figsize=(10, 6))
    target_industries.plot(kind='bar', ax=ax)
    ax.set_title('Number of Finance Lending and Blockchain Clients')
    ax.set_xlabel('Industry')
    ax.set_ylabel('Number of Clients')
    plt.setp(ax.get_xticklabels(), rotation=45)
    fig.tight_layout()
    fig.savefig('industry_counts.png')
    plt.close(fig)

    print("\n1. Industry Client Counts:")
    print(target_industries)


analyze_industry_clients()


1. Industry Client Counts:
industry
Finance Lending    22
Block Chain        25
Name: count, dtype: int64


In [10]:
def analyze_renewal_rates():
    """
    Compute the percentage of renewed subscriptions for each industry.

    Joins the module-level ``subscription_df`` and ``industry_df`` on
    ``client_id``, derives sum(renewed) / count(renewed) * 100 per industry,
    saves a bar chart to 'renewal_rates.png' and prints the rates rounded to
    two decimals.
    """
    # Attach each subscription to its client's industry.
    joined = subscription_df.merge(industry_df, on='client_id')

    # Per-industry totals: how many subscriptions exist and how many renewed.
    renewed_by_industry = joined.groupby('industry')['renewed']
    n_subscriptions = renewed_by_industry.count()
    n_renewed = renewed_by_industry.sum()
    renewal_rate = ((n_renewed / n_subscriptions) * 100).rename('renewal_rate')

    # Draw on an explicitly created figure/axes pair, save it, then release it.
    fig, ax = plt.subplots(figsize=(12, 6))
    renewal_rate.plot(kind='bar', ax=ax)
    ax.set_title('Renewal Rate by Industry')
    ax.set_xlabel('Industry')
    ax.set_ylabel('Renewal Rate (%)')
    plt.setp(ax.get_xticklabels(), rotation=45)
    fig.tight_layout()
    fig.savefig('renewal_rates.png')
    plt.close(fig)

    print("\n2. Industry Renewal Rates:")
    print(renewal_rate.round(2))

analyze_renewal_rates()


2. Industry Renewal Rates:
industry
AI                 63.64
Block Chain        44.00
Finance Lending    54.55
Gaming             72.73
Hyper Local        45.00
Name: renewal_rate, dtype: float64


In [11]:
def analyze_inflation_rates():
    """
    Compare inflation rates between renewed and non-renewed subscriptions.

    Left-joins the module-level ``subscription_df`` to ``financial_df`` on
    ``start_date``, prints the mean ``inflation_rate`` over renewed
    subscriptions and saves a box plot of ``inflation_rate`` split by
    ``renewed`` to 'inflation_rates.png'.

    Side effects: the ``start_date`` column of both module-level frames is
    converted to datetime in place.
    """
    # Give both join keys the same datetime dtype (mutates the global frames).
    for frame in (subscription_df, financial_df):
        frame['start_date'] = pd.to_datetime(frame['start_date'])

    # The left join keeps every subscription; rows with no exactly matching
    # financial start_date carry NaN in inflation_rate.
    # NOTE(review): an exact-date join presumes subscription start dates
    # coincide with the financial rows' start dates -- confirm this is intended.
    with_inflation = subscription_df.merge(financial_df, on='start_date', how='left')

    # Mean inflation over the renewed subscriptions only (NaN values are skipped).
    renewed_mask = with_inflation['renewed'] == True
    avg_inflation = with_inflation.loc[renewed_mask, 'inflation_rate'].mean()

    # Draw on an explicitly created figure/axes pair, save it, then release it.
    fig, ax = plt.subplots(figsize=(10, 6))
    sns.boxplot(x='renewed', y='inflation_rate', data=with_inflation, ax=ax)
    ax.set_title('Inflation Rate Distribution for Renewed vs Non-Renewed Subscriptions')
    ax.set_xlabel('Subscription Renewed')
    ax.set_ylabel('Inflation Rate (%)')
    fig.tight_layout()
    fig.savefig('inflation_rates.png')
    plt.close(fig)

    print("\n3. Average Inflation Rate during Renewals:")
    print(f"{avg_inflation:.2f}%")

analyze_inflation_rates()


3. Average Inflation Rate during Renewals:
3.84%


In [None]:
def analyze_payment_amounts():
    """
    Analyzes payment trends over time by payment method.

    Computes the median ``amount_paid`` for every (year, payment method) pair
    in the module-level ``payment_df``, draws a grouped (side-by-side, not
    stacked) bar chart saved as 'payment_amounts.png' and prints the medians
    rounded to two decimals.

    Side effects: ``payment_df['payment_date']`` is converted to datetime and
    a ``year`` column is added to ``payment_df`` in place. Both are kept from
    the original implementation in case other cells rely on them.
    """
    # Convert payment date to datetime and extract year
    payment_df['payment_date'] = pd.to_datetime(payment_df['payment_date'])
    payment_df['year'] = payment_df['payment_date'].dt.year

    # Median amount per (year, payment method); unstack turns the payment
    # methods into columns so each year becomes one group of bars.
    yearly_medians = (
        payment_df
        .groupby(['year', 'payment_method'])['amount_paid']
        .median()
        .unstack()
    )

    # DataFrame.plot opens a brand-new figure unless it is handed an Axes.
    # Creating the figure here and passing ax= makes the 12x6 size apply to
    # the saved image and avoids leaving an empty figure open afterwards.
    fig, ax = plt.subplots(figsize=(12, 6))
    yearly_medians.plot(kind='bar', stacked=False, ax=ax)
    ax.set_title('Median Payment Amount by Year and Payment Method')
    ax.set_xlabel('Year')
    ax.set_ylabel('Median Amount ($)')
    ax.legend(title='Payment Method')
    fig.tight_layout()
    fig.savefig('payment_amounts.png')
    plt.close(fig)

    print("\n4. Median Payment Amounts by Year and Payment Method:")
    print(yearly_medians.round(2))

analyze_payment_amounts()


4. Median Payment Amounts by Year and Payment Method:
payment_method  Bank Transfer  Check  Credit Card
year                                             
2018                   281.65  216.6       229.15
2019                   184.20  410.2       401.90
2020                   225.10  413.1       285.25
2021                   255.30  435.1       208.70
2022                   196.50  275.5       326.20


<Figure size 1200x600 with 0 Axes>