In [58]:
import pandas as pd

def load_data(file_paths):
    """Read every CSV listed in *file_paths* into a pandas DataFrame.

    Args:
        file_paths: Mapping of a logical dataset name to the CSV path to read.

    Returns:
        A dict mapping each name to its loaded DataFrame, or ``None`` as soon
        as one of the files cannot be found (an error line is printed first).
    """
    loaded = {}
    for dataset, csv_path in file_paths.items():
        try:
            # index_col=False: never promote the first column to the index.
            frame = pd.read_csv(csv_path, index_col=False)
        except FileNotFoundError:
            print(f"Error: File not found at {csv_path}")
            return None
        else:
            loaded[dataset] = frame
    return loaded

def analyze_client_industries(industry_client_details):
    """Print the distinct industries and the number of unique clients in them.

    Args:
        industry_client_details: DataFrame with at least the columns
            ``industry`` and ``client_id``.

    Returns:
        None. All results are written to standard output.
    """
    industries = industry_client_details['industry']
    print("Unique Industries:", industries.unique())

    # Restrict to the two industries of interest before counting clients.
    focus_industries = ['Block Chain', 'Finance Lending']
    focus_rows = industry_client_details[industries.isin(focus_industries)]
    focus_counts = focus_rows.groupby('industry')['client_id'].nunique()
    print("\nNumber of clients in Finance Lending and Block Chain:")
    print(focus_counts)

    # Same distinct-client count, this time across every industry.
    clients_by_industry = industry_client_details.groupby('industry')['client_id']
    per_industry_counts = clients_by_industry.nunique()
    print("\nNumber of clients in each industry:")
    print(per_industry_counts)

def analyze_subscription_renewals(subscription_information, industry_client_details, finanical_information):
    """Print renewal statistics per industry.

    Three reports are written to standard output:

    1. The industry with the largest sum of ``renewal_factor`` over renewed
       subscriptions. The factor is 1 for ``'Monthly'`` subscriptions and 12
       for every other subscription type, to offset the bias between monthly
       and yearly renewals.
    2. The industry with the largest plain count of renewed subscriptions.
    3. The mean ``renewal_factor`` per industry over renewed subscriptions
       whose start date lies inside a period of ``finanical_information``.

    None of the input DataFrames is modified.

    Args:
        subscription_information: DataFrame with the columns ``client_id``,
            ``subscription_type``, ``start_date``, ``end_date`` and ``renewed``.
        industry_client_details: DataFrame with the columns ``client_id`` and
            ``industry``.
        finanical_information: DataFrame with the columns ``start_date`` and
            ``end_date`` describing each financial period.

    Returns:
        None.
    """
    # assign() returns a new frame, so the caller's DataFrame does not gain a
    # 'renewal_factor' column as a side effect of running this report.
    subscriptions = subscription_information.assign(
        renewal_factor=subscription_information['subscription_type'].apply(
            lambda kind: 12 if kind != 'Monthly' else 1
        )
    )
    joined_df = pd.merge(
        subscriptions,
        industry_client_details[['client_id', 'industry']],
        on='client_id',
        how='left',
    )

    # Both "highest renewal" reports look only at renewed subscriptions.
    renewed_by_industry = joined_df[joined_df['renewed'] == True].groupby('industry')

    highest_renewal_industry_adjusted = renewed_by_industry['renewal_factor'].sum().idxmax()
    print("\nIndustry with the highest renewal rate (sum of renewal_factor, adjusted for renewal type):")
    print(highest_renewal_industry_adjusted)

    # Without the bias adjustment the top industry is decided by raw count.
    highest_renewal_industry_unadjusted = renewed_by_industry['renewed'].count().idxmax()
    print("\nIndustry with the highest renewal count (unadjusted for renewal type):")
    print(highest_renewal_industry_unadjusted)

    # Pair every subscription with every financial period, then keep the pairs
    # where the subscription started inside the period.
    merged_df = joined_df.merge(finanical_information, how='cross', suffixes=('_sub', '_fin'))
    # NOTE(review): the dates are compared exactly as loaded; if they are
    # strings this is only correct for ISO formatted (YYYY-MM-DD) values.
    starts_in_period = (
        (merged_df['start_date_sub'] >= merged_df['start_date_fin'])
        & (merged_df['start_date_sub'] <= merged_df['end_date_fin'])
    )
    # Parenthesised on purpose: `&` binds tighter than `==`, so an unbracketed
    # `... & merged_df['renewed'] == True` compares the whole mask to True.
    was_renewed = (merged_df['renewed'] == True)
    merged_df = merged_df[starts_in_period & was_renewed]

    # NOTE(review): the report is labelled "average inflation rate" but the
    # value averaged is renewal_factor; no column of finanical_information is
    # used here. Confirm which column was intended.
    avg_inflation = merged_df.groupby('industry')['renewal_factor'].mean()
    print("\nAverage inflation rate (renewal_factor) when subscriptions were renewed, by industry:")
    print(avg_inflation)

def analyze_payment_data(payment_information):
    """Print the median ``amount_paid`` for each calendar year of payment.

    The year is derived from ``payment_date`` on a separate Series, so the
    caller's DataFrame is left untouched: ``payment_date`` keeps its original
    dtype and no ``year`` column is added.

    Args:
        payment_information: DataFrame with the columns ``payment_date``
            (anything ``pd.to_datetime`` can parse) and ``amount_paid``.

    Returns:
        None. The per-year medians are written to standard output.
    """
    # Naming the key 'year' keeps the printed index header the same as when
    # the year was stored as a column of the frame.
    payment_year = pd.to_datetime(payment_information['payment_date']).dt.year.rename('year')
    median_per_year = payment_information.groupby(payment_year)['amount_paid'].median()
    print("\nMedian amount paid each year:")
    print(median_per_year)

def main():
    """Load the four CSV datasets and run every analysis in sequence.

    Stops without running any analysis when loading reports a missing file.
    """
    file_paths = dict(
        finanical_information='finanical_information.csv',
        industry_client_details='industry_client_details.csv',
        payment_information='payment_information.csv',
        subscription_information='subscription_information.csv',
    )

    dataframes = load_data(file_paths)
    if dataframes is None:
        # load_data has already printed which file was missing.
        return

    clients = dataframes['industry_client_details']
    subscriptions = dataframes['subscription_information']
    financials = dataframes['finanical_information']
    payments = dataframes['payment_information']

    analyze_client_industries(clients)
    analyze_subscription_renewals(subscriptions, clients, financials)
    analyze_payment_data(payments)

# Run the analysis only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()

Unique Industries: ['Finance Lending' 'Block Chain' 'Hyper Local' 'AI' 'Gaming']

Number of clients in Finance Lending and Block Chain:
industry
Block Chain        25
Finance Lending    22
Name: client_id, dtype: int64

Number of clients in each industry:
industry
AI                 11
Block Chain        25
Finance Lending    22
Gaming             22
Hyper Local        20
Name: client_id, dtype: int64

Industry with the highest renewal rate (sum of renewal_factor, adjusted for renewal type):
Gaming

Industry with the highest renewal count (unadjusted for renewal type):
Gaming

Average inflation rate (renewal_factor) when subscriptions were renewed, by industry:
industry
AI                 4.142857
Block Chain        6.000000
Finance Lending    3.750000
Gaming             6.500000
Hyper Local        3.444444
Name: renewal_factor, dtype: float64

Median amount paid each year:
year
2018    235.7
2019    360.9
2020    284.5
2021    306.8
2022    288.0
Name: amount_paid, dtype: float64
