In [56]:
import pandas as pd

def analyze_csv(file_path):
    # Read the CSV file into a DataFrame
    df = pd.read_csv(file_path)

    # Remove duplicates and keep the last occurrence (to get the latest data)
    df_no_duplicates = df.drop_duplicates(keep='last')

    # Find the country with the highest population
    country_highest_population = df_no_duplicates[df_no_duplicates['2019 [YR2019]'] == df_no_duplicates['2019 [YR2019]'].max()]['Country Name'].values[0]

    # Find the country with the lowest population
    country_lowest_population = df_no_duplicates[df_no_duplicates['2019 [YR2019]'] == df_no_duplicates['2019 [YR2019]'].min()]['Country Name'].values[0]

    # Print the results
    print("Country with the highest population: ", country_highest_population)
    print("Country with the lowest population: ", country_lowest_population)
    #print(df.head(3))

if __name__ == "__main__":
    file_path = "Population.csv"
    analyze_csv(file_path)

Country with the highest population:  Brazil
Country with the lowest population:  British Virgin Islands


In [54]:
import pandas as pd

def analyze_insurance_data(csv_file):
    # Step 1: Read the CSV data into a pandas DataFrame
    df = pd.read_csv(csv_file, index_col=0)

    # Step 2: Clean the data and remove any unwanted characters like commas and spaces
    df = df.replace({',': '', ' ': ''}, regex=True)

    # Step 3: Handle parentheses and hyphens, and convert to integers
    df = df.applymap(lambda x: int(x.replace('(', '').replace(')', '').replace('-', '0')) if isinstance(x, str) else x)

    # Step 4: Handle NaN values and convert to integers
    df = df.fillna(0).astype(int)

    # Step 5: Calculate key metrics
    net_written_premiums = df.loc[df.index.str.lower() == 'net written  premiums', :]
    net_earned_premiums = df.loc[df.index.str.lower() == 'net earned premiums', :]

    # Check if the required keys exist in the DataFrame
    if net_written_premiums.empty or net_earned_premiums.empty:
        print("Error: 'Net Written Premiums' or 'Net Earned Premiums' not found in the data.")
        return

    # Calculate total written and earned premiums for each period
    total_written_premiums = net_written_premiums.sum()
    total_earned_premiums = net_earned_premiums.sum()

    # Calculate percentage change in written and earned premiums over time
    percentage_change_written_premiums = ((net_written_premiums.iloc[-1] - net_written_premiums.iloc[0]) / net_written_premiums.iloc[0]) * 100
    percentage_change_earned_premiums = ((net_earned_premiums.iloc[-1] - net_earned_premiums.iloc[0]) / net_earned_premiums.iloc[0]) * 100

    # Step 6: Find trends and insights
    # Identify periods with the highest and lowest written and earned premiums
    max_written_premiums_period = net_written_premiums.idxmax()
    min_written_premiums_period = net_written_premiums.idxmin()
    max_earned_premiums_period = net_earned_premiums.idxmax()
    min_earned_premiums_period = net_earned_premiums.idxmin()

    # Identify the average written and earned premiums
    average_written_premiums = net_written_premiums.mean()
    average_earned_premiums = net_earned_premiums.mean()

    # Print the key insights
    print("Key Insights from Insurance Data:")
    print("--------------------------------------------------")
    print(f"Total Written Premiums: {total_written_premiums}")
    print(f"Total Earned Premiums: {total_earned_premiums}")
    print("--------------------------------------------------")
    print("Percentage Change in Written Premiums:")
    print(percentage_change_written_premiums)
    print("--------------------------------------------------")
    print("Percentage Change in Earned Premiums:")
    print(percentage_change_earned_premiums)
    print("--------------------------------------------------")
    print(f"Period with Highest Written Premiums: {max_written_premiums_period}")
    print(f"Period with Lowest Written Premiums: {min_written_premiums_period}")
    print(f"Period with Highest Earned Premiums: {max_earned_premiums_period}")
    print(f"Period with Lowest Earned Premiums: {min_earned_premiums_period}")
    print("--------------------------------------------------")
    print(f"Average Written Premiums: {average_written_premiums}")
    print(f"Average Earned Premiums: {average_earned_premiums}")
    print("--------------------------------------------------")

# Example usage:
csv_file_path = "Insurance.csv"
analyze_insurance_data(csv_file_path)


Error: 'Net Written Premiums' or 'Net Earned Premiums' not found in the data.
