In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load the dataset.
# NOTE(review): 'path_to_dataset.csv' looks like a placeholder — point it at
# the real CSV file before running.
data = pd.read_csv('path_to_dataset.csv')

# Display basic information about the dataset.
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() would add a stray "None" line.
data.info()

# Summary statistics of the dataset
print(data.describe())

# Tally how many rows share each distinct value of 'age'
age_counts = data.loc[:, 'age'].value_counts()
print(age_counts)

# Keep only the rows whose 'age' is strictly greater than 40
over_40 = data.loc[data['age'].gt(40)]
print(over_40)

# Row count for every distinct region
region_counts = data.loc[:, 'region'].value_counts()
print(region_counts)

# Restrict to rows whose 'smoker' value is 'no', then count them per region
nonsmokers = data.loc[data['smoker'].eq('no')]
nonsmoker_counts = nonsmokers.loc[:, 'region'].value_counts()
print(nonsmoker_counts)

# Copy of the dataset with rows ordered by the 'smoker' column
sorted_by_smoker = data.sort_values('smoker')
print(sorted_by_smoker)

# Number of rows for each distinct age
age_groups = data.groupby(by='age').size()
print(age_groups)

# Split the observed range of 'charges' into four equal-width intervals,
# store each row's interval in a new 'charges_bins' column, and count the
# rows that fall in each interval
data['charges_bins'] = pd.cut(data['charges'], 4)
charges_groups = data.loc[:, 'charges_bins'].value_counts()
print(charges_groups)

# Mean charges for every age (rows), split by smoker status (columns)
pivot_table = pd.pivot_table(
    data,
    values='charges',
    index='age',
    columns='smoker',
    aggfunc='mean',
)
print(pivot_table)

# Cross-tabulate sex (rows) against smoker status (columns)
crosstab = pd.crosstab(index=data['sex'], columns=data['smoker'])
print(crosstab)

# Visualization
def _show_chart(draw, frame, title, **columns):
    """Render one seaborn chart on a new 10x6-inch figure and display it.

    ``draw`` is the seaborn plotting function to call, ``frame`` is handed to
    it as ``data=``, and the remaining keyword arguments (``x``, ``y``,
    ``hue``) are forwarded to it unchanged. ``title`` becomes the plot title.
    """
    plt.figure(figsize=(10, 6))
    draw(data=frame, **columns)
    plt.title(title)
    plt.show()


# Frequency of ages having healthcare insurance
_show_chart(
    sns.countplot,
    data,
    'Frequency of Ages Having Healthcare Insurance',
    x='age',
)

# Scatter plot of age vs. insurance charges
_show_chart(
    sns.scatterplot,
    data,
    'Insurance Charges vs. Age',
    x='age',
    y='charges',
)

# Bar chart of insurance charges across different regions
_show_chart(
    sns.barplot,
    data,
    'Insurance Charges Across Different Regions',
    x='region',
    y='charges',
)

# Bar chart for sex and smoker status
_show_chart(
    sns.countplot,
    data,
    'Sex and Smoker Status',
    x='sex',
    hue='smoker',
)
