In [1]:
from urllib.request import urlretrieve

# Remote location of the medical-charges CSV used throughout this notebook.
medical_charges_url = 'https://raw.githubusercontent.com/JovianML/opendatasets/master/data/medical-charges.csv'

# Fetch the CSV into the working directory. The call is the cell's last
# expression, so the notebook echoes the (filename, headers) tuple it returns.
urlretrieve(url=medical_charges_url, filename='medical_csv')

('medical_csv', <http.client.HTTPMessage at 0x2cd15a6f910>)

In [2]:
import pandas as pd

# Parse the local file written by the download cell into a DataFrame.
data = pd.read_csv(filepath_or_buffer='medical_csv')

In [3]:
# Print a structural summary of the frame: index range, column names,
# non-null counts, dtypes and memory usage.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1338 entries, 0 to 1337
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   age       1338 non-null   int64  
 1   sex       1338 non-null   object 
 2   bmi       1338 non-null   float64
 3   children  1338 non-null   int64  
 4   smoker    1338 non-null   object 
 5   region    1338 non-null   object 
 6   charges   1338 non-null   float64
dtypes: float64(2), int64(2), object(3)
memory usage: 73.3+ KB


In [4]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric
# columns; the object-dtype columns are left out of this table.
data.describe()

Unnamed: 0,age,bmi,children,charges
count,1338.0,1338.0,1338.0,1338.0
mean,39.207025,30.663397,1.094918,13270.422265
std,14.04996,6.098187,1.205493,12110.011237
min,18.0,15.96,0.0,1121.8739
25%,27.0,26.29625,0.0,4740.28715
50%,39.0,30.4,1.0,9382.033
75%,51.0,34.69375,2.0,16639.912515
max,64.0,53.13,5.0,63770.42801


In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
# Apply seaborn's "whitegrid" style to the figures created after this call.
sns.set_style("whitegrid")

# Bar Plot
# One bar per (region, smoker) pair; seaborn's default estimator is the mean,
# which is what the title refers to as "Average Charges".
fig, ax = plt.subplots(figsize=(10, 6))
# errorbar=None turns the error bars off. It replaces ci=None, which has been
# deprecated since seaborn 0.12 and emits a FutureWarning there.
sns.barplot(data=data, x="region", y="charges", hue="smoker", errorbar=None, ax=ax)
ax.set_title('Average Charges per Region by Smoker Status')
fig.savefig('bar_plot.png')
plt.show()

# Box Plot
# Distribution of charges within each region, with side-by-side boxes per sex.
fig, ax = plt.subplots(figsize=(10, 6))
sns.boxplot(data=data, x="region", y="charges", hue="sex", ax=ax)
ax.set_title('Box Plot of Charges per Region by Sex')
fig.savefig('box_plot.png')
plt.show()

# Histogram
# 30-bin histogram of the charges column with a KDE curve overlaid.
fig, ax = plt.subplots(figsize=(10, 6))
sns.histplot(data=data, x='charges', kde=True, bins=30, ax=ax)
ax.set_title('Histogram of Charges')
fig.savefig('histogram.png')
plt.show()

# Scatter Plot
# BMI against charges; colour encodes smoker status, marker shape encodes sex.
fig, ax = plt.subplots(figsize=(10, 6))
sns.scatterplot(data=data, x='bmi', y='charges', hue='smoker', style='sex', ax=ax)
ax.set_title('Scatter Plot of BMI vs Charges')
fig.savefig('scatter_plot.png')
plt.show()

# Heatmap
# Pairwise correlations between the numeric columns, annotated with the
# coefficient values and drawn on a diverging palette centred at zero.
fig, ax = plt.subplots(figsize=(10, 6))
# sex, smoker and region are object-dtype (string) columns. DataFrame.corr()
# raises on them in pandas >= 2.0, so restrict to numeric columns explicitly;
# select_dtypes works on every pandas version.
numeric_corr = data.select_dtypes(include='number').corr()
sns.heatmap(numeric_corr, annot=True, cmap='coolwarm', center=0, ax=ax)
ax.set_title('Heatmap of Correlations')
fig.savefig('heatmap.png')
plt.show()

# Violin Plot
# Charges per region; split=True draws the two sex levels as the two halves
# of a single violin instead of two separate violins.
fig, ax = plt.subplots(figsize=(10, 6))
sns.violinplot(data=data, x="region", y="charges", hue="sex", split=True, ax=ax)
ax.set_title('Violin Plot of Charges per Region by Sex')
fig.savefig('violin_plot.png')
plt.show()

# Pair Plot
# pairplot is a figure-level function: it builds its own figure (a PairGrid),
# so no plt.figure() call is made here -- one would only leave an empty extra
# figure behind in the output.
pair_grid = sns.pairplot(data, hue='smoker')
# Title the whole grid rather than a single subplot. The figure pairplot just
# created is the current figure, so plt.suptitle targets it; y > 1 lifts the
# title above the top row of axes.
plt.suptitle('Pair Plot of Dataset', y=1.02)
# Grid.savefig saves with bbox_inches='tight' by default, which keeps the
# raised title inside the saved image.
pair_grid.savefig('pair_plot.png')
plt.show()

# Line Plot
# Charges against age, one line per smoker status. Ages repeat across rows,
# so seaborn aggregates the charges at each age before drawing the line.
fig, ax = plt.subplots(figsize=(10, 6))
sns.lineplot(data=data, x='age', y='charges', hue='smoker', ax=ax)
ax.set_title('Line Plot of Charges by Age')
fig.savefig('line_plot.png')
plt.show()

# Swarm Plot
# Every individual charge plotted per region, coloured by smoker status.
fig, ax = plt.subplots(figsize=(10, 6))
sns.swarmplot(data=data, x="region", y="charges", hue="smoker", ax=ax)
ax.set_title('Swarm Plot of Charges per Region by Smoker Status')
fig.savefig('swarm_plot.png')
plt.show()

# Count Plot
# Number of rows for each value of `children`, split by smoker status.
fig, ax = plt.subplots(figsize=(10, 6))
sns.countplot(data=data, x="children", hue="smoker", ax=ax)
ax.set_title('Count Plot of Children by Smoker Status')
fig.savefig('count_plot.png')
plt.show()
