# In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn import metrics
import pickle  # 🔹 For saving the model

# Load dataset
medical_dataset = pd.read_csv('insurance.csv')

# Initial data exploration
# A bare expression is only rendered when it is the final statement of a
# notebook cell (and never in a plain script), so each summary is printed
# explicitly; otherwise its result would be computed and thrown away.
print(medical_dataset.head())
medical_dataset.info()  # info() writes its report to stdout by itself
print("Shape:", medical_dataset.shape)
print("Missing values per column:")
print(medical_dataset.isnull().sum())
print(medical_dataset.describe())

# Visualizations
sns.set()

# Age histogram. sns.histplot is axes-level, so it draws on the 6x6 figure
# created here. The figure-level sns.displot would open a figure of its own,
# leaving this one blank and ignoring the requested size.
plt.figure(figsize=(6, 6))
sns.histplot(medical_dataset['age'])
plt.title('Age Distribution')
plt.show()

# Count of records per sex
plt.figure(figsize=(6, 6))
sns.countplot(x='sex', data=medical_dataset)
plt.title('Sex')
plt.show()

# Count of records per number of children (default figure size)
sns.countplot(x='children', data=medical_dataset)
plt.title('Number of Children')
plt.show()

# Count of smokers vs. non-smokers
plt.figure(figsize=(5, 5))
sns.countplot(x='smoker', data=medical_dataset)
plt.title('Smoking')
plt.show()

# Count of records per region (default figure size)
sns.countplot(x='region', data=medical_dataset)
plt.title('Region')
plt.show()

# Encoding categorical features
# Integer code assigned to every category of each text column. The codes are
# the same ones the model has always been trained with (note: smoker 'yes' is
# 0 and 'no' is 1), so previously saved models stay compatible.
category_codes = {
    'sex': {'male': 0, 'female': 1},
    'smoker': {'yes': 0, 'no': 1},
    'region': {'southeast': 0, 'southwest': 1, 'northeast': 2, 'northwest': 3},
}

for column, codes in category_codes.items():
    encoded = medical_dataset[column].map(codes)

    # map() yields NaN for anything without a code; report it here rather
    # than letting an unencoded value surface later as a fit() failure.
    unmapped = encoded.isna()
    if unmapped.any():
        unknown_values = list(medical_dataset.loc[unmapped, column].unique())
        raise ValueError(
            f"Column {column!r} contains values with no encoding: {unknown_values}"
        )

    # Cast explicitly instead of relying on replace()'s implicit
    # object-to-int downcasting, which recent pandas versions deprecate.
    medical_dataset[column] = encoded.astype(int)

# Splitting data
# Target is the 'charges' column; every remaining column is a predictor.
Y = medical_dataset['charges']
X = medical_dataset.drop(columns='charges')

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=2,
)

# Train model
# fit() returns the estimator itself, so construction and fitting can be chained.
regressor = LinearRegression().fit(X_train, Y_train)

# Evaluate
# Predict on both partitions first, then score each against its true targets.
training_data_prediction = regressor.predict(X_train)
test_data_prediction = regressor.predict(X_test)

r2_train = metrics.r2_score(y_true=Y_train, y_pred=training_data_prediction)
r2_test = metrics.r2_score(y_true=Y_test, y_pred=test_data_prediction)

# Report the training score before the testing score.
print("Training R² score:", r2_train)
print("Testing R² score:", r2_test)

# Predict a sample input
# Values are listed in the same order as the columns of X.
input_data = (37, 1, 30.8, 2, 1, 0)

# The regressor was fitted on a DataFrame, so it remembers the feature names.
# Wrapping the sample in a one-row DataFrame with the same columns keeps the
# features aligned by name and avoids scikit-learn's
# "X does not have valid feature names" warning that a bare ndarray triggers.
input_features = pd.DataFrame([input_data], columns=X.columns)
prediction = regressor.predict(input_features)

print("The person will get insurance money =", prediction[0])

# ✅ Save model to file
# Serialize the fitted regressor so it can be reloaded later without retraining.
model_path = 'insurance_model.pkl'
with open(model_path, mode='wb') as model_file:
    pickle.dump(regressor, model_file)

print("✅ Model saved as 'insurance_model.pkl'")
